text_processing_utils.py 464 B

1234567891011121314151617
  1. import re
  2. def remove_leading_symbols(text: str) -> str:
  3. """
  4. Remove leading punctuation or symbols from the given text.
  5. Args:
  6. text (str): The input text to process.
  7. Returns:
  8. str: The text with leading punctuation or symbols removed.
  9. """
  10. # Match Unicode ranges for punctuation and symbols
  11. pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]+"
  12. return re.sub(pattern, "", text)