# text_processing_utils.py

import re
  2. def remove_leading_symbols(text: str) -> str:
  3. """
  4. Remove leading punctuation or symbols from the given text.
  5. Args:
  6. text (str): The input text to process.
  7. Returns:
  8. str: The text with leading punctuation or symbols removed.
  9. """
  10. # Match Unicode ranges for punctuation and symbols
  11. # FIXME this pattern is confused quick fix for #11868 maybe refactor it later
  12. pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
  13. return re.sub(pattern, "", text)