# check_no_chinese_comments.py

import os
import re

from zhon.hanzi import punctuation
  4. def has_chinese_characters(text):
  5. for char in text:
  6. if '\u4e00' <= char <= '\u9fff' or char in punctuation:
  7. return True
  8. return False
  9. def check_file_for_chinese_comments(file_path):
  10. with open(file_path, 'r', encoding='utf-8') as file:
  11. for line_number, line in enumerate(file, start=1):
  12. if has_chinese_characters(line):
  13. print(f"Found Chinese characters in {file_path} on line {line_number}:")
  14. print(line.strip())
  15. return True
  16. return False
  17. def main():
  18. has_chinese = False
  19. excluded_files = ["model_template.py", 'stopwords.py', 'commands.py',
  20. 'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py']
  21. for root, _, files in os.walk("."):
  22. for file in files:
  23. if file.endswith(".py") and file not in excluded_files:
  24. file_path = os.path.join(root, file)
  25. if check_file_for_chinese_comments(file_path):
  26. has_chinese = True
  27. if has_chinese:
  28. raise Exception("Found Chinese characters in Python files. Please remove them.")
  29. if __name__ == "__main__":
  30. main()