Parcourir la source

fix: code block segmentation problem of markdown document (#6465)

灰灰 il y a 9 mois
Parent
commit
5e4ac11df3
1 fichier modifié avec 8 ajouts et 0 suppression
  1. 8 0
      api/core/rag/extractor/markdown_extractor.py

+ 8 - 0
api/core/rag/extractor/markdown_extractor.py

@@ -54,8 +54,16 @@ class MarkdownExtractor(BaseExtractor):
 
         current_header = None
         current_text = ""
+        code_block_flag = False
 
         for line in lines:
+            if line.startswith("```"):
+                code_block_flag = not code_block_flag
+                current_text += line + "\n"
+                continue
+            if code_block_flag:
+                current_text += line + "\n"
+                continue
             header_match = re.match(r"^#+\s", line)
             if header_match:
                 if current_header is not None: