Browse Source

fix: code block segmentation problem of markdown document (#6465)

灰灰 9 months ago
parent
commit
5e4ac11df3
1 changed file with 8 additions and 0 deletions
  1. 8 0
      api/core/rag/extractor/markdown_extractor.py

+ 8 - 0
api/core/rag/extractor/markdown_extractor.py

@@ -54,8 +54,16 @@ class MarkdownExtractor(BaseExtractor):
 
         current_header = None
         current_text = ""
+        code_block_flag = False
 
         for line in lines:
+            if line.startswith("```"):
+                code_block_flag = not code_block_flag
+                current_text += line + "\n"
+                continue
+            if code_block_flag:
+                current_text += line + "\n"
+                continue
             header_match = re.match(r"^#+\s", line)
             if header_match:
                 if current_header is not None: