浏览代码

fix: code block segmentation problem of markdown document (#6465)

灰灰 9 个月前
父节点
当前提交
5e4ac11df3
共有 1 个文件被更改,包括 8 次插入和 0 次删除
  1. 8 0
      api/core/rag/extractor/markdown_extractor.py

+ 8 - 0
api/core/rag/extractor/markdown_extractor.py

@@ -54,8 +54,16 @@ class MarkdownExtractor(BaseExtractor):
 
         current_header = None
         current_text = ""
+        code_block_flag = False
 
         for line in lines:
+            if line.startswith("```"):
+                code_block_flag = not code_block_flag
+                current_text += line + "\n"
+                continue
+            if code_block_flag:
+                current_text += line + "\n"
+                continue
             header_match = re.match(r"^#+\s", line)
             if header_match:
                 if current_header is not None: