Parcourir la source

fix: full-doc mode documents are not reindexed after enable or un_archive (#16737)

Jyong il y a 1 mois
Parent
commit
58c62f0a34

+ 1 - 1
api/core/indexing_runner.py

@@ -187,7 +187,7 @@ class IndexingRunner:
                             },
                         )
                         if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-                            child_chunks = document_segment.child_chunks
+                            child_chunks = document_segment.get_child_chunks()
                             if child_chunks:
                                 child_documents = []
                                 for child_chunk in child_chunks:

+ 17 - 0
api/models/dataset.py

@@ -720,6 +720,23 @@ class DocumentSegment(db.Model):  # type: ignore[name-defined]
         else:
             return []
 
+    def get_child_chunks(self):
+        process_rule = self.document.dataset_process_rule
+        if process_rule.mode == "hierarchical":
+            rules = Rule(**process_rule.rules_dict)
+            if rules.parent_mode:
+                child_chunks = (
+                    db.session.query(ChildChunk)
+                    .filter(ChildChunk.segment_id == self.id)
+                    .order_by(ChildChunk.position.asc())
+                    .all()
+                )
+                return child_chunks or []
+            else:
+                return []
+        else:
+            return []
+
     @property
     def sign_content(self):
         return self.get_sign_content()

+ 1 - 1
api/tasks/add_document_to_index_task.py

@@ -59,7 +59,7 @@ def add_document_to_index_task(dataset_document_id: str):
                 },
             )
             if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-                child_chunks = segment.child_chunks
+                child_chunks = segment.get_child_chunks()
                 if child_chunks:
                     child_documents = []
                     for child_chunk in child_chunks:

+ 1 - 1
api/tasks/deal_dataset_vector_index_task.py

@@ -130,7 +130,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
                                     },
                                 )
                                 if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-                                    child_chunks = segment.child_chunks
+                                    child_chunks = segment.get_child_chunks()
                                     if child_chunks:
                                         child_documents = []
                                         for child_chunk in child_chunks:

+ 1 - 1
api/tasks/enable_segment_to_index_task.py

@@ -63,7 +63,7 @@ def enable_segment_to_index_task(segment_id: str):
 
         index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()
         if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-            child_chunks = segment.child_chunks
+            child_chunks = segment.get_child_chunks()
             if child_chunks:
                 child_documents = []
                 for child_chunk in child_chunks:

+ 1 - 1
api/tasks/enable_segments_to_index_task.py

@@ -67,7 +67,7 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
             )
 
             if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-                child_chunks = segment.child_chunks
+                child_chunks = segment.get_child_chunks()
                 if child_chunks:
                     child_documents = []
                     for child_chunk in child_chunks: