Преглед изворног кода

fix image files not deleted on indexing_estimate #9541 (#10798)

Co-authored-by: root <root@localhost.localdomain>
wy96f пре 5 месеци
родитељ
комит
94c9cadbd8
3 измењена фајла са 16 додато и 0 уклоњено
  1. 14 0
      api/core/indexing_runner.py
  2. 1 0
      api/tasks/clean_dataset_task.py
  3. 1 0
      api/tasks/clean_document_task.py

+ 14 - 0
api/core/indexing_runner.py

@@ -30,6 +30,7 @@ from core.rag.splitter.fixed_text_splitter import (
 )
 from core.rag.splitter.text_splitter import TextSplitter
 from core.tools.utils.text_processing_utils import remove_leading_symbols
+from core.tools.utils.web_reader_tool import get_image_upload_file_ids
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from extensions.ext_storage import storage
@@ -279,6 +280,19 @@ class IndexingRunner:
                 if len(preview_texts) < 5:
                     preview_texts.append(document.page_content)
 
+                # delete image files and related db records
+                image_upload_file_ids = get_image_upload_file_ids(document.page_content)
+                for upload_file_id in image_upload_file_ids:
+                    image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
+                    try:
+                        storage.delete(image_file.key)
+                    except Exception:
+                        logging.exception(
+                            "Delete image_files failed while indexing_estimate, \
+                                          image_upload_file_is: {}".format(upload_file_id)
+                        )
+                    db.session.delete(image_file)
+
         if doc_form and doc_form == "qa_model":
             if len(preview_texts) > 0:
                 # qa model document

+ 1 - 0
api/tasks/clean_dataset_task.py

@@ -78,6 +78,7 @@ def clean_dataset_task(
                             "Delete image_files failed when storage deleted, \
                                           image_upload_file_is: {}".format(upload_file_id)
                         )
+                    db.session.delete(image_file)
                 db.session.delete(segment)
 
         db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()

+ 1 - 0
api/tasks/clean_document_task.py

@@ -51,6 +51,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
                             "Delete image_files failed when storage deleted, \
                                           image_upload_file_is: {}".format(upload_file_id)
                         )
+                    db.session.delete(image_file)
                 db.session.delete(segment)
 
             db.session.commit()