Browse Source

fix dataset update failure when the embedding model does not exist (#6920)

Jyong 8 months ago
parent
commit
048bc4c06e
2 changed files with 29 additions and 2 deletions
  1. 7 2
      api/controllers/console/datasets/datasets.py
  2. 22 0
      api/services/dataset_service.py

+ 7 - 2
api/controllers/console/datasets/datasets.py

@@ -189,8 +189,6 @@ class DatasetApi(Resource):
         dataset = DatasetService.get_dataset(dataset_id_str)
         if dataset is None:
             raise NotFound("Dataset not found.")
-        # check user's model setting
-        DatasetService.check_dataset_model_setting(dataset)
 
         parser = reqparse.RequestParser()
         parser.add_argument('name', nullable=False,
@@ -215,6 +213,13 @@ class DatasetApi(Resource):
         args = parser.parse_args()
         data = request.get_json()
 
+        # check embedding model setting
+        if data.get('indexing_technique') == 'high_quality':
+            DatasetService.check_embedding_model_setting(dataset.tenant_id,
+                                                         data.get('embedding_model_provider'),
+                                                         data.get('embedding_model')
+                                                         )
+
         # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
         DatasetPermissionService.check_permission(
             current_user, dataset, data.get('permission'), data.get('partial_member_list')

+ 22 - 0
api/services/dataset_service.py

@@ -198,6 +198,28 @@ class DatasetService:
                 )
 
     @staticmethod
+    def check_embedding_model_setting(tenant_id: str, embedding_model_provider: str, embedding_model:str):
+        try:
+            model_manager = ModelManager()
+            model_manager.get_model_instance(
+                tenant_id=tenant_id,
+                provider=embedding_model_provider,
+                model_type=ModelType.TEXT_EMBEDDING,
+                model=embedding_model
+            )
+        except LLMBadRequestError:
+            raise ValueError(
+                "No Embedding Model available. Please configure a valid provider "
+                "in the Settings -> Model Provider."
+            )
+        except ProviderTokenNotInitError as ex:
+            raise ValueError(
+                f"The dataset in unavailable, due to: "
+                f"{ex.description}"
+            )
+
+
+    @staticmethod
     def update_dataset(dataset_id, data, user):
         data.pop('partial_member_list', None)
         filtered_data = {k: v for k, v in data.items() if v is not None or k == 'description'}