|
@@ -35,6 +35,7 @@ from models.dataset import (
|
|
|
DatasetAutoDisableLog,
|
|
|
DatasetCollectionBinding,
|
|
|
DatasetPermission,
|
|
|
+ DatasetPermissionAll,
|
|
|
DatasetPermissionEnum,
|
|
|
DatasetProcessRule,
|
|
|
DatasetQuery,
|
|
@@ -79,8 +80,9 @@ from tasks.sync_website_document_indexing_task import sync_website_document_inde
|
|
|
|
|
|
class DatasetService:
|
|
|
@staticmethod
|
|
|
- def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, category_ids=None,
|
|
|
- include_all=False):
|
|
|
+ def get_datasets(
|
|
|
+ page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, category_ids=None, include_all=False
|
|
|
+ ):
|
|
|
query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
|
|
|
|
|
|
if user:
|
|
@@ -134,8 +136,9 @@ class DatasetService:
|
|
|
return [], 0
|
|
|
|
|
|
if category_ids:
|
|
|
- target_ids_by_category_ids = TagService.get_target_ids_by_tag_ids("knowledge_category", tenant_id,
|
|
|
- category_ids)
|
|
|
+ target_ids_by_category_ids = TagService.get_target_ids_by_tag_ids(
|
|
|
+ "knowledge_category", tenant_id, category_ids
|
|
|
+ )
|
|
|
if target_ids_by_category_ids:
|
|
|
query = query.filter(Dataset.id.in_(target_ids_by_category_ids))
|
|
|
else:
|
|
@@ -194,14 +197,16 @@ class DatasetService:
|
|
|
query = query.filter(
|
|
|
db.or_(
|
|
|
Dataset.id.in_(permitted_dataset_ids),
|
|
|
- Dataset.permission == DatasetPermissionEnum.ALL_TEAM
|
|
|
+ Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
|
|
|
)
|
|
|
)
|
|
|
|
|
|
return query.count()
|
|
|
|
|
|
@staticmethod
def get_tags_count(tenant_id: str) -> int:
    """Return the tag count that ``TagService.get_tags_count`` reports for a tenant.

    Args:
        tenant_id: Tenant identifier, forwarded unchanged to ``TagService``.

    Returns:
        Whatever ``TagService.get_tags_count(tenant_id)`` returns (annotated as ``int``).
    """
    tag_total = TagService.get_tags_count(tenant_id)
    return tag_total
|
|
|
|
|
|
@staticmethod
|
|
@@ -225,31 +230,24 @@ class DatasetService:
|
|
|
stats = {
|
|
|
"over_180_days": base_query.filter(Dataset.updated_at <= now - timedelta(days=180)).count(),
|
|
|
"within_180_days": base_query.filter(
|
|
|
- Dataset.updated_at > now - timedelta(days=180),
|
|
|
- Dataset.updated_at <= now
|
|
|
+ Dataset.updated_at > now - timedelta(days=180), Dataset.updated_at <= now
|
|
|
).count(),
|
|
|
"within_90_days": base_query.filter(
|
|
|
- Dataset.updated_at > now - timedelta(days=90),
|
|
|
- Dataset.updated_at <= now
|
|
|
+ Dataset.updated_at > now - timedelta(days=90), Dataset.updated_at <= now
|
|
|
).count(),
|
|
|
"within_30_days": base_query.filter(
|
|
|
- Dataset.updated_at > now - timedelta(days=30),
|
|
|
- Dataset.updated_at <= now
|
|
|
+ Dataset.updated_at > now - timedelta(days=30), Dataset.updated_at <= now
|
|
|
).count(),
|
|
|
"within_7_days": base_query.filter(
|
|
|
- Dataset.updated_at > now - timedelta(days=7),
|
|
|
- Dataset.updated_at <= now
|
|
|
+ Dataset.updated_at > now - timedelta(days=7), Dataset.updated_at <= now
|
|
|
).count(),
|
|
|
- "within_3_days": base_query.filter(
|
|
|
- Dataset.updated_at > now - timedelta(days=3)
|
|
|
- ).count()
|
|
|
+ "within_3_days": base_query.filter(Dataset.updated_at > now - timedelta(days=3)).count(),
|
|
|
}
|
|
|
|
|
|
return stats
|
|
|
|
|
|
@staticmethod
|
|
|
def get_dataset_type_stats(tenant_id):
|
|
|
-
|
|
|
sql = """
|
|
|
SELECT t.name, t.id, COUNT(*) as count
|
|
|
FROM datasets d
|
|
@@ -261,7 +259,7 @@ class DatasetService:
|
|
|
|
|
|
if tenant_id:
|
|
|
sql += " AND d.tenant_id = :tenant_id"
|
|
|
- params['tenant_id'] = tenant_id
|
|
|
+ params["tenant_id"] = tenant_id
|
|
|
|
|
|
sql += " GROUP BY t.name, t.id"
|
|
|
|
|
@@ -276,14 +274,109 @@ class DatasetService:
|
|
|
stats = []
|
|
|
for row in result:
|
|
|
percentage = (row.count / total) * 100
|
|
|
- stats.append({
|
|
|
- "type": row.name,
|
|
|
- "percentage": round(percentage, 2)
|
|
|
- })
|
|
|
+ stats.append({"type": row.name, "percentage": round(percentage, 2)})
|
|
|
|
|
|
return stats
|
|
|
|
|
|
@staticmethod
def get_datasets_edit_permission(dataset_id):
    """List the accounts that hold edit permission on a dataset.

    Args:
        dataset_id: Value matched against ``DatasetPermissionAll.dataset_id``.

    Returns:
        A list of ``{"id": account_id, "email": email}`` dicts, one per
        permission row whose ``has_edit_permission`` flag is true.
    """
    edit_filters = (
        DatasetPermissionAll.dataset_id == dataset_id,
        # SQL expression built by SQLAlchemy, not a Python truth test.
        DatasetPermissionAll.has_edit_permission == True,  # noqa: E712
    )
    permission_rows = (
        db.session.query(DatasetPermissionAll.account_id, Account.email)
        .join(DatasetPermissionAll, Account.id == DatasetPermissionAll.account_id)
        .filter(*edit_filters)
        .all()
    )
    return [{"id": record.account_id, "email": record.email} for record in permission_rows]
|
|
|
+
|
|
|
@staticmethod
def get_datasets_read_permission(dataset_id):
    """List the accounts that hold read permission on a dataset.

    Args:
        dataset_id: Value matched against ``DatasetPermissionAll.dataset_id``.

    Returns:
        A list of ``{"id": account_id, "email": email}`` dicts, one per
        permission row whose ``has_read_permission`` flag is true.
    """
    reader_query = db.session.query(DatasetPermissionAll.account_id, Account.email).join(
        DatasetPermissionAll, Account.id == DatasetPermissionAll.account_id
    )
    reader_query = reader_query.filter(
        DatasetPermissionAll.dataset_id == dataset_id,
        # SQL expression built by SQLAlchemy, not a Python truth test.
        DatasetPermissionAll.has_read_permission == True,  # noqa: E712
    )

    readers = []
    for account_id, email in ((entry.account_id, entry.email) for entry in reader_query.all()):
        readers.append({"id": account_id, "email": email})
    return readers
|
|
|
+
|
|
|
@staticmethod
def update_dataset_edit_auth(dataset_id, edit_auth, user=None):
    """Persist a new ``edit_auth`` value on a dataset.

    The previous body wrote into ``filtered_data`` and read ``user.id`` although
    neither name was defined in this function, so any real change raised
    ``NameError``. The update payload is now built locally, and the acting
    account is accepted as an optional argument.

    Args:
        dataset_id: ID of the dataset to update.
        edit_auth: New value for the dataset's ``edit_auth`` column.
        user: Optional account performing the change. When provided, its ``id``
            is stored in ``updated_by``; when omitted, ``updated_by`` is left
            untouched.

    Raises:
        ValueError: If ``DatasetService.get_dataset`` finds no dataset.
    """
    dataset = DatasetService.get_dataset(dataset_id)
    if not dataset:
        raise ValueError("Dataset not found")

    # Nothing to write (and no commit needed) when the value is unchanged.
    if dataset.edit_auth == edit_auth:
        return

    changes = {
        "edit_auth": edit_auth,
        "updated_at": datetime.datetime.now(),
    }
    if user is not None:
        changes["updated_by"] = user.id

    Dataset.query.filter_by(id=dataset_id).update(changes)
    db.session.commit()
|
|
|
+
|
|
|
@staticmethod
def update_dataset_edit_and_read_permission(dataset_id, user, edit_permission_list, read_permission_list):
    """Grant edit and read permission on a dataset to the listed accounts.

    Every entry in either list is a mapping with an ``"id"`` key (account ID)
    and an ``"email"`` key. For each entry the matching ``DatasetPermissionAll``
    row is created if missing, or has the relevant flag switched on if it is
    not already set. This method only grants: accounts that are absent from
    the lists keep whatever permission they already have.

    Args:
        dataset_id: ID of the dataset whose permissions are being granted.
        user: Account performing the change; ``user.id`` is written to the
            audit columns.
        edit_permission_list: Entries to receive ``has_edit_permission``.
            ``None`` is treated as empty.
        read_permission_list: Entries to receive ``has_read_permission``.
            ``None`` is treated as empty.
    """

    def _grant(entry, flag_name):
        """Ensure the account described by ``entry`` has ``flag_name`` enabled."""
        account_id = entry.get("id")
        # Rows are identified by (dataset_id, account_id) in both paths. The edit
        # path used to filter on email as well, which could insert a duplicate row
        # for an account that already had one.
        row_filter = {"dataset_id": dataset_id, "account_id": account_id}
        existing = DatasetPermissionAll.query.filter_by(**row_filter).first()
        now = datetime.datetime.now()

        if existing is None:
            record = DatasetPermissionAll()
            record.dataset_id = dataset_id
            record.account_id = account_id
            record.email = entry.get("email")
            # Both paths now record the same audit fields on insert.
            record.created_by = user.id
            record.created_at = now
            record.updated_by = user.id
            setattr(record, flag_name, True)
            db.session.add(record)
            db.session.commit()
        elif not getattr(existing, flag_name):
            # Falsy check so that an unset (None) flag is granted too, not only False.
            DatasetPermissionAll.query.filter_by(**row_filter).update(
                {flag_name: True, "updated_by": user.id, "updated_at": now}
            )
            db.session.commit()

    for entry in edit_permission_list or []:
        _grant(entry, "has_edit_permission")

    for entry in read_permission_list or []:
        _grant(entry, "has_read_permission")
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def create_empty_dataset(
|
|
|
tenant_id: str,
|
|
|
name: str,
|
|
@@ -314,6 +407,7 @@ class DatasetService:
|
|
|
dataset.embedding_model = embedding_model.model if embedding_model else None
|
|
|
dataset.permission = DatasetPermissionEnum.ALL_TEAM
|
|
|
dataset.provider = provider
|
|
|
+ dataset.dept_id = account.dept_id
|
|
|
db.session.add(dataset)
|
|
|
db.session.flush()
|
|
|
|
|
@@ -585,6 +679,16 @@ class DatasetService:
|
|
|
raise NoPermissionError("You do not have permission to access this dataset.")
|
|
|
|
|
|
@staticmethod
def check_dataset_permission_new(dataset, user):
    """Raise unless ``user`` may access ``dataset``.

    Access is allowed when the dataset belongs to the user's current tenant
    and the user is the tenant owner, a tenant admin, or the dataset's creator.

    The admin comparison previously referenced ``Acc.ADMIN``, a name that is
    not defined here, so every non-owner call raised ``NameError``. It now
    uses ``TenantAccountRole.ADMIN``, the same enum as the owner check.

    Args:
        dataset: Object exposing ``id``, ``tenant_id`` and ``created_by``.
        user: Object exposing ``id``, ``current_tenant_id`` and ``current_role``.

    Raises:
        NoPermissionError: If the dataset is in another tenant, or the user is
            neither owner, admin, nor creator of the dataset.
    """
    if dataset.tenant_id != user.current_tenant_id:
        logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}")
        raise NoPermissionError("You do not have permission to access this dataset.")

    # Tenant owners can access every dataset in their tenant.
    if user.current_role == TenantAccountRole.OWNER:
        return

    if user.current_role != TenantAccountRole.ADMIN and dataset.created_by != user.id:
        logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}")
        raise NoPermissionError("You do not have permission to access this dataset.")
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def check_dataset_operator_permission(user: Optional[Account] = None, dataset: Optional[Dataset] = None):
|
|
|
if not dataset:
|
|
|
raise ValueError("Dataset not found")
|
|
@@ -1390,7 +1494,6 @@ class DocumentService:
|
|
|
f"You have reached the limit of your subscription. Only {can_upload_size} documents can be uploaded."
|
|
|
)
|
|
|
|
|
|
-
|
|
|
@staticmethod
|
|
|
def build_document(
|
|
|
dataset: Dataset,
|