|
@@ -12,6 +12,7 @@ from flask_login import current_user # type: ignore
|
|
|
from sqlalchemy import func
|
|
|
from sqlalchemy.orm import Session
|
|
|
from werkzeug.exceptions import NotFound
|
|
|
+from sqlalchemy import text
|
|
|
|
|
|
from configs import dify_config
|
|
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
|
@@ -171,6 +172,119 @@ class DatasetService:
|
|
|
return datasets.items, datasets.total
|
|
|
|
|
|
@staticmethod
def get_datasets_count(tenant_id: str, user) -> int:
    """Count the datasets of a tenant that ``user`` is allowed to see.

    Args:
        tenant_id: Tenant to restrict the count to. A falsy value skips the
            tenant filter.
        user: Account whose ``id`` and ``current_role`` drive the permission
            filter. A falsy value skips permission filtering entirely.

    Returns:
        The number of matching datasets. A dataset operator without any
        explicit ``DatasetPermission`` row always gets ``0``.
    """
    query = Dataset.query
    if tenant_id:
        query = query.filter(Dataset.tenant_id == tenant_id)

    if not user:
        return query.count()

    # Dataset IDs explicitly granted to this account inside the tenant.
    granted_rows = DatasetPermission.query.filter_by(account_id=user.id, tenant_id=tenant_id).all()
    permitted_dataset_ids = {dp.dataset_id for dp in granted_rows}

    if user.current_role == TenantAccountRole.DATASET_OPERATOR:
        # Operators only ever see datasets they were explicitly granted.
        if not permitted_dataset_ids:
            return 0
        query = query.filter(Dataset.id.in_(permitted_dataset_ids))
    elif user.current_role != TenantAccountRole.OWNER:
        shared_with_team = Dataset.permission == DatasetPermissionEnum.ALL_TEAM
        if permitted_dataset_ids:
            query = query.filter(db.or_(Dataset.id.in_(permitted_dataset_ids), shared_with_team))
        else:
            # Without explicit grants only team-wide datasets are visible.
            # Leaving the query unfiltered here would count every dataset of
            # the tenant, including ones this user cannot access.
            query = query.filter(shared_with_team)

    return query.count()
|
|
|
+
|
|
|
@staticmethod
def get_tags_count(tenant_id: str) -> int:
    """Return the tag count that ``TagService.get_tags_count`` reports for ``tenant_id``."""
    tag_total = TagService.get_tags_count(tenant_id)
    return tag_total
|
|
|
+
|
|
|
@staticmethod
def count_datasets(tenant_id=None):
    """Count datasets, optionally restricted to a single tenant.

    Args:
        tenant_id: Tenant to filter on. When falsy, all datasets are counted.

    Returns:
        The result of ``count()`` on the (optionally filtered) dataset query.
    """
    if not tenant_id:
        return Dataset.query.count()
    return Dataset.query.filter(Dataset.tenant_id == tenant_id).count()
|
|
|
+
|
|
|
@staticmethod
def get_dataset_update_stats(tenant_id):
    """Count datasets by how recently ``updated_at`` was touched.

    Args:
        tenant_id: Tenant to restrict the statistics to. A falsy value
            includes datasets of every tenant.

    Returns:
        A dict with the keys ``over_180_days``, ``within_180_days``,
        ``within_90_days``, ``within_30_days``, ``within_7_days`` and
        ``within_3_days``, each mapped to a dataset count. The ``within_*``
        windows are cumulative (a dataset updated yesterday is counted in
        all of them).
    """
    from datetime import datetime, timedelta, timezone

    # Naive UTC timestamp, equivalent to the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    base_query = Dataset.query
    if tenant_id:
        base_query = base_query.filter(Dataset.tenant_id == tenant_id)

    def count_updated_within(days):
        """Count datasets updated in the half-open window (now - days, now]."""
        return base_query.filter(
            Dataset.updated_at > now - timedelta(days=days),
            Dataset.updated_at <= now,
        ).count()

    # Count the datasets updated within each time window.
    stats = {
        "over_180_days": base_query.filter(Dataset.updated_at <= now - timedelta(days=180)).count(),
    }
    for days in (180, 90, 30, 7):
        stats[f"within_{days}_days"] = count_updated_within(days)

    # NOTE(review): unlike the other windows, the 3-day window has no
    # ``<= now`` upper bound; kept as-is to preserve existing results.
    stats["within_3_days"] = base_query.filter(Dataset.updated_at > now - timedelta(days=3)).count()

    return stats
|
|
|
+
|
|
|
@staticmethod
def get_dataset_type_stats(tenant_id):
    """Compute the share of dataset/tag bindings per ``knowledge_category`` tag.

    Args:
        tenant_id: Tenant to restrict the datasets to. A falsy value
            includes datasets of every tenant.

    Returns:
        A list of ``{"type": <tag name>, "percentage": <float>}`` dicts, one
        per tag, with the percentage rounded to two decimals. Returns an
        empty list when no binding matches.
    """
    select_clause = """
        SELECT t.name, t.id, COUNT(*) as count
        FROM datasets d
        LEFT JOIN tag_bindings tb ON tb.target_id = d.id
        LEFT JOIN tags t ON t.id = tb.tag_id
        WHERE t.type = 'knowledge_category'
    """
    tenant_clause = " AND d.tenant_id = :tenant_id" if tenant_id else ""
    bind_params = {"tenant_id": tenant_id} if tenant_id else {}
    statement = select_clause + tenant_clause + " GROUP BY t.name, t.id"

    # Raw SQL strings must be wrapped in text() before being executed.
    rows = db.session.execute(text(statement), bind_params).fetchall()

    # NOTE(review): "count" is also a tuple-method name on result rows;
    # confirm attribute access keeps resolving to the aliased column.
    grand_total = sum(row.count for row in rows)
    if grand_total == 0:
        return []

    # Turn each per-tag count into a percentage of all counted bindings.
    return [
        {
            "type": row.name,
            "percentage": round((row.count / grand_total) * 100, 2),
        }
        for row in rows
    ]
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
def create_empty_dataset(
|
|
|
tenant_id: str,
|
|
|
name: str,
|