瀏覽代碼

知识总览新增

zhouyuexiang 3 月之前
父節點
當前提交
13bcc55b41
共有 3 個文件被更改,包括 178 次插入和 0 次删除
  1. 54 0
      api/controllers/console/datasets/datasets.py
  2. 114 0
      api/services/dataset_service.py
  3. 10 0
      api/services/tag_service.py

+ 54 - 0
api/controllers/console/datasets/datasets.py

@@ -768,6 +768,57 @@ class DatasetAutoDisableLogApi(Resource):
         return DatasetService.get_dataset_auto_disable_logs(dataset_id_str), 200
 
 
+class DatasetCountApi(Resource):
+    """Console endpoint returning dataset and tag totals for the caller's workspace."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Return ``{"data": {"datasets_count": ..., "tags_count": ...}}`` with HTTP 200.
+
+        The tenant is taken from the authenticated user, not from the query
+        string: a client-supplied ``tenant_id`` would let any logged-in user
+        read another workspace's totals, or totals across every workspace by
+        omitting the parameter. The dataset count is also filtered by
+        ``current_user.current_role``, which is only meaningful for the
+        user's own tenant.
+        """
+        tenant_id = current_user.current_tenant_id
+        response = {
+            "datasets_count": DatasetService.get_datasets_count(tenant_id, current_user),
+            "tags_count": DatasetService.get_tags_count(tenant_id),
+        }
+        return {"data": response}, 200
+
+
+class DatasetUpdateStatsApi(Resource):
+    """Console endpoint returning dataset counts bucketed by last-update age."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Get dataset update statistics for the caller's workspace.
+
+        Returns ``{"data": [{"period": <label>, "count": <int>}, ...]}`` with
+        HTTP 200. The tenant is taken from the authenticated user rather than
+        the query string so the statistics cannot be requested for another
+        workspace.
+        """
+        tenant_id = current_user.current_tenant_id
+        stats = DatasetService.get_dataset_update_stats(tenant_id)
+
+        # Convert to the shape the frontend expects: (display label, stats key),
+        # listed in the order the response returns them.
+        periods = (
+            ("半年以上", "over_180_days"),
+            ("半年以内", "within_180_days"),
+            ("90天内", "within_90_days"),
+            ("30天内", "within_30_days"),
+            ("7天内", "within_7_days"),
+            ("3天内", "within_3_days"),
+        )
+        response = {"data": [{"period": label, "count": stats[key]} for label, key in periods]}
+        return response, 200
+
+
+class DatasetTypeStatsApi(Resource):
+    """Console endpoint exposing ``DatasetService.get_dataset_type_stats``."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Return the current tenant's dataset type statistics as ``{"data": ...}`` with HTTP 200."""
+        type_stats = DatasetService.get_dataset_type_stats(current_user.current_tenant_id)
+        return {"data": type_stats}, 200
+
+
 api.add_resource(DatasetListApi, "/datasets")
 api.add_resource(DatasetApi, "/datasets/<uuid:dataset_id>")
 api.add_resource(DatasetUseCheckApi, "/datasets/<uuid:dataset_id>/use-check")
@@ -783,3 +834,6 @@ api.add_resource(DatasetRetrievalSettingApi, "/datasets/retrieval-setting")
 api.add_resource(DatasetRetrievalSettingMockApi, "/datasets/retrieval-setting/<string:vector_type>")
 api.add_resource(DatasetPermissionUserListApi, "/datasets/<uuid:dataset_id>/permission-part-users")
 api.add_resource(DatasetAutoDisableLogApi, "/datasets/<uuid:dataset_id>/auto-disable-logs")
+# Dataset statistics endpoints: totals, update recency, and type breakdown.
+api.add_resource(DatasetCountApi, "/datasets/count")
+api.add_resource(DatasetUpdateStatsApi, "/datasets/update-stats")
+api.add_resource(DatasetTypeStatsApi, "/datasets/type-stats")

+ 114 - 0
api/services/dataset_service.py

@@ -12,6 +12,7 @@ from flask_login import current_user  # type: ignore
 from sqlalchemy import func
 from sqlalchemy.orm import Session
 from werkzeug.exceptions import NotFound
+from sqlalchemy import text
 
 from configs import dify_config
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
@@ -171,6 +172,119 @@ class DatasetService:
         return datasets.items, datasets.total
 
     @staticmethod
+    def get_datasets_count(tenant_id: str, user) -> int:
+        """Count the datasets visible to ``user``, optionally limited to one tenant.
+
+        Args:
+            tenant_id: Tenant to restrict the count to; a falsy value skips
+                the tenant filter.
+            user: Account whose ``id`` and ``current_role`` decide visibility.
+                A falsy ``user`` disables permission filtering altogether.
+
+        Returns:
+            The number of matching datasets:
+            * owner (or no user): every dataset matching the tenant filter;
+            * dataset operator: only datasets with an explicit
+              ``DatasetPermission`` row for the user, ``0`` when there is none;
+            * any other role: datasets shared with the whole team plus those
+              with an explicit permission row for the user.
+        """
+        query = Dataset.query
+        if tenant_id:
+            query = query.filter(Dataset.tenant_id == tenant_id)
+
+        # Owners and anonymous callers are not restricted per dataset, so the
+        # permission lookup below is skipped for them.
+        if not user or user.current_role == TenantAccountRole.OWNER:
+            return query.count()
+
+        # Dataset IDs the user has been explicitly granted access to.
+        permissions = DatasetPermission.query.filter_by(account_id=user.id, tenant_id=tenant_id).all()
+        permitted_dataset_ids = {permission.dataset_id for permission in permissions}
+
+        if user.current_role == TenantAccountRole.DATASET_OPERATOR:
+            # Operators only see datasets they were explicitly granted.
+            if not permitted_dataset_ids:
+                return 0
+            query = query.filter(Dataset.id.in_(permitted_dataset_ids))
+        elif permitted_dataset_ids:
+            query = query.filter(
+                db.or_(
+                    Dataset.id.in_(permitted_dataset_ids),
+                    Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
+                )
+            )
+        else:
+            # Without explicit grants only team-wide datasets are visible.
+            # Previously this case applied no filter at all, so a user with
+            # fewer grants was shown a larger count than one with more.
+            # NOTE(review): datasets the user created with a private
+            # permission are not counted in either branch - confirm whether
+            # they should be.
+            query = query.filter(Dataset.permission == DatasetPermissionEnum.ALL_TEAM)
+
+        return query.count()
+
+    @staticmethod
+    def get_tags_count(tenant_id: str) -> int:
+        """Return the count that ``TagService.get_tags_count`` reports for ``tenant_id``."""
+        tags_total = TagService.get_tags_count(tenant_id)
+        return tags_total
+
+    @staticmethod
+    def count_datasets(tenant_id=None):
+        """Return the number of datasets, limited to ``tenant_id`` when one is given."""
+        if not tenant_id:
+            # Falsy tenant_id (None or empty): count datasets of every tenant.
+            return Dataset.query.count()
+        return Dataset.query.filter(Dataset.tenant_id == tenant_id).count()
+
+    @staticmethod
+    def get_dataset_update_stats(tenant_id):
+        """Get dataset update statistics.
+
+        Counts datasets by the age of ``Dataset.updated_at``. The ``within_*``
+        buckets are cumulative (a dataset updated yesterday is counted in
+        every one of them); ``over_180_days`` is the complement of
+        ``within_180_days``.
+
+        Args:
+            tenant_id: Tenant to restrict the counts to; a falsy value counts
+                datasets of every tenant.
+
+        Returns:
+            A dict with the integer counts ``over_180_days``,
+            ``within_180_days``, ``within_90_days``, ``within_30_days``,
+            ``within_7_days`` and ``within_3_days``.
+        """
+        from datetime import datetime, timedelta, timezone
+
+        # Naive UTC timestamp, equivalent to the deprecated datetime.utcnow().
+        # NOTE(review): assumes Dataset.updated_at is stored as naive UTC - confirm.
+        now = datetime.now(timezone.utc).replace(tzinfo=None)
+
+        base_query = Dataset.query
+        if tenant_id:
+            base_query = base_query.filter(Dataset.tenant_id == tenant_id)
+
+        stats = {
+            "over_180_days": base_query.filter(Dataset.updated_at <= now - timedelta(days=180)).count(),
+        }
+        # Every cumulative bucket uses the same rule (updated after the cutoff,
+        # no upper bound). Previously only the 3-day bucket lacked the
+        # "<= now" bound, so a row stamped slightly ahead of the application
+        # clock was counted in the 3-day bucket but missing from the wider ones.
+        for days in (180, 90, 30, 7, 3):
+            cutoff = now - timedelta(days=days)
+            stats[f"within_{days}_days"] = base_query.filter(Dataset.updated_at > cutoff).count()
+
+        return stats
+
+    @staticmethod
+    def get_dataset_type_stats(tenant_id):
+        """Return each ``knowledge_category`` tag's share of dataset tag bindings.
+
+        Args:
+            tenant_id: Tenant to restrict the datasets to; a falsy value
+                includes datasets of every tenant.
+
+        Returns:
+            A list of ``{"type": <tag name>, "percentage": <float>}`` dicts,
+            one per (tag name, tag id) group, where the percentage is that
+            group's share of all counted bindings rounded to two decimals.
+            Returns an empty list when nothing matches.
+        """
+        # The aggregate is aliased "dataset_count" rather than "count":
+        # SQLAlchemy Row objects expose the Sequence method count(), so
+        # reading row.count yields that method instead of the column value.
+        # The WHERE clause on t.type discards rows without a matching tag, so
+        # the LEFT JOINs behave like inner joins here.
+        sql = """
+             SELECT t.name, t.id, COUNT(*) AS dataset_count
+             FROM datasets d
+             LEFT JOIN tag_bindings tb ON tb.target_id = d.id
+             LEFT JOIN tags t ON t.id = tb.tag_id
+             WHERE t.type = 'knowledge_category'
+         """
+        params = {}
+
+        if tenant_id:
+            sql += " AND d.tenant_id = :tenant_id"
+            params['tenant_id'] = tenant_id
+
+        sql += " GROUP BY t.name, t.id"
+
+        # Raw SQL strings must be wrapped in text() before execution.
+        rows = db.session.execute(text(sql), params).fetchall()
+
+        # Unpack positionally (name, id, count) so no column name can clash
+        # with a Row attribute.
+        tag_counts = [(name, dataset_count) for name, _tag_id, dataset_count in rows]
+
+        total = sum(dataset_count for _name, dataset_count in tag_counts)
+        if total == 0:
+            return []
+
+        # Convert each count to a percentage of the total.
+        return [
+            {"type": name, "percentage": round((dataset_count / total) * 100, 2)}
+            for name, dataset_count in tag_counts
+        ]
+
+    @staticmethod
     def create_empty_dataset(
         tenant_id: str,
         name: str,

+ 10 - 0
api/services/tag_service.py

@@ -52,6 +52,16 @@ class TagService:
         return results
 
     @staticmethod
+    def get_tags_count(tenant_id: str, keyword: Optional[str] = None) -> int:
+        """Count tags of type ``"knowledge"``.
+
+        Args:
+            tenant_id: When truthy, only tags of this tenant are counted.
+            keyword: When truthy, only tags whose name contains it
+                (case-insensitive ``ILIKE`` match) are counted.
+
+        Returns:
+            The number of matching tags.
+        """
+        conditions = [Tag.type == "knowledge"]
+        if tenant_id:
+            conditions.append(Tag.tenant_id == tenant_id)
+        if keyword:
+            conditions.append(Tag.name.ilike(f"%{keyword}%"))
+
+        return db.session.query(Tag).filter(*conditions).count()
+
+    @staticmethod
     def get_tags_by_target_id(tag_type: str, current_tenant_id: str, target_id: str) -> list:
         tags = (
             db.session.query(Tag)