import datetime
import uuid

from flask import current_app
from flask_restful import reqparse
from werkzeug.exceptions import NotFound

import services.dataset_service
from controllers.service_api import api
from controllers.service_api.app.error import ProviderNotInitializeError
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError, DocumentIndexingError, \
    DatasetNotInitedError
from controllers.service_api.wraps import DatasetApiResource
from core.model_providers.error import ProviderTokenNotInitError
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.model import UploadFile
from services.dataset_service import DocumentService


class DocumentListApi(DatasetApiResource):
    """Resource for documents."""

    def post(self, dataset):
        """Create a document in ``dataset`` from raw text sent in the JSON body.

        JSON fields:
            name (str, required): used, with a ``.txt`` suffix, as the upload
                file's name.
            text (str, required): the content saved to storage.
            doc_type (str, optional): must be a key of
                ``DocumentService.DOCUMENT_METADATA_SCHEMA`` when given.
            doc_metadata (dict, optional): only applied when ``doc_type`` is
                also given; entries are filtered against the schema.

        Returns:
            ``{'id': <document id>}`` for the first document created.

        Raises:
            DatasetNotInitedError: the dataset has no indexing technique set.
            ValueError: ``doc_type`` is not a known schema key.
            ProviderNotInitializeError: the document service raised
                ``ProviderTokenNotInitError``.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('name', type=str, required=True, nullable=False, location='json')
        parser.add_argument('text', type=str, required=True, nullable=False, location='json')
        parser.add_argument('doc_type', type=str, location='json')
        parser.add_argument('doc_metadata', type=dict, location='json')
        args = parser.parse_args()

        if not dataset.indexing_technique:
            raise DatasetNotInitedError("Dataset indexing technique must be set.")

        doc_type = args.get('doc_type')
        doc_metadata = args.get('doc_metadata')

        # Reject an unknown doc_type before anything is written to storage or the DB.
        if doc_type and doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
            raise ValueError('Invalid doc_type.')

        name = args.get('name')
        text = args.get('text')

        # use a uuid as the stored file name
        file_uuid = str(uuid.uuid4())
        file_key = 'upload_files/' + dataset.tenant_id + '/' + file_uuid + '.txt'

        # save file to storage
        storage.save(file_key, text)

        # save file to db; one timestamp so created_at and used_at agree exactly
        config = current_app.config
        now = datetime.datetime.utcnow()
        upload_file = UploadFile(
            tenant_id=dataset.tenant_id,
            storage_type=config['STORAGE_TYPE'],
            key=file_key,
            name=name + '.txt',
            # NOTE(review): len() of a str counts characters, not encoded
            # bytes -- confirm which unit UploadFile.size is expected to hold.
            size=len(text),
            extension='txt',
            mime_type='text/plain',
            created_by=dataset.created_by,
            created_at=now,
            used=True,
            used_by=dataset.created_by,
            used_at=now
        )
        db.session.add(upload_file)
        # Commit now so upload_file.id is available for the data source info below.
        db.session.commit()

        document_data = {
            'data_source': {
                'type': 'upload_file',
                'info': [
                    {
                        'upload_file_id': upload_file.id
                    }
                ]
            }
        }

        try:
            documents, batch = DocumentService.save_document_with_dataset_id(
                dataset=dataset,
                document_data=document_data,
                account=dataset.created_by_account,
                dataset_process_rule=dataset.latest_process_rule,
                created_from='api'
            )
        except ProviderTokenNotInitError as ex:
            # Translate the provider error into the API error, keeping the cause.
            raise ProviderNotInitializeError(ex.description) from ex

        document = documents[0]
        if doc_type and doc_metadata:
            metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]

            # Keep only schema keys whose supplied value has the declared type;
            # everything else in doc_metadata is dropped.
            filtered_metadata = {}
            for key, value_type in metadata_schema.items():
                value = doc_metadata.get(key)
                if value is not None and isinstance(value, value_type):
                    filtered_metadata[key] = value

            document.doc_metadata = filtered_metadata
            document.doc_type = doc_type
            document.updated_at = datetime.datetime.utcnow()
            db.session.commit()

        return {'id': document.id}


class DocumentApi(DatasetApiResource):
    """Resource for a single document of a dataset."""

    def delete(self, dataset, document_id):
        """Delete document.

        Returns:
            ``({'result': 'success'}, 204)`` once the document is deleted.

        Raises:
            NotFound: no document with ``document_id`` exists in ``dataset``.
            ArchivedDocumentImmutableError: the document is archived.
            DocumentIndexingError: the document service refused the deletion
                with its own ``DocumentIndexingError``.
        """
        # The route converter yields a UUID object; the service is given its string form.
        document_id = str(document_id)
        document = DocumentService.get_document(dataset.id, document_id)

        # 404 if document not found
        if document is None:
            raise NotFound("Document Not Exists.")

        # 403 if document is archived
        if DocumentService.check_archived(document):
            raise ArchivedDocumentImmutableError()

        try:
            # delete document
            DocumentService.delete_document(document)
        except services.errors.document.DocumentIndexingError as ex:
            # Translate the service error into the API error, keeping the cause.
            raise DocumentIndexingError('Cannot delete document during indexing.') from ex

        return {'result': 'success'}, 204


api.add_resource(DocumentListApi, '/documents')
api.add_resource(DocumentApi, '/documents/<uuid:document_id>')