"""Console API endpoints for uploading dataset files and previewing their text."""
import datetime
import hashlib
import tempfile
import chardet
import time
import uuid
from pathlib import Path

from cachetools import TTLCache
from flask import request, current_app
from flask_login import current_user
from core.login.login import login_required
from flask_restful import Resource, marshal_with, fields
from werkzeug.exceptions import NotFound

from controllers.console import api
from controllers.console.datasets.error import NoFileUploadedError, TooManyFilesError, FileTooLargeError, \
    UnsupportedFileTypeError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.data_loader.file_extractor import FileExtractor
from extensions.ext_storage import storage
from libs.helper import TimestampField
from extensions.ext_database import db
from models.model import UploadFile

# Short-lived cache consulted by the preview endpoint.
# NOTE(review): nothing in this module writes to this cache, so the lookup in
# FilePreviewApi.get never hits unless another module populates it - confirm.
cache = TTLCache(maxsize=None, ttl=30)

# File extensions (lower-case, without the dot) accepted for upload and preview.
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx']

# Upper bound applied as a slice length to the extracted preview text.
PREVIEW_WORDS_LIMIT = 3000


class FileApi(Resource):
    """Upload endpoint: GET reports the upload limits, POST stores one file."""

    upload_config_fields = {
        'file_size_limit': fields.Integer,
        'batch_count_limit': fields.Integer
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(upload_config_fields)
    def get(self):
        """Return the configured upload limits.

        Returns:
            A ``(payload, 200)`` tuple whose payload carries
            ``file_size_limit`` and ``batch_count_limit`` read from the
            application config.
        """
        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT")
        batch_count_limit = current_app.config.get("UPLOAD_FILE_BATCH_LIMIT")
        return {
            'file_size_limit': file_size_limit,
            'batch_count_limit': batch_count_limit
        }, 200

    file_fields = {
        'id': fields.String,
        'name': fields.String,
        'size': fields.Integer,
        'extension': fields.String,
        'mime_type': fields.String,
        'created_by': fields.String,
        'created_at': TimestampField,
    }

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(file_fields)
    def post(self):
        """Validate and persist a single uploaded file.

        The file is read from the ``file`` form field, written to storage
        under a tenant-scoped key, and recorded as an ``UploadFile`` row.

        Returns:
            A ``(upload_file, 201)`` tuple, marshalled with ``file_fields``.

        Raises:
            NoFileUploadedError: the request has no ``file`` part.
            TooManyFilesError: the request carries more than one file part.
            FileTooLargeError: the content exceeds ``UPLOAD_FILE_SIZE_LIMIT``.
            UnsupportedFileTypeError: the extension is not in
                ``ALLOWED_EXTENSIONS``.
        """
        # Validate presence *before* indexing: looking up a missing key raises
        # on its own, which previously made NoFileUploadedError unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()

        if len(request.files) > 1:
            raise TooManyFilesError()

        file = request.files['file']

        file_content = file.read()
        file_size = len(file_content)

        # The configured limit is multiplied up to bytes for the comparison.
        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT") * 1024 * 1024
        if file_size > file_size_limit:
            # f-string so the actual numbers are interpolated into the message.
            message = f"({file_size} > {file_size_limit})"
            raise FileTooLargeError(message)

        # Normalise case so e.g. "report.PDF" is accepted and the stored
        # extension matches the lower-case entries in ALLOWED_EXTENSIONS.
        extension = file.filename.split('.')[-1].lower()
        if extension not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()

        # Use a fresh UUID as the stored file name.
        file_uuid = str(uuid.uuid4())
        file_key = 'upload_files/' + current_user.current_tenant_id + '/' + file_uuid + '.' + extension

        # Save the raw bytes to storage.
        storage.save(file_key, file_content)

        # Record the upload in the database.
        config = current_app.config
        upload_file = UploadFile(
            tenant_id=current_user.current_tenant_id,
            storage_type=config['STORAGE_TYPE'],
            key=file_key,
            name=file.filename,
            size=file_size,
            extension=extension,
            mime_type=file.mimetype,
            created_by=current_user.id,
            created_at=datetime.datetime.utcnow(),
            used=False,
            hash=hashlib.sha3_256(file_content).hexdigest()
        )

        db.session.add(upload_file)
        db.session.commit()

        return upload_file, 201


class FilePreviewApi(Resource):
    """Preview endpoint returning the leading text of an uploaded file."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, file_id):
        """Return a text preview for the upload identified by ``file_id``.

        Args:
            file_id: identifier taken from the URL; converted to ``str``
                before use.

        Returns:
            ``{'content': text}`` where ``text`` is at most
            ``PREVIEW_WORDS_LIMIT`` long, or a cached response if one exists.

        Raises:
            NotFound: no ``UploadFile`` row has this id.
            UnsupportedFileTypeError: the stored extension is not in
                ``ALLOWED_EXTENSIONS``.
        """
        file_id = str(file_id)

        key = file_id + request.path
        cached_response = cache.get(key)
        if cached_response and time.time() - cached_response['timestamp'] < cache.ttl:
            return cached_response['response']

        # NOTE(review): the lookup is by id only and is not restricted to the
        # caller's tenant - confirm whether cross-tenant previews are intended.
        upload_file = db.session.query(UploadFile) \
            .filter(UploadFile.id == file_id) \
            .first()

        if not upload_file:
            raise NotFound("File not found")

        # Extract text from the file.
        extension = upload_file.extension
        if extension not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()

        text = FileExtractor.load(upload_file, return_text=True)
        text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
        return {'content': text}


api.add_resource(FileApi, '/files/upload')
api.add_resource(FilePreviewApi, '/files/<uuid:file_id>/preview')