123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- import base64
- import hashlib
- import hmac
- import logging
- import os
- import time
- from mimetypes import guess_extension, guess_type
- from typing import Optional, Union
- from uuid import uuid4
- import httpx
- from configs import dify_config
- from core.helper import ssrf_proxy
- from extensions.ext_database import db
- from extensions.ext_storage import storage
- from models.model import MessageFile
- from models.tools import ToolFile
# Module-level logger named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
class ToolFileManager:
    """
    Static helpers for tool-generated files.

    Covers three concerns: building and verifying signed preview URLs,
    persisting file bytes (via ``storage``) together with a ``ToolFile``
    database row, and loading stored files back by id.
    """

    @staticmethod
    def _build_signature(file_id: str, timestamp: str, nonce: str) -> str:
        """
        compute the url-safe base64 HMAC-SHA256 signature of a preview request

        :param file_id: the id of the tool file
        :param timestamp: unix timestamp (seconds) as a string
        :param nonce: random per-url value
        :return: the encoded signature
        """
        data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
        # An unset SECRET_KEY degrades to an empty key instead of raising.
        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
        digest = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
        return base64.urlsafe_b64encode(digest).decode()

    @staticmethod
    def _save_blob(tenant_id: str, blob: bytes, extension: str) -> tuple[str, str]:
        """
        write a blob to storage under a freshly generated random name

        :param tenant_id: the tenant owning the file (used as directory name)
        :param blob: the file content
        :param extension: file extension appended verbatim to the random name
        :return: storage key, file name
        """
        filename = f"{uuid4().hex}{extension}"
        filepath = f"tools/{tenant_id}/{filename}"
        storage.save(filepath, blob)
        return filepath, filename

    @staticmethod
    def sign_file(tool_file_id: str, extension: str) -> str:
        """
        sign file to get a temporary url

        :param tool_file_id: the id of the tool file
        :param extension: appended verbatim after the id in the url path
        :return: preview url carrying ``timestamp``, ``nonce`` and ``sign`` query parameters
        """
        base_url = dify_config.FILES_URL
        file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}"

        timestamp = str(int(time.time()))
        nonce = os.urandom(16).hex()
        encoded_sign = ToolFileManager._build_signature(tool_file_id, timestamp, nonce)

        return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"

    @staticmethod
    def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
        """
        verify signature

        :param file_id: the id of the tool file
        :param timestamp: the ``timestamp`` query parameter of the signed url
        :param nonce: the ``nonce`` query parameter of the signed url
        :param sign: the ``sign`` query parameter of the signed url
        :return: True only if the signature matches and the url is not older
            than ``dify_config.FILES_ACCESS_TIMEOUT`` seconds
        """
        # A missing/non-string signature can never match; reject it up front.
        if not isinstance(sign, str):
            return False

        expected_sign = ToolFileManager._build_signature(file_id, timestamp, nonce)

        # Constant-time comparison so response timing does not leak how much of
        # the signature matched. Compared as bytes because compare_digest()
        # raises TypeError for str arguments containing non-ASCII characters.
        if not hmac.compare_digest(sign.encode(), expected_sign.encode()):
            return False

        try:
            issued_at = int(timestamp)
        except (TypeError, ValueError):
            # Malformed timestamp: treat as an invalid url rather than crashing.
            return False

        current_time = int(time.time())
        return current_time - issued_at <= dify_config.FILES_ACCESS_TIMEOUT

    @staticmethod
    def create_file_by_raw(
        *,
        user_id: str,
        tenant_id: str,
        conversation_id: Optional[str],
        file_binary: bytes,
        mimetype: str,
    ) -> ToolFile:
        """
        store raw bytes as a tool file

        :param user_id: the id of the user the file belongs to
        :param tenant_id: the id of the tenant the file belongs to
        :param conversation_id: the id of the related conversation, if any
        :param file_binary: the file content
        :param mimetype: mime type of the content; also decides the file extension
        :return: the committed and refreshed ToolFile row
        """
        extension = guess_extension(mimetype) or ".bin"
        filepath, filename = ToolFileManager._save_blob(tenant_id, file_binary, extension)

        tool_file = ToolFile(
            user_id=user_id,
            tenant_id=tenant_id,
            conversation_id=conversation_id,
            file_key=filepath,
            mimetype=mimetype,
            name=filename,
            size=len(file_binary),
        )

        db.session.add(tool_file)
        db.session.commit()
        db.session.refresh(tool_file)

        return tool_file

    @staticmethod
    def create_file_by_url(
        user_id: str,
        tenant_id: str,
        file_url: str,
        conversation_id: Optional[str] = None,
    ) -> ToolFile:
        """
        download a file through the ssrf proxy and store it as a tool file

        :param user_id: the id of the user the file belongs to
        :param tenant_id: the id of the tenant the file belongs to
        :param file_url: the url to download the file from
        :param conversation_id: the id of the related conversation, if any
        :return: the committed ToolFile row
        :raises ValueError: when the download times out; errors raised by
            ``raise_for_status()`` are not caught here and propagate
        """
        try:
            response = ssrf_proxy.get(file_url)
            response.raise_for_status()
            blob = response.content
        except httpx.TimeoutException as exc:
            raise ValueError(f"timeout when downloading file from {file_url}") from exc

        # The mime type is guessed from the url only; unknown types fall back to
        # the generic binary type (and the ".bin" extension).
        guessed_mimetype = guess_type(file_url)[0]
        mimetype = guessed_mimetype or "application/octet-stream"
        extension = (guess_extension(guessed_mimetype) if guessed_mimetype else None) or ".bin"
        filepath, filename = ToolFileManager._save_blob(tenant_id, blob, extension)

        tool_file = ToolFile(
            user_id=user_id,
            tenant_id=tenant_id,
            conversation_id=conversation_id,
            file_key=filepath,
            mimetype=mimetype,
            original_url=file_url,
            name=filename,
            size=len(blob),
        )

        db.session.add(tool_file)
        db.session.commit()

        return tool_file

    @staticmethod
    def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
        """
        get file binary

        :param id: the id of the file
        :return: the binary of the file, mime type; None if no such tool file exists
        """
        # NOTE: the parameter name shadows the builtin ``id`` but is kept so
        # existing keyword callers keep working.
        tool_file: Optional[ToolFile] = db.session.query(ToolFile).filter(ToolFile.id == id).first()

        if not tool_file:
            return None

        blob = storage.load_once(tool_file.file_key)

        return blob, tool_file.mimetype

    @staticmethod
    def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
        """
        get file binary of the tool file referenced by a message file

        :param id: the id of the message file
        :return: the binary of the file, mime type; None if the message file,
            its url, or the referenced tool file is missing
        """
        message_file: Optional[MessageFile] = (
            db.session.query(MessageFile).filter(MessageFile.id == id).first()
        )

        # Without a url there is no tool file id to look up, so skip the second query.
        if message_file is None or message_file.url is None:
            return None

        # The tool file id is the last path segment of the url, up to the first dot
        # (i.e. with the extension and anything following it trimmed).
        tool_file_id = message_file.url.split("/")[-1].split(".")[0]

        tool_file: Optional[ToolFile] = (
            db.session.query(ToolFile).filter(ToolFile.id == tool_file_id).first()
        )

        if not tool_file:
            return None

        blob = storage.load_once(tool_file.file_key)

        return blob, tool_file.mimetype

    @staticmethod
    def get_file_generator_by_tool_file_id(tool_file_id: str):
        """
        get file stream

        :param tool_file_id: the id of the tool file
        :return: the stream returned by ``storage.load_stream`` and the ToolFile
            row; ``(None, None)`` if no such tool file exists
        """
        tool_file: Optional[ToolFile] = (
            db.session.query(ToolFile).filter(ToolFile.id == tool_file_id).first()
        )

        if not tool_file:
            return None, None

        stream = storage.load_stream(tool_file.file_key)

        return stream, tool_file
# Register ToolFileManager with the tool file parser. The import is done here,
# after the class definition, rather than at the top of the module
# (presumably to avoid a circular import -- confirm before moving it).
from core.file.tool_file_parser import tool_file_manager

tool_file_manager["manager"] = ToolFileManager
|