tool_file_manager.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from collections.abc import Generator
  8. from mimetypes import guess_extension, guess_type
  9. from typing import Optional, Union
  10. from uuid import uuid4
  11. from httpx import get
  12. from configs import dify_config
  13. from extensions.ext_database import db
  14. from extensions.ext_storage import storage
  15. from models.model import MessageFile
  16. from models.tools import ToolFile
  17. logger = logging.getLogger(__name__)
  18. class ToolFileManager:
  19. @staticmethod
  20. def sign_file(tool_file_id: str, extension: str) -> str:
  21. """
  22. sign file to get a temporary url
  23. """
  24. base_url = dify_config.FILES_URL
  25. file_preview_url = f'{base_url}/files/tools/{tool_file_id}{extension}'
  26. timestamp = str(int(time.time()))
  27. nonce = os.urandom(16).hex()
  28. data_to_sign = f'file-preview|{tool_file_id}|{timestamp}|{nonce}'
  29. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b''
  30. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  31. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  32. return f'{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}'
  33. @staticmethod
  34. def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  35. """
  36. verify signature
  37. """
  38. data_to_sign = f'file-preview|{file_id}|{timestamp}|{nonce}'
  39. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b''
  40. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  41. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  42. # verify signature
  43. if sign != recalculated_encoded_sign:
  44. return False
  45. current_time = int(time.time())
  46. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  47. @staticmethod
  48. def create_file_by_raw(
  49. user_id: str, tenant_id: str, conversation_id: Optional[str], file_binary: bytes, mimetype: str
  50. ) -> ToolFile:
  51. """
  52. create file
  53. """
  54. extension = guess_extension(mimetype) or '.bin'
  55. unique_name = uuid4().hex
  56. filename = f'tools/{tenant_id}/{unique_name}{extension}'
  57. storage.save(filename, file_binary)
  58. tool_file = ToolFile(
  59. user_id=user_id, tenant_id=tenant_id, conversation_id=conversation_id, file_key=filename, mimetype=mimetype
  60. )
  61. db.session.add(tool_file)
  62. db.session.commit()
  63. return tool_file
  64. @staticmethod
  65. def create_file_by_url(
  66. user_id: str,
  67. tenant_id: str,
  68. conversation_id: str,
  69. file_url: str,
  70. ) -> ToolFile:
  71. """
  72. create file
  73. """
  74. # try to download image
  75. response = get(file_url)
  76. response.raise_for_status()
  77. blob = response.content
  78. mimetype = guess_type(file_url)[0] or 'octet/stream'
  79. extension = guess_extension(mimetype) or '.bin'
  80. unique_name = uuid4().hex
  81. filename = f'tools/{tenant_id}/{unique_name}{extension}'
  82. storage.save(filename, blob)
  83. tool_file = ToolFile(
  84. user_id=user_id,
  85. tenant_id=tenant_id,
  86. conversation_id=conversation_id,
  87. file_key=filename,
  88. mimetype=mimetype,
  89. original_url=file_url,
  90. )
  91. db.session.add(tool_file)
  92. db.session.commit()
  93. return tool_file
  94. @staticmethod
  95. def create_file_by_key(
  96. user_id: str, tenant_id: str, conversation_id: str, file_key: str, mimetype: str
  97. ) -> ToolFile:
  98. """
  99. create file
  100. """
  101. tool_file = ToolFile(
  102. user_id=user_id, tenant_id=tenant_id, conversation_id=conversation_id, file_key=file_key, mimetype=mimetype
  103. )
  104. return tool_file
  105. @staticmethod
  106. def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
  107. """
  108. get file binary
  109. :param id: the id of the file
  110. :return: the binary of the file, mime type
  111. """
  112. tool_file: ToolFile = (
  113. db.session.query(ToolFile)
  114. .filter(
  115. ToolFile.id == id,
  116. )
  117. .first()
  118. )
  119. if not tool_file:
  120. return None
  121. blob = storage.load_once(tool_file.file_key)
  122. return blob, tool_file.mimetype
  123. @staticmethod
  124. def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
  125. """
  126. get file binary
  127. :param id: the id of the file
  128. :return: the binary of the file, mime type
  129. """
  130. message_file: MessageFile = (
  131. db.session.query(MessageFile)
  132. .filter(
  133. MessageFile.id == id,
  134. )
  135. .first()
  136. )
  137. # Check if message_file is not None
  138. if message_file is not None:
  139. # get tool file id
  140. tool_file_id = message_file.url.split('/')[-1]
  141. # trim extension
  142. tool_file_id = tool_file_id.split('.')[0]
  143. else:
  144. tool_file_id = None
  145. tool_file: ToolFile = (
  146. db.session.query(ToolFile)
  147. .filter(
  148. ToolFile.id == tool_file_id,
  149. )
  150. .first()
  151. )
  152. if not tool_file:
  153. return None
  154. blob = storage.load_once(tool_file.file_key)
  155. return blob, tool_file.mimetype
  156. @staticmethod
  157. def get_file_generator_by_tool_file_id(tool_file_id: str) -> Union[tuple[Generator, str], None]:
  158. """
  159. get file binary
  160. :param tool_file_id: the id of the tool file
  161. :return: the binary of the file, mime type
  162. """
  163. tool_file: ToolFile = (
  164. db.session.query(ToolFile)
  165. .filter(
  166. ToolFile.id == tool_file_id,
  167. )
  168. .first()
  169. )
  170. if not tool_file:
  171. return None
  172. generator = storage.load_stream(tool_file.file_key)
  173. return generator, tool_file.mimetype
# init tool_file_parser
# Register ToolFileManager under the 'manager' key of the tool_file_manager
# object exported by core.file.tool_file_parser. This runs once, as a side
# effect of importing this module.
# NOTE(review): the import sits below the class definition, presumably to
# avoid a circular import between the two modules - confirm before moving it.
from core.file.tool_file_parser import tool_file_manager

tool_file_manager['manager'] = ToolFileManager