ext_storage.py

import logging
from collections.abc import Callable, Generator, Mapping
from typing import Union

from flask import Flask

from configs import dify_config
from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from dify_app import DifyApp
from extensions.storage.base_storage import BaseStorage
from extensions.storage.storage_type import StorageType

logger = logging.getLogger(__name__)


class Storage:
    """Facade over the configured storage backend; delegates file operations
    to the backend selected by dify_config.STORAGE_TYPE."""

    def init_app(self, app: Flask):
        storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
        with app.app_context():
            self.storage_runner = storage_factory()
    @staticmethod
    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
        # Backend modules are imported lazily so only the selected backend's
        # dependencies need to be installed.
        match storage_type:
            case StorageType.S3:
                from extensions.storage.opendal_storage import OpenDALStorage

                kwargs = _load_s3_storage_kwargs()
                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
            case StorageType.OPENDAL:
                from extensions.storage.opendal_storage import OpenDALStorage

                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
                kwargs = _load_opendal_storage_kwargs(scheme)
                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
            case StorageType.LOCAL:
                from extensions.storage.opendal_storage import OpenDALStorage

                kwargs = _load_local_storage_kwargs()
                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
            case StorageType.AZURE_BLOB:
                from extensions.storage.azure_blob_storage import AzureBlobStorage

                return AzureBlobStorage
            case StorageType.ALIYUN_OSS:
                from extensions.storage.aliyun_oss_storage import AliyunOssStorage

                return AliyunOssStorage
            case StorageType.GOOGLE_STORAGE:
                from extensions.storage.google_cloud_storage import GoogleCloudStorage

                return GoogleCloudStorage
            case StorageType.TENCENT_COS:
                from extensions.storage.tencent_cos_storage import TencentCosStorage

                return TencentCosStorage
            case StorageType.OCI_STORAGE:
                from extensions.storage.oracle_oci_storage import OracleOCIStorage

                return OracleOCIStorage
            case StorageType.HUAWEI_OBS:
                from extensions.storage.huawei_obs_storage import HuaweiObsStorage

                return HuaweiObsStorage
            case StorageType.BAIDU_OBS:
                from extensions.storage.baidu_obs_storage import BaiduObsStorage

                return BaiduObsStorage
            case StorageType.VOLCENGINE_TOS:
                from extensions.storage.volcengine_tos_storage import VolcengineTosStorage

                return VolcengineTosStorage
            case StorageType.SUPBASE:
                from extensions.storage.supabase_storage import SupabaseStorage

                return SupabaseStorage
            case _:
                raise ValueError(f"Unsupported storage type {storage_type}")
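
    # A minimal sketch of how the factory is meant to be consumed (the values
    # in the example are hypothetical). The match arms return zero-argument
    # callables or classes, so backend construction is deferred until
    # init_app() invokes the factory inside an app context:
    #
    #   factory = Storage.get_storage_factory(StorageType.LOCAL)
    #   runner = factory()  # e.g. OpenDALStorage(scheme=OpenDALScheme.FS, ...)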

    def save(self, filename, data):
        try:
            self.storage_runner.save(filename, data)
        except Exception:
            logger.exception(f"Failed to save file {filename}")
            raise

    def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
        try:
            if stream:
                return self.load_stream(filename)
            else:
                return self.load_once(filename)
        except Exception:
            logger.exception(f"Failed to load file {filename}")
            raise

    def load_once(self, filename: str) -> bytes:
        try:
            return self.storage_runner.load_once(filename)
        except Exception:
            logger.exception(f"Failed to load_once file {filename}")
            raise

    def load_stream(self, filename: str) -> Generator:
        try:
            return self.storage_runner.load_stream(filename)
        except Exception:
            logger.exception(f"Failed to load_stream file {filename}")
            raise

    def download(self, filename, target_filepath):
        try:
            self.storage_runner.download(filename, target_filepath)
        except Exception:
            logger.exception(f"Failed to download file {filename}")
            raise

    def exists(self, filename):
        try:
            return self.storage_runner.exists(filename)
        except Exception:
            logger.exception(f"Failed to check existence of file {filename}")
            raise

    def delete(self, filename):
        try:
            return self.storage_runner.delete(filename)
        except Exception:
            logger.exception(f"Failed to delete file {filename}")
            raise
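

# Hedged usage sketch: after init_app() has run, the module-level `storage`
# singleton defined at the bottom of this file proxies the methods above.
# The filenames below are hypothetical.
#
#   storage.save("uploads/report.txt", b"hello")
#   data = storage.load("uploads/report.txt")                  # bytes
#   chunks = storage.load("uploads/report.txt", stream=True)   # generator of chunks
#   if storage.exists("uploads/report.txt"):
#       storage.delete("uploads/report.txt")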


def _load_s3_storage_kwargs() -> Mapping[str, str]:
    """
    Load the kwargs for S3 storage based on dify_config.
    Handles special cases like AWS managed IAM and R2.
    """
    kwargs = {
        "root": "/",
        "bucket": dify_config.S3_BUCKET_NAME,
        "endpoint": dify_config.S3_ENDPOINT,
        "access_key_id": dify_config.S3_ACCESS_KEY,
        "secret_access_key": dify_config.S3_SECRET_KEY,
        "region": dify_config.S3_REGION,
    }
    # Drop unset (None) values so only string options are passed through.
    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}

    # For AWS managed IAM
    if dify_config.S3_USE_AWS_MANAGED_IAM:
        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS

        logger.debug("Using AWS managed IAM role for S3")
        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}

    # For Cloudflare R2
    if kwargs.get("endpoint"):
        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint

        if is_r2_endpoint(kwargs["endpoint"]):
            logger.debug("Using R2 for OpenDAL S3")
            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}

    return kwargs
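

# The merge pattern above keeps configured values and only fills in missing
# backend defaults. A minimal sketch with hypothetical values:
#
#   kwargs = {"region": "auto"}
#   defaults = {"region": "us-east-1", "enable_virtual_host_style": "true"}
#   kwargs = {**kwargs, **{k: v for k, v in defaults.items() if k not in kwargs}}
#   # -> {"region": "auto", "enable_virtual_host_style": "true"}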


def _load_local_storage_kwargs() -> Mapping[str, str]:
    """
    Load the kwargs for local storage based on dify_config.
    """
    return {
        "root": dify_config.STORAGE_LOCAL_PATH,
    }


def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
    """
    Load the kwargs for OpenDAL storage based on the given scheme.
    """
    match scheme:
        case OpenDALScheme.FS:
            kwargs = {
                "root": dify_config.OPENDAL_FS_ROOT,
            }
        case OpenDALScheme.S3:
            # Load OpenDAL S3-related configs
            kwargs = {
                "root": dify_config.OPENDAL_S3_ROOT,
                "bucket": dify_config.OPENDAL_S3_BUCKET,
                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
                "region": dify_config.OPENDAL_S3_REGION,
            }
            # For Cloudflare R2
            if kwargs.get("endpoint"):
                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint

                if is_r2_endpoint(kwargs["endpoint"]):
                    logger.debug("Using R2 for OpenDAL S3")
                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
        case _:
            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
            kwargs = {}
    return kwargs
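

# Sketch of the scheme-based dispatch above (the concrete values depend on
# dify_config and are assumptions here): the FS scheme only needs a
# filesystem root, while the S3 scheme maps the OPENDAL_S3_* settings onto
# OpenDAL's S3 options.
#
#   _load_opendal_storage_kwargs(OpenDALScheme.FS)
#   # -> {"root": dify_config.OPENDAL_FS_ROOT}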


storage = Storage()


def init_app(app: DifyApp):
    storage.init_app(app)
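

# Hedged wiring sketch: Dify's app factory is expected to call the
# module-level init_app() while registering extensions; the construction
# below is illustrative only, not the actual bootstrap code.
#
#   app = DifyApp(__name__)
#   init_app(app)                     # builds storage.storage_runner in app context
#   storage.save("hello.txt", b"hi")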