opendal_storage.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. from collections.abc import Generator
  2. from pathlib import Path
  3. from urllib.parse import urlparse
  4. import opendal
  5. from configs.middleware.storage.opendal_storage_config import OpenDALScheme
  6. from extensions.storage.base_storage import BaseStorage
  # Hostname suffix that identifies a Cloudflare R2 endpoint; compared against
  # the parsed endpoint hostname in is_r2_endpoint().
  S3_R2_HOSTNAME = "r2.cloudflarestorage.com"

  # NOTE(review): presumably extra OpenDAL S3 options applied when the endpoint
  # is R2 -- where they are consumed is not visible in this file. All values
  # are deliberately strings.
  S3_R2_COMPATIBLE_KWARGS = dict(
      delete_max_size="700",
      disable_stat_with_override="true",
      region="auto",
  )

  # NOTE(review): presumably extra OpenDAL S3 options enabling KMS-based
  # server-side encryption -- confirm against the caller that merges them.
  S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = dict(server_side_encryption="aws:kms")
  def is_r2_endpoint(endpoint: str) -> bool:
      """Return True when *endpoint* points at a Cloudflare R2 host.

      The endpoint's hostname must be ``S3_R2_HOSTNAME`` itself or a subdomain
      of it. Matching is done on a label boundary, so a look-alike host such as
      ``evilr2.cloudflarestorage.com`` is rejected.

      Args:
          endpoint: Endpoint URL, with or without a scheme
              (``https://acct.r2.cloudflarestorage.com`` or
              ``acct.r2.cloudflarestorage.com``).

      Returns:
          False for an empty endpoint, for an endpoint whose hostname cannot be
          determined (including malformed URLs), or for a non-R2 hostname.
      """
      if not endpoint:
          return False

      try:
          hostname = urlparse(endpoint).hostname
          if hostname is None and "//" not in endpoint:
              # urlparse only recognises a network location after "//", so a
              # scheme-less endpoint is re-parsed with that marker prepended.
              hostname = urlparse(f"//{endpoint}").hostname
      except ValueError:
          # Malformed input (e.g. unbalanced IPv6 brackets) is simply "not R2".
          return False

      if not hostname:
          return False

      # Exact match or a true subdomain; a bare suffix test would also accept
      # unrelated hosts that merely end with the same characters.
      return hostname == S3_R2_HOSTNAME or hostname.endswith(f".{S3_R2_HOSTNAME}")
  class OpenDALStorage(BaseStorage):
      """Storage backend that performs all file operations through an OpenDAL operator."""

      def __init__(self, scheme: OpenDALScheme, **kwargs):
          """Build the ``opendal.Operator`` used by every other method.

          Args:
              scheme: OpenDAL service scheme to operate on.
              **kwargs: Forwarded unchanged to ``opendal.Operator``. When
                  ``scheme`` is ``OpenDALScheme.FS`` a ``root`` entry is required.

          Raises:
              KeyError: If ``scheme`` is ``OpenDALScheme.FS`` and ``root`` is missing.
          """
          if scheme == OpenDALScheme.FS:
              # Create the root directory (and any parents) before the operator is built.
              Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)

          self.op = opendal.Operator(scheme=scheme, **kwargs)

      def save(self, filename: str, data: bytes) -> None:
          """Write *data* to *filename* through the operator."""
          self.op.write(path=filename, bs=data)

      def load_once(self, filename: str) -> bytes:
          """Read and return the whole content of *filename*.

          Raises:
              FileNotFoundError: If ``exists(filename)`` is False.
          """
          if not self.exists(filename):
              raise FileNotFoundError("File not found")

          return self.op.read(path=filename)

      def load_stream(self, filename: str) -> Generator:
          """Yield the content of *filename* in chunks of up to 4096 bytes.

          This is a generator function, so the existence check (and the
          ``FileNotFoundError``) only happens once iteration starts.

          Raises:
              FileNotFoundError: If ``exists(filename)`` is False.
          """
          if not self.exists(filename):
              raise FileNotFoundError("File not found")

          batch_size = 4096
          # The context manager closes the reader when the stream is exhausted,
          # when reading fails, or when the consumer abandons the generator early.
          with self.op.open(path=filename, mode="rb") as file:
              while chunk := file.read(batch_size):
                  yield chunk

      def download(self, filename: str, target_filepath: str):
          """Copy the content of *filename* into the local file *target_filepath*.

          Raises:
              FileNotFoundError: If ``exists(filename)`` is False.
          """
          if not self.exists(filename):
              raise FileNotFoundError("File not found")

          with Path(target_filepath).open("wb") as f:
              f.write(self.op.read(path=filename))

      def exists(self, filename: str) -> bool:
          """Return True when *filename* can be stat'ed and is a regular file.

          Any exception raised while stat'ing is treated as "does not exist".
          """
          # FIXME: this is a workaround because the opendal Python binding has no
          # `exists` method and no better error handling. Once the binding
          # provides an `exists` method, use it instead. See
          # https://github.com/apache/opendal/blob/main/bindings/python/src/operator.rs
          try:
              return self.op.stat(path=filename).mode.is_file()
          except Exception:
              # Deliberately best-effort: every stat failure maps to False.
              return False

      def delete(self, filename: str):
          """Delete *filename* if it exists; do nothing otherwise."""
          if self.exists(filename):
              self.op.delete(path=filename)