import logging
from collections.abc import Generator

import boto3  # type: ignore
from botocore.client import Config  # type: ignore
from botocore.exceptions import ClientError  # type: ignore

from configs import dify_config
from extensions.storage.base_storage import BaseStorage

logger = logging.getLogger(__name__)


class AwsS3Storage(BaseStorage):
    """Implementation for Amazon Web Services S3 storage."""

    def __init__(self) -> None:
        """Build the S3 client from ``dify_config`` and make sure the bucket is usable.

        When ``S3_USE_AWS_MANAGED_IAM`` is set, the client is created from a
        default ``boto3.Session`` with only the region supplied; otherwise the
        configured access key, secret key, endpoint, region and addressing
        style are passed explicitly.

        Raises:
            ClientError: if ``head_bucket`` fails with an error code other
                than ``"404"`` or ``"403"``, or if creating the bucket fails.
        """
        super().__init__()
        self.bucket_name = dify_config.S3_BUCKET_NAME
        if dify_config.S3_USE_AWS_MANAGED_IAM:
            logger.info("Using AWS managed IAM role for S3")

            session = boto3.Session()
            region_name = dify_config.S3_REGION
            self.client = session.client(service_name="s3", region_name=region_name)
        else:
            logger.info("Using ak and sk for S3")

            self.client = boto3.client(
                "s3",
                aws_secret_access_key=dify_config.S3_SECRET_KEY,
                aws_access_key_id=dify_config.S3_ACCESS_KEY,
                endpoint_url=dify_config.S3_ENDPOINT,
                region_name=dify_config.S3_REGION,
                config=Config(
                    s3={"addressing_style": dify_config.S3_ADDRESS_STYLE},
                    request_checksum_calculation="when_required",
                    response_checksum_validation="when_required",
                ),
            )
        # Probe the bucket and create it on demand.
        try:
            self.client.head_bucket(Bucket=self.bucket_name)
        except ClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                # Bucket does not exist yet: create it.
                # NOTE(review): no CreateBucketConfiguration is passed here;
                # confirm this works for the regions/endpoints in use.
                self.client.create_bucket(Bucket=self.bucket_name)
            elif error_code == "403":
                # The bucket may exist but not be accessible with these
                # credentials; deliberately carry on.
                pass
            else:
                # Any other error is unexpected: propagate it.
                raise

    def save(self, filename: str, data) -> None:
        """Upload ``data`` to the bucket under the key ``filename``."""
        self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)

    def load_once(self, filename: str) -> bytes:
        """Return the full content of the object stored under ``filename``.

        Raises:
            FileNotFoundError: if S3 reports ``NoSuchKey`` for the object.
            ClientError: for any other S3 client error.
        """
        try:
            data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                # Chain the original error so the S3 response stays visible.
                raise FileNotFoundError("File not found") from ex
            raise
        return data

    def load_stream(self, filename: str) -> Generator:
        """Yield the content of the object stored under ``filename`` in chunks.

        Raises:
            FileNotFoundError: if S3 reports ``NoSuchKey`` for the object.
            ValueError: if the client error message contains
                ``"reached max retries"``.
            ClientError: for any other S3 client error.
        """
        try:
            response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
            yield from response["Body"].iter_chunks()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError("file not found") from ex
            elif "reached max retries" in str(ex):
                raise ValueError("please do not request the same file too frequently") from ex
            else:
                raise

    def download(self, filename: str, target_filepath) -> None:
        """Download the object stored under ``filename`` to ``target_filepath``."""
        self.client.download_file(self.bucket_name, filename, target_filepath)

    def exists(self, filename: str) -> bool:
        """Return ``True`` if a ``head_object`` call for ``filename`` succeeds.

        Any ordinary failure of the call is reported as ``False``; this is a
        best-effort check and does not distinguish a missing object from
        other errors.
        """
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=filename)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit; those now propagate as they should.
            return False
        return True

    def delete(self, filename: str) -> None:
        """Delete the object stored under ``filename`` from the bucket."""
        self.client.delete_object(Bucket=self.bucket_name, Key=filename)