audio_service.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. import io
  2. from core.model_manager import ModelManager
  3. from core.model_runtime.entities.model_entities import ModelType
  4. from services.errors.audio import (AudioTooLargeServiceError, NoAudioUploadedServiceError,
  5. ProviderNotSupportSpeechToTextServiceError, UnsupportedAudioTypeServiceError)
  6. from werkzeug.datastructures import FileStorage
  7. FILE_SIZE = 15
  8. FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
  9. ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
  10. class AudioService:
  11. @classmethod
  12. def transcript(cls, tenant_id: str, file: FileStorage):
  13. if file is None:
  14. raise NoAudioUploadedServiceError()
  15. extension = file.mimetype
  16. if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
  17. raise UnsupportedAudioTypeServiceError()
  18. file_content = file.read()
  19. file_size = len(file_content)
  20. if file_size > FILE_SIZE_LIMIT:
  21. message = f"Audio size larger than {FILE_SIZE} mb"
  22. raise AudioTooLargeServiceError(message)
  23. model_manager = ModelManager()
  24. model_instance = model_manager.get_default_model_instance(
  25. tenant_id=tenant_id,
  26. model_type=ModelType.SPEECH2TEXT
  27. )
  28. buffer = io.BytesIO(file_content)
  29. buffer.name = 'temp.mp3'
  30. return {"text": model_instance.invoke_speech2text(buffer)}