audio_service.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import io
  2. from werkzeug.datastructures import FileStorage
  3. from core.model_manager import ModelManager
  4. from core.model_runtime.entities.model_entities import ModelType
  5. from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError, UnsupportedAudioTypeServiceError, ProviderNotSupportSpeechToTextServiceError
  6. FILE_SIZE = 15
  7. FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
  8. ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
  9. class AudioService:
  10. @classmethod
  11. def transcript(cls, tenant_id: str, file: FileStorage):
  12. if file is None:
  13. raise NoAudioUploadedServiceError()
  14. extension = file.mimetype
  15. if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
  16. raise UnsupportedAudioTypeServiceError()
  17. file_content = file.read()
  18. file_size = len(file_content)
  19. if file_size > FILE_SIZE_LIMIT:
  20. message = f"Audio size larger than {FILE_SIZE} mb"
  21. raise AudioTooLargeServiceError(message)
  22. model_manager = ModelManager()
  23. model_instance = model_manager.get_default_model_instance(
  24. tenant_id=tenant_id,
  25. model_type=ModelType.SPEECH2TEXT
  26. )
  27. buffer = io.BytesIO(file_content)
  28. buffer.name = 'temp.mp3'
  29. return {"text": model_instance.invoke_speech2text(buffer)}