audio_service.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import io
  2. from typing import Optional
  3. from core.model_manager import ModelManager
  4. from core.model_runtime.entities.model_entities import ModelType
  5. from services.errors.audio import (AudioTooLargeServiceError,
  6. NoAudioUploadedServiceError,
  7. ProviderNotSupportTextToSpeechServiceError,
  8. ProviderNotSupportSpeechToTextServiceError,
  9. UnsupportedAudioTypeServiceError)
  10. from werkzeug.datastructures import FileStorage
  11. FILE_SIZE = 15
  12. FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
  13. ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm', 'amr']
  14. class AudioService:
  15. @classmethod
  16. def transcript_asr(cls, tenant_id: str, file: FileStorage, end_user: Optional[str] = None):
  17. if file is None:
  18. raise NoAudioUploadedServiceError()
  19. extension = file.mimetype
  20. if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
  21. raise UnsupportedAudioTypeServiceError()
  22. file_content = file.read()
  23. file_size = len(file_content)
  24. if file_size > FILE_SIZE_LIMIT:
  25. message = f"Audio size larger than {FILE_SIZE} mb"
  26. raise AudioTooLargeServiceError(message)
  27. model_manager = ModelManager()
  28. model_instance = model_manager.get_default_model_instance(
  29. tenant_id=tenant_id,
  30. model_type=ModelType.SPEECH2TEXT
  31. )
  32. if model_instance is None:
  33. raise ProviderNotSupportSpeechToTextServiceError()
  34. buffer = io.BytesIO(file_content)
  35. buffer.name = 'temp.mp3'
  36. return {"text": model_instance.invoke_speech2text(file=buffer, user=end_user)}
  37. @classmethod
  38. def transcript_tts(cls, tenant_id: str, text: str, streaming: bool, end_user: Optional[str] = None):
  39. model_manager = ModelManager()
  40. model_instance = model_manager.get_default_model_instance(
  41. tenant_id=tenant_id,
  42. model_type=ModelType.TTS
  43. )
  44. if model_instance is None:
  45. raise ProviderNotSupportTextToSpeechServiceError()
  46. try:
  47. return model_instance.invoke_tts(content_text=text.strip(), user=end_user, streaming=streaming)
  48. except Exception as e:
  49. raise e