|  | @@ -15,9 +15,10 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 | 
	
		
			
				|  |  |  from werkzeug.datastructures import FileStorage
 | 
	
		
			
				|  |  |  from werkzeug.exceptions import NotFound
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv',
 | 
	
		
			
				|  |  | -                      'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  |  IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  | +ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS
 | 
	
		
			
				|  |  | +UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
 | 
	
		
			
				|  |  | +                                      'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
 | 
	
		
			
				|  |  |  PREVIEW_WORDS_LIMIT = 3000
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -27,13 +28,7 @@ class FileService:
 | 
	
		
			
				|  |  |      def upload_file(file: FileStorage, user: Union[Account, EndUser], only_image: bool = False) -> UploadFile:
 | 
	
		
			
				|  |  |          extension = file.filename.split('.')[-1]
 | 
	
		
			
				|  |  |          etl_type = current_app.config['ETL_TYPE']
 | 
	
		
			
				|  |  | -        if etl_type == 'Unstructured':
 | 
	
		
			
				|  |  | -            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
 | 
	
		
			
				|  |  | -                                  'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml',
 | 
	
		
			
				|  |  | -                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  | -        else:
 | 
	
		
			
				|  |  | -            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv',
 | 
	
		
			
				|  |  | -                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  | +        allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
 | 
	
		
			
				|  |  |          if extension.lower() not in allowed_extensions:
 | 
	
		
			
				|  |  |              raise UnsupportedFileTypeError()
 | 
	
		
			
				|  |  |          elif only_image and extension.lower() not in IMAGE_EXTENSIONS:
 | 
	
	
		
			
				|  | @@ -133,13 +128,7 @@ class FileService:
 | 
	
		
			
				|  |  |          # extract text from file
 | 
	
		
			
				|  |  |          extension = upload_file.extension
 | 
	
		
			
				|  |  |          etl_type = current_app.config['ETL_TYPE']
 | 
	
		
			
				|  |  | -        if etl_type == 'Unstructured':
 | 
	
		
			
				|  |  | -            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
 | 
	
		
			
				|  |  | -                                  'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml',
 | 
	
		
			
				|  |  | -                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  | -        else:
 | 
	
		
			
				|  |  | -            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv',
 | 
	
		
			
				|  |  | -                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 | 
	
		
			
				|  |  | +        allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
 | 
	
		
			
				|  |  |          if extension.lower() not in allowed_extensions:
 | 
	
		
			
				|  |  |              raise UnsupportedFileTypeError()
 | 
	
		
			
				|  |  |  
 |