from core.embedding.cached_embedding import CacheEmbedding
from core.index.keyword_table_index.keyword_table_index import KeywordTableConfig, KeywordTableIndex
from core.index.vector_index.vector_index import VectorIndex
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
from flask import current_app
from langchain.embeddings import OpenAIEmbeddings
from models.dataset import Dataset


class IndexBuilder:
    """Build the index object that matches a dataset's indexing technique."""

    @classmethod
    def get_index(cls, dataset: Dataset, indexing_technique: str, ignore_high_quality_check: bool = False):
        """Return the index implementation for ``indexing_technique``.

        Args:
            dataset: Dataset the index is built for. Its ``tenant_id``,
                ``embedding_model_provider``, ``embedding_model`` and
                ``indexing_technique`` attributes are read on the
                ``"high_quality"`` path.
            indexing_technique: Either ``"high_quality"`` or ``"economy"``.
            ignore_high_quality_check: When true, skip the check that the
                dataset itself is configured as ``'high_quality'``.

        Returns:
            A ``KeywordTableIndex`` for ``"economy"``; a ``VectorIndex`` for
            ``"high_quality"``; or ``None`` when ``"high_quality"`` is
            requested, the check is not ignored, and the dataset's own
            technique is something else.

        Raises:
            ValueError: If ``indexing_technique`` is neither supported value.
        """
        if indexing_technique == "economy":
            keyword_config = KeywordTableConfig(max_keywords_per_chunk=10)
            return KeywordTableIndex(dataset=dataset, config=keyword_config)

        if indexing_technique != "high_quality":
            raise ValueError('Unknown indexing technique')

        # The dataset attribute is only consulted when the check is not being ignored.
        if not (ignore_high_quality_check or dataset.indexing_technique == 'high_quality'):
            return None

        # Look up the embedding model the dataset is configured with.
        model_instance = ModelManager().get_model_instance(
            tenant_id=dataset.tenant_id,
            model_type=ModelType.TEXT_EMBEDDING,
            provider=dataset.embedding_model_provider,
            model=dataset.embedding_model
        )
        cached_embeddings = CacheEmbedding(model_instance)

        return VectorIndex(
            dataset=dataset,
            config=current_app.config,
            embeddings=cached_embeddings
        )

    @classmethod
    def get_default_high_quality_index(cls, dataset: Dataset):
        """Return a ``VectorIndex`` for ``dataset`` using ``OpenAIEmbeddings``.

        The embeddings object is created with a single-space API key rather
        than a real credential.
        NOTE(review): presumably intended for operations that never call the
        embedding API -- confirm against callers.
        """
        default_embeddings = OpenAIEmbeddings(openai_api_key=' ')
        return VectorIndex(
            dataset=dataset,
            config=current_app.config,
            embeddings=default_embeddings
        )