|
@@ -1,4 +1,5 @@
|
|
import datetime
|
|
import datetime
|
|
|
|
+import json
|
|
import math
|
|
import math
|
|
import random
|
|
import random
|
|
import string
|
|
import string
|
|
@@ -6,10 +7,16 @@ import time
|
|
|
|
|
|
import click
|
|
import click
|
|
from flask import current_app
|
|
from flask import current_app
|
|
|
|
+from langchain.embeddings import OpenAIEmbeddings
|
|
from werkzeug.exceptions import NotFound
|
|
from werkzeug.exceptions import NotFound
|
|
|
|
|
|
|
|
+from core.embedding.cached_embedding import CacheEmbedding
|
|
from core.index.index import IndexBuilder
|
|
from core.index.index import IndexBuilder
|
|
|
|
+from core.model_providers.model_factory import ModelFactory
|
|
|
|
+from core.model_providers.models.embedding.openai_embedding import OpenAIEmbedding
|
|
|
|
+from core.model_providers.models.entity.model_params import ModelType
|
|
from core.model_providers.providers.hosted import hosted_model_providers
|
|
from core.model_providers.providers.hosted import hosted_model_providers
|
|
|
|
+from core.model_providers.providers.openai_provider import OpenAIProvider
|
|
from libs.password import password_pattern, valid_password, hash_password
|
|
from libs.password import password_pattern, valid_password, hash_password
|
|
from libs.helper import email as email_validate
|
|
from libs.helper import email as email_validate
|
|
from extensions.ext_database import db
|
|
from extensions.ext_database import db
|
|
@@ -296,6 +303,66 @@ def sync_anthropic_hosted_providers():
|
|
click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))
|
|
click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))
|
|
|
|
|
|
|
|
|
|
|
|
@click.command('create-qdrant-indexes', help='Create qdrant indexes.')
def create_qdrant_indexes():
    """Create a Qdrant index for every dataset indexed as 'high_quality'.

    Datasets are read newest-first in pages of 50. Paging stops when
    ``paginate`` raises ``NotFound`` (presumably once the requested page is
    past the last one -- confirm against the Flask-SQLAlchemy version in use).

    For each dataset the embedding model is resolved through ``ModelFactory``;
    if that lookup raises, a stand-in OpenAI provider with a dummy API key and
    the ``text-embedding-ada-002`` model is used instead. Any error raised
    while handling one dataset is printed in red and the loop moves on to the
    next dataset. A summary with the number of created indexes is printed at
    the end.
    """
    click.echo(click.style('Start create qdrant indexes.', fg='green'))

    created_total = 0
    page_number = 1

    while True:
        try:
            high_quality = db.session.query(Dataset).filter(
                Dataset.indexing_technique == 'high_quality'
            )
            newest_first = high_quality.order_by(Dataset.created_at.desc())
            batch = newest_first.paginate(page=page_number, per_page=50)
        except NotFound:
            # No such page: every dataset has been visited.
            break

        page_number += 1

        for dataset in batch:
            try:
                click.echo('Create dataset qdrant index: {}'.format(dataset.id))

                try:
                    embedding_model = ModelFactory.get_embedding_model(
                        tenant_id=dataset.tenant_id,
                        model_provider_name=dataset.embedding_model_provider,
                        model_name=dataset.embedding_model
                    )
                except Exception:
                    # Lookup failed: fall back to a stand-in OpenAI provider
                    # carrying a dummy key so an embedding object can still
                    # be constructed.
                    fallback_provider = Provider(
                        id='provider_id',
                        tenant_id='tenant_id',
                        provider_name='openai',
                        provider_type=ProviderType.CUSTOM.value,
                        encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                        is_valid=True,
                    )
                    embedding_model = OpenAIEmbedding(
                        name="text-embedding-ada-002",
                        model_provider=OpenAIProvider(provider=fallback_provider)
                    )

                cached_embeddings = CacheEmbedding(embedding_model)

                # Imported here (inside the per-dataset try) exactly as the
                # original did, so an import failure is reported per dataset
                # rather than aborting the command.
                from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig

                qdrant_index = QdrantVectorIndex(
                    dataset=dataset,
                    config=QdrantConfig(
                        endpoint=current_app.config.get('QDRANT_URL'),
                        api_key=current_app.config.get('QDRANT_API_KEY'),
                        root_path=current_app.root_path
                    ),
                    embeddings=cached_embeddings
                )

                if not qdrant_index:
                    click.echo('passed.')
                    continue

                qdrant_index.create_qdrant_dataset(dataset)
                created_total += 1
            except Exception as e:
                # Best-effort: report the failure and carry on with the next dataset.
                error_text = 'Create dataset index error: {} {}'.format(e.__class__.__name__, str(e))
                click.echo(click.style(error_text, fg='red'))

    click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(created_total), fg='green'))
|
|
|
|
+
|
|
|
|
+
|
|
def register_commands(app):
|
|
def register_commands(app):
|
|
app.cli.add_command(reset_password)
|
|
app.cli.add_command(reset_password)
|
|
app.cli.add_command(reset_email)
|
|
app.cli.add_command(reset_email)
|
|
@@ -304,3 +371,4 @@ def register_commands(app):
|
|
app.cli.add_command(recreate_all_dataset_indexes)
|
|
app.cli.add_command(recreate_all_dataset_indexes)
|
|
app.cli.add_command(sync_anthropic_hosted_providers)
|
|
app.cli.add_command(sync_anthropic_hosted_providers)
|
|
app.cli.add_command(clean_unused_dataset_indexes)
|
|
app.cli.add_command(clean_unused_dataset_indexes)
|
|
|
|
+ app.cli.add_command(create_qdrant_indexes)
|