commands.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. import datetime
  2. import json
  3. import math
  4. import random
  5. import string
  6. import time
  7. import click
  8. from flask import current_app
  9. from langchain.embeddings import OpenAIEmbeddings
  10. from werkzeug.exceptions import NotFound
  11. from core.embedding.cached_embedding import CacheEmbedding
  12. from core.index.index import IndexBuilder
  13. from core.model_providers.model_factory import ModelFactory
  14. from core.model_providers.models.embedding.openai_embedding import OpenAIEmbedding
  15. from core.model_providers.models.entity.model_params import ModelType
  16. from core.model_providers.providers.hosted import hosted_model_providers
  17. from core.model_providers.providers.openai_provider import OpenAIProvider
  18. from libs.password import password_pattern, valid_password, hash_password
  19. from libs.helper import email as email_validate
  20. from extensions.ext_database import db
  21. from libs.rsa import generate_key_pair
  22. from models.account import InvitationCode, Tenant
  23. from models.dataset import Dataset, DatasetQuery, Document
  24. from models.model import Account
  25. import secrets
  26. import base64
  27. from models.provider import Provider, ProviderType, ProviderQuotaType, ProviderModel
  28. @click.command('reset-password', help='Reset the account password.')
  29. @click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
  30. @click.option('--new-password', prompt=True, help='the new password.')
  31. @click.option('--password-confirm', prompt=True, help='the new password confirm.')
  32. def reset_password(email, new_password, password_confirm):
  33. if str(new_password).strip() != str(password_confirm).strip():
  34. click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
  35. return
  36. account = db.session.query(Account). \
  37. filter(Account.email == email). \
  38. one_or_none()
  39. if not account:
  40. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  41. return
  42. try:
  43. valid_password(new_password)
  44. except:
  45. click.echo(
  46. click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
  47. return
  48. # generate password salt
  49. salt = secrets.token_bytes(16)
  50. base64_salt = base64.b64encode(salt).decode()
  51. # encrypt password with salt
  52. password_hashed = hash_password(new_password, salt)
  53. base64_password_hashed = base64.b64encode(password_hashed).decode()
  54. account.password = base64_password_hashed
  55. account.password_salt = base64_salt
  56. db.session.commit()
  57. click.echo(click.style('Congratulations!, password has been reset.', fg='green'))
  58. @click.command('reset-email', help='Reset the account email.')
  59. @click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
  60. @click.option('--new-email', prompt=True, help='the new email.')
  61. @click.option('--email-confirm', prompt=True, help='the new email confirm.')
  62. def reset_email(email, new_email, email_confirm):
  63. if str(new_email).strip() != str(email_confirm).strip():
  64. click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
  65. return
  66. account = db.session.query(Account). \
  67. filter(Account.email == email). \
  68. one_or_none()
  69. if not account:
  70. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  71. return
  72. try:
  73. email_validate(new_email)
  74. except:
  75. click.echo(
  76. click.style('sorry. {} is not a valid email. '.format(email), fg='red'))
  77. return
  78. account.email = new_email
  79. db.session.commit()
  80. click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
  81. @click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of workspace for encrypt LLM credentials. '
  82. 'After the reset, all LLM credentials will become invalid, '
  83. 'requiring re-entry.'
  84. 'Only support SELF_HOSTED mode.')
  85. @click.confirmation_option(prompt=click.style('Are you sure you want to reset encrypt key pair?'
  86. ' this operation cannot be rolled back!', fg='red'))
  87. def reset_encrypt_key_pair():
  88. if current_app.config['EDITION'] != 'SELF_HOSTED':
  89. click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
  90. return
  91. tenant = db.session.query(Tenant).first()
  92. if not tenant:
  93. click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
  94. return
  95. tenant.encrypt_public_key = generate_key_pair(tenant.id)
  96. db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
  97. db.session.query(ProviderModel).delete()
  98. db.session.commit()
  99. click.echo(click.style('Congratulations! '
  100. 'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
  101. @click.command('generate-invitation-codes', help='Generate invitation codes.')
  102. @click.option('--batch', help='The batch of invitation codes.')
  103. @click.option('--count', prompt=True, help='Invitation codes count.')
  104. def generate_invitation_codes(batch, count):
  105. if not batch:
  106. now = datetime.datetime.now()
  107. batch = now.strftime('%Y%m%d%H%M%S')
  108. if not count or int(count) <= 0:
  109. click.echo(click.style('sorry. the count must be greater than 0.', fg='red'))
  110. return
  111. count = int(count)
  112. click.echo('Start generate {} invitation codes for batch {}.'.format(count, batch))
  113. codes = ''
  114. for i in range(count):
  115. code = generate_invitation_code()
  116. invitation_code = InvitationCode(
  117. code=code,
  118. batch=batch
  119. )
  120. db.session.add(invitation_code)
  121. click.echo(code)
  122. codes += code + "\n"
  123. db.session.commit()
  124. filename = 'storage/invitation-codes-{}.txt'.format(batch)
  125. with open(filename, 'w') as f:
  126. f.write(codes)
  127. click.echo(click.style(
  128. 'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
  129. filename),
  130. fg='green'))
  131. def generate_invitation_code():
  132. code = generate_upper_string()
  133. while db.session.query(InvitationCode).filter(InvitationCode.code == code).count() > 0:
  134. code = generate_upper_string()
  135. return code
  136. def generate_upper_string():
  137. letters_digits = string.ascii_uppercase + string.digits
  138. result = ""
  139. for i in range(8):
  140. result += random.choice(letters_digits)
  141. return result
  142. @click.command('recreate-all-dataset-indexes', help='Recreate all dataset indexes.')
  143. def recreate_all_dataset_indexes():
  144. click.echo(click.style('Start recreate all dataset indexes.', fg='green'))
  145. recreate_count = 0
  146. page = 1
  147. while True:
  148. try:
  149. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  150. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  151. except NotFound:
  152. break
  153. page += 1
  154. for dataset in datasets:
  155. try:
  156. click.echo('Recreating dataset index: {}'.format(dataset.id))
  157. index = IndexBuilder.get_index(dataset, 'high_quality')
  158. if index and index._is_origin():
  159. index.recreate_dataset(dataset)
  160. recreate_count += 1
  161. else:
  162. click.echo('passed.')
  163. except Exception as e:
  164. click.echo(
  165. click.style('Recreate dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
  166. continue
  167. click.echo(click.style('Congratulations! Recreate {} dataset indexes.'.format(recreate_count), fg='green'))
  168. @click.command('clean-unused-dataset-indexes', help='Clean unused dataset indexes.')
  169. def clean_unused_dataset_indexes():
  170. click.echo(click.style('Start clean unused dataset indexes.', fg='green'))
  171. clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
  172. start_at = time.perf_counter()
  173. thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
  174. page = 1
  175. while True:
  176. try:
  177. datasets = db.session.query(Dataset).filter(Dataset.created_at < thirty_days_ago) \
  178. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  179. except NotFound:
  180. break
  181. page += 1
  182. for dataset in datasets:
  183. dataset_query = db.session.query(DatasetQuery).filter(
  184. DatasetQuery.created_at > thirty_days_ago,
  185. DatasetQuery.dataset_id == dataset.id
  186. ).all()
  187. if not dataset_query or len(dataset_query) == 0:
  188. documents = db.session.query(Document).filter(
  189. Document.dataset_id == dataset.id,
  190. Document.indexing_status == 'completed',
  191. Document.enabled == True,
  192. Document.archived == False,
  193. Document.updated_at > thirty_days_ago
  194. ).all()
  195. if not documents or len(documents) == 0:
  196. try:
  197. # remove index
  198. vector_index = IndexBuilder.get_index(dataset, 'high_quality')
  199. kw_index = IndexBuilder.get_index(dataset, 'economy')
  200. # delete from vector index
  201. if vector_index:
  202. vector_index.delete()
  203. kw_index.delete()
  204. # update document
  205. update_params = {
  206. Document.enabled: False
  207. }
  208. Document.query.filter_by(dataset_id=dataset.id).update(update_params)
  209. db.session.commit()
  210. click.echo(click.style('Cleaned unused dataset {} from db success!'.format(dataset.id),
  211. fg='green'))
  212. except Exception as e:
  213. click.echo(
  214. click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  215. fg='red'))
  216. end_at = time.perf_counter()
  217. click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
  218. @click.command('sync-anthropic-hosted-providers', help='Sync anthropic hosted providers.')
  219. def sync_anthropic_hosted_providers():
  220. if not hosted_model_providers.anthropic:
  221. click.echo(click.style('Anthropic hosted provider is not configured.', fg='red'))
  222. return
  223. click.echo(click.style('Start sync anthropic hosted providers.', fg='green'))
  224. count = 0
  225. new_quota_limit = hosted_model_providers.anthropic.quota_limit
  226. page = 1
  227. while True:
  228. try:
  229. providers = db.session.query(Provider).filter(
  230. Provider.provider_name == 'anthropic',
  231. Provider.provider_type == ProviderType.SYSTEM.value,
  232. Provider.quota_type == ProviderQuotaType.TRIAL.value,
  233. Provider.quota_limit != new_quota_limit
  234. ).order_by(Provider.created_at.desc()).paginate(page=page, per_page=100)
  235. except NotFound:
  236. break
  237. page += 1
  238. for provider in providers:
  239. try:
  240. click.echo('Syncing tenant anthropic hosted provider: {}, origin: limit {}, used {}'
  241. .format(provider.tenant_id, provider.quota_limit, provider.quota_used))
  242. original_quota_limit = provider.quota_limit
  243. division = math.ceil(new_quota_limit / 1000)
  244. provider.quota_limit = new_quota_limit if original_quota_limit == 1000 \
  245. else original_quota_limit * division
  246. provider.quota_used = division * provider.quota_used
  247. db.session.commit()
  248. count += 1
  249. except Exception as e:
  250. click.echo(click.style(
  251. 'Sync tenant anthropic hosted provider error: {} {}'.format(e.__class__.__name__, str(e)),
  252. fg='red'))
  253. continue
  254. click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))
  255. @click.command('create-qdrant-indexes', help='Create qdrant indexes.')
  256. def create_qdrant_indexes():
  257. click.echo(click.style('Start create qdrant indexes.', fg='green'))
  258. create_count = 0
  259. page = 1
  260. while True:
  261. try:
  262. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  263. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  264. except NotFound:
  265. break
  266. page += 1
  267. for dataset in datasets:
  268. try:
  269. click.echo('Create dataset qdrant index: {}'.format(dataset.id))
  270. try:
  271. embedding_model = ModelFactory.get_embedding_model(
  272. tenant_id=dataset.tenant_id,
  273. model_provider_name=dataset.embedding_model_provider,
  274. model_name=dataset.embedding_model
  275. )
  276. except Exception:
  277. provider = Provider(
  278. id='provider_id',
  279. tenant_id='tenant_id',
  280. provider_name='openai',
  281. provider_type=ProviderType.CUSTOM.value,
  282. encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
  283. is_valid=True,
  284. )
  285. model_provider = OpenAIProvider(provider=provider)
  286. embedding_model = OpenAIEmbedding(name="text-embedding-ada-002", model_provider=model_provider)
  287. embeddings = CacheEmbedding(embedding_model)
  288. from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig
  289. index = QdrantVectorIndex(
  290. dataset=dataset,
  291. config=QdrantConfig(
  292. endpoint=current_app.config.get('QDRANT_URL'),
  293. api_key=current_app.config.get('QDRANT_API_KEY'),
  294. root_path=current_app.root_path
  295. ),
  296. embeddings=embeddings
  297. )
  298. if index:
  299. index_struct = {
  300. "type": 'qdrant',
  301. "vector_store": {"class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']}
  302. }
  303. dataset.index_struct = json.dumps(index_struct)
  304. db.session.commit()
  305. index.create_qdrant_dataset(dataset)
  306. create_count += 1
  307. else:
  308. click.echo('passed.')
  309. except Exception as e:
  310. click.echo(
  311. click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
  312. continue
  313. click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green'))
  314. def register_commands(app):
  315. app.cli.add_command(reset_password)
  316. app.cli.add_command(reset_email)
  317. app.cli.add_command(generate_invitation_codes)
  318. app.cli.add_command(reset_encrypt_key_pair)
  319. app.cli.add_command(recreate_all_dataset_indexes)
  320. app.cli.add_command(sync_anthropic_hosted_providers)
  321. app.cli.add_command(clean_unused_dataset_indexes)
  322. app.cli.add_command(create_qdrant_indexes)