datasets_segments.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. import uuid
  2. import pandas as pd
  3. from flask import request
  4. from flask_login import current_user # type: ignore
  5. from flask_restful import Resource, marshal, reqparse # type: ignore
  6. from werkzeug.exceptions import Forbidden, NotFound
  7. import services
  8. from controllers.console import api
  9. from controllers.console.app.error import ProviderNotInitializeError
  10. from controllers.console.datasets.error import (
  11. ChildChunkDeleteIndexError,
  12. ChildChunkIndexingError,
  13. InvalidActionError,
  14. NoFileUploadedError,
  15. TooManyFilesError,
  16. )
  17. from controllers.console.wraps import (
  18. account_initialization_required,
  19. cloud_edition_billing_knowledge_limit_check,
  20. cloud_edition_billing_resource_check,
  21. setup_required,
  22. )
  23. from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
  24. from core.model_manager import ModelManager
  25. from core.model_runtime.entities.model_entities import ModelType
  26. from extensions.ext_redis import redis_client
  27. from fields.segment_fields import child_chunk_fields, segment_fields
  28. from libs.login import login_required
  29. from models.dataset import ChildChunk, DocumentSegment
  30. from services.dataset_service import DatasetService, DocumentService, SegmentService
  31. from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs
  32. from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError
  33. from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError
  34. from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
  35. class DatasetDocumentSegmentListApi(Resource):
  36. @setup_required
  37. @login_required
  38. @account_initialization_required
  39. def get(self, dataset_id, document_id):
  40. dataset_id = str(dataset_id)
  41. document_id = str(document_id)
  42. dataset = DatasetService.get_dataset(dataset_id)
  43. if not dataset:
  44. raise NotFound("Dataset not found.")
  45. try:
  46. DatasetService.check_dataset_permission(dataset, current_user)
  47. except services.errors.account.NoPermissionError as e:
  48. raise Forbidden(str(e))
  49. document = DocumentService.get_document(dataset_id, document_id)
  50. if not document:
  51. raise NotFound("Document not found.")
  52. parser = reqparse.RequestParser()
  53. parser.add_argument("limit", type=int, default=20, location="args")
  54. parser.add_argument("status", type=str, action="append", default=[], location="args")
  55. parser.add_argument("hit_count_gte", type=int, default=None, location="args")
  56. parser.add_argument("enabled", type=str, default="all", location="args")
  57. parser.add_argument("keyword", type=str, default=None, location="args")
  58. parser.add_argument("page", type=int, default=1, location="args")
  59. args = parser.parse_args()
  60. page = args["page"]
  61. limit = min(args["limit"], 100)
  62. status_list = args["status"]
  63. hit_count_gte = args["hit_count_gte"]
  64. keyword = args["keyword"]
  65. query = DocumentSegment.query.filter(
  66. DocumentSegment.document_id == str(document_id), DocumentSegment.tenant_id == current_user.current_tenant_id
  67. ).order_by(DocumentSegment.position.asc())
  68. if status_list:
  69. query = query.filter(DocumentSegment.status.in_(status_list))
  70. if hit_count_gte is not None:
  71. query = query.filter(DocumentSegment.hit_count >= hit_count_gte)
  72. if keyword:
  73. query = query.where(DocumentSegment.content.ilike(f"%{keyword}%"))
  74. if args["enabled"].lower() != "all":
  75. if args["enabled"].lower() == "true":
  76. query = query.filter(DocumentSegment.enabled == True)
  77. elif args["enabled"].lower() == "false":
  78. query = query.filter(DocumentSegment.enabled == False)
  79. segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False)
  80. response = {
  81. "data": marshal(segments.items, segment_fields),
  82. "limit": limit,
  83. "total": segments.total,
  84. "total_pages": segments.pages,
  85. "page": page,
  86. }
  87. return response, 200
  88. @setup_required
  89. @login_required
  90. @account_initialization_required
  91. def delete(self, dataset_id, document_id):
  92. # check dataset
  93. dataset_id = str(dataset_id)
  94. dataset = DatasetService.get_dataset(dataset_id)
  95. if not dataset:
  96. raise NotFound("Dataset not found.")
  97. # check user's model setting
  98. DatasetService.check_dataset_model_setting(dataset)
  99. # check document
  100. document_id = str(document_id)
  101. document = DocumentService.get_document(dataset_id, document_id)
  102. if not document:
  103. raise NotFound("Document not found.")
  104. segment_ids = request.args.getlist("segment_id")
  105. # The role of the current user in the ta table must be admin or owner
  106. if not current_user.is_editor:
  107. raise Forbidden()
  108. try:
  109. DatasetService.check_dataset_permission(dataset, current_user)
  110. except services.errors.account.NoPermissionError as e:
  111. raise Forbidden(str(e))
  112. SegmentService.delete_segments(segment_ids, document, dataset)
  113. return {"result": "success"}, 200
class DatasetDocumentSegmentApi(Resource):
    """Batch enable/disable segments of a document via the ``action`` URL component."""

    @setup_required
    @login_required
    @account_initialization_required
    @cloud_edition_billing_resource_check("vector_space")
    def patch(self, dataset_id, document_id, action):
        """Apply ``action`` to the segments named in repeated ``segment_id`` query args.

        Raises NotFound for a missing dataset/document, Forbidden for insufficient
        rights, and InvalidActionError while the document is still being indexed
        or when the service-layer update fails.
        """
        dataset_id = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        document_id = str(document_id)
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        if dataset.indexing_technique == "high_quality":
            # check embedding model setting: status changes on a high-quality
            # dataset go through the tenant's embedding model, so it must resolve
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
                    tenant_id=current_user.current_tenant_id,
                    provider=dataset.embedding_model_provider,
                    model_type=ModelType.TEXT_EMBEDDING,
                    model=dataset.embedding_model,
                )
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    "No Embedding Model available. Please configure a valid provider "
                    "in the Settings -> Model Provider."
                )
            except ProviderTokenNotInitError as ex:
                raise ProviderNotInitializeError(ex.description)
        segment_ids = request.args.getlist("segment_id")
        # refuse status changes while a background indexing run holds the cache key
        document_indexing_cache_key = "document_{}_indexing".format(document.id)
        cache_result = redis_client.get(document_indexing_cache_key)
        if cache_result is not None:
            raise InvalidActionError("Document is being indexed, please try again later")
        try:
            SegmentService.update_segments_status(segment_ids, action, dataset, document)
        except Exception as e:
            # surface any service failure as a 4xx action error rather than a 500
            raise InvalidActionError(str(e))
        return {"result": "success"}, 200
  164. class DatasetDocumentSegmentAddApi(Resource):
  165. @setup_required
  166. @login_required
  167. @account_initialization_required
  168. @cloud_edition_billing_resource_check("vector_space")
  169. @cloud_edition_billing_knowledge_limit_check("add_segment")
  170. def post(self, dataset_id, document_id):
  171. # check dataset
  172. dataset_id = str(dataset_id)
  173. dataset = DatasetService.get_dataset(dataset_id)
  174. if not dataset:
  175. raise NotFound("Dataset not found.")
  176. # check document
  177. document_id = str(document_id)
  178. document = DocumentService.get_document(dataset_id, document_id)
  179. if not document:
  180. raise NotFound("Document not found.")
  181. if not current_user.is_editor:
  182. raise Forbidden()
  183. # check embedding model setting
  184. if dataset.indexing_technique == "high_quality":
  185. try:
  186. model_manager = ModelManager()
  187. model_manager.get_model_instance(
  188. tenant_id=current_user.current_tenant_id,
  189. provider=dataset.embedding_model_provider,
  190. model_type=ModelType.TEXT_EMBEDDING,
  191. model=dataset.embedding_model,
  192. )
  193. except LLMBadRequestError:
  194. raise ProviderNotInitializeError(
  195. "No Embedding Model available. Please configure a valid provider "
  196. "in the Settings -> Model Provider."
  197. )
  198. except ProviderTokenNotInitError as ex:
  199. raise ProviderNotInitializeError(ex.description)
  200. try:
  201. DatasetService.check_dataset_permission(dataset, current_user)
  202. except services.errors.account.NoPermissionError as e:
  203. raise Forbidden(str(e))
  204. # validate args
  205. parser = reqparse.RequestParser()
  206. parser.add_argument("content", type=str, required=True, nullable=False, location="json")
  207. parser.add_argument("answer", type=str, required=False, nullable=True, location="json")
  208. parser.add_argument("keywords", type=list, required=False, nullable=True, location="json")
  209. args = parser.parse_args()
  210. SegmentService.segment_create_args_validate(args, document)
  211. segment = SegmentService.create_segment(args, document, dataset)
  212. return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
class DatasetDocumentSegmentUpdateApi(Resource):
    """Update or delete a single document segment."""

    @setup_required
    @login_required
    @account_initialization_required
    @cloud_edition_billing_resource_check("vector_space")
    def patch(self, dataset_id, document_id, segment_id):
        """Replace a segment's content (and optionally answer/keywords).

        When ``regenerate_child_chunks`` is true the service layer also rebuilds
        the segment's child chunks as part of the update.
        """
        # check dataset
        dataset_id = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
        if dataset.indexing_technique == "high_quality":
            # check embedding model setting: updating re-embeds the segment,
            # so the tenant must have a usable embedding model configured
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
                    tenant_id=current_user.current_tenant_id,
                    provider=dataset.embedding_model_provider,
                    model_type=ModelType.TEXT_EMBEDDING,
                    model=dataset.embedding_model,
                )
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    "No Embedding Model available. Please configure a valid provider "
                    "in the Settings -> Model Provider."
                )
            except ProviderTokenNotInitError as ex:
                raise ProviderNotInitializeError(ex.description)
        # check segment (scoped to the current tenant to prevent cross-tenant access)
        segment_id = str(segment_id)
        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound("Segment not found.")
        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        parser.add_argument("answer", type=str, required=False, nullable=True, location="json")
        parser.add_argument("keywords", type=list, required=False, nullable=True, location="json")
        parser.add_argument(
            "regenerate_child_chunks", type=bool, required=False, nullable=True, default=False, location="json"
        )
        args = parser.parse_args()
        SegmentService.segment_create_args_validate(args, document)
        segment = SegmentService.update_segment(SegmentUpdateArgs(**args), segment, document, dataset)
        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, dataset_id, document_id, segment_id):
        """Delete one segment (and its index data) from a document."""
        # check dataset
        dataset_id = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
        # check segment (scoped to the current tenant)
        segment_id = str(segment_id)
        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound("Segment not found.")
        # the current user needs editor rights on the tenant
        # (the original comment said "admin or owner", which did not match the check)
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        SegmentService.delete_segment(segment, document, dataset)
        return {"result": "success"}, 200
  306. class DatasetDocumentSegmentBatchImportApi(Resource):
  307. @setup_required
  308. @login_required
  309. @account_initialization_required
  310. @cloud_edition_billing_resource_check("vector_space")
  311. @cloud_edition_billing_knowledge_limit_check("add_segment")
  312. def post(self, dataset_id, document_id):
  313. # check dataset
  314. dataset_id = str(dataset_id)
  315. dataset = DatasetService.get_dataset(dataset_id)
  316. if not dataset:
  317. raise NotFound("Dataset not found.")
  318. # check document
  319. document_id = str(document_id)
  320. document = DocumentService.get_document(dataset_id, document_id)
  321. if not document:
  322. raise NotFound("Document not found.")
  323. # get file from request
  324. file = request.files["file"]
  325. # check file
  326. if "file" not in request.files:
  327. raise NoFileUploadedError()
  328. if len(request.files) > 1:
  329. raise TooManyFilesError()
  330. # check file type
  331. if not file.filename.endswith(".csv"):
  332. raise ValueError("Invalid file type. Only CSV files are allowed")
  333. try:
  334. # Skip the first row
  335. df = pd.read_csv(file)
  336. result = []
  337. for index, row in df.iterrows():
  338. if document.doc_form == "qa_model":
  339. data = {"content": row[0], "answer": row[1]}
  340. else:
  341. data = {"content": row[0]}
  342. result.append(data)
  343. if len(result) == 0:
  344. raise ValueError("The CSV file is empty.")
  345. # async job
  346. job_id = str(uuid.uuid4())
  347. indexing_cache_key = "segment_batch_import_{}".format(str(job_id))
  348. # send batch add segments task
  349. redis_client.setnx(indexing_cache_key, "waiting")
  350. batch_create_segment_to_index_task.delay(
  351. str(job_id), result, dataset_id, document_id, current_user.current_tenant_id, current_user.id
  352. )
  353. except Exception as e:
  354. return {"error": str(e)}, 500
  355. return {"job_id": job_id, "job_status": "waiting"}, 200
  356. @setup_required
  357. @login_required
  358. @account_initialization_required
  359. def get(self, job_id):
  360. job_id = str(job_id)
  361. indexing_cache_key = "segment_batch_import_{}".format(job_id)
  362. cache_result = redis_client.get(indexing_cache_key)
  363. if cache_result is None:
  364. raise ValueError("The job is not exist.")
  365. return {"job_id": job_id, "job_status": cache_result.decode()}, 200
  366. class ChildChunkAddApi(Resource):
  367. @setup_required
  368. @login_required
  369. @account_initialization_required
  370. @cloud_edition_billing_resource_check("vector_space")
  371. @cloud_edition_billing_knowledge_limit_check("add_segment")
  372. def post(self, dataset_id, document_id, segment_id):
  373. # check dataset
  374. dataset_id = str(dataset_id)
  375. dataset = DatasetService.get_dataset(dataset_id)
  376. if not dataset:
  377. raise NotFound("Dataset not found.")
  378. # check document
  379. document_id = str(document_id)
  380. document = DocumentService.get_document(dataset_id, document_id)
  381. if not document:
  382. raise NotFound("Document not found.")
  383. # check segment
  384. segment_id = str(segment_id)
  385. segment = DocumentSegment.query.filter(
  386. DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
  387. ).first()
  388. if not segment:
  389. raise NotFound("Segment not found.")
  390. if not current_user.is_editor:
  391. raise Forbidden()
  392. # check embedding model setting
  393. if dataset.indexing_technique == "high_quality":
  394. try:
  395. model_manager = ModelManager()
  396. model_manager.get_model_instance(
  397. tenant_id=current_user.current_tenant_id,
  398. provider=dataset.embedding_model_provider,
  399. model_type=ModelType.TEXT_EMBEDDING,
  400. model=dataset.embedding_model,
  401. )
  402. except LLMBadRequestError:
  403. raise ProviderNotInitializeError(
  404. "No Embedding Model available. Please configure a valid provider "
  405. "in the Settings -> Model Provider."
  406. )
  407. except ProviderTokenNotInitError as ex:
  408. raise ProviderNotInitializeError(ex.description)
  409. try:
  410. DatasetService.check_dataset_permission(dataset, current_user)
  411. except services.errors.account.NoPermissionError as e:
  412. raise Forbidden(str(e))
  413. # validate args
  414. parser = reqparse.RequestParser()
  415. parser.add_argument("content", type=str, required=True, nullable=False, location="json")
  416. args = parser.parse_args()
  417. try:
  418. child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset)
  419. except ChildChunkIndexingServiceError as e:
  420. raise ChildChunkIndexingError(str(e))
  421. return {"data": marshal(child_chunk, child_chunk_fields)}, 200
  422. @setup_required
  423. @login_required
  424. @account_initialization_required
  425. def get(self, dataset_id, document_id, segment_id):
  426. # check dataset
  427. dataset_id = str(dataset_id)
  428. dataset = DatasetService.get_dataset(dataset_id)
  429. if not dataset:
  430. raise NotFound("Dataset not found.")
  431. # check user's model setting
  432. DatasetService.check_dataset_model_setting(dataset)
  433. # check document
  434. document_id = str(document_id)
  435. document = DocumentService.get_document(dataset_id, document_id)
  436. if not document:
  437. raise NotFound("Document not found.")
  438. # check segment
  439. segment_id = str(segment_id)
  440. segment = DocumentSegment.query.filter(
  441. DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
  442. ).first()
  443. if not segment:
  444. raise NotFound("Segment not found.")
  445. parser = reqparse.RequestParser()
  446. parser.add_argument("limit", type=int, default=20, location="args")
  447. parser.add_argument("keyword", type=str, default=None, location="args")
  448. parser.add_argument("page", type=int, default=1, location="args")
  449. args = parser.parse_args()
  450. page = args["page"]
  451. limit = min(args["limit"], 100)
  452. keyword = args["keyword"]
  453. child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword)
  454. return {
  455. "data": marshal(child_chunks.items, child_chunk_fields),
  456. "total": child_chunks.total,
  457. "total_pages": child_chunks.pages,
  458. "page": page,
  459. "limit": limit,
  460. }, 200
  461. @setup_required
  462. @login_required
  463. @account_initialization_required
  464. @cloud_edition_billing_resource_check("vector_space")
  465. def patch(self, dataset_id, document_id, segment_id):
  466. # check dataset
  467. dataset_id = str(dataset_id)
  468. dataset = DatasetService.get_dataset(dataset_id)
  469. if not dataset:
  470. raise NotFound("Dataset not found.")
  471. # check user's model setting
  472. DatasetService.check_dataset_model_setting(dataset)
  473. # check document
  474. document_id = str(document_id)
  475. document = DocumentService.get_document(dataset_id, document_id)
  476. if not document:
  477. raise NotFound("Document not found.")
  478. # check segment
  479. segment_id = str(segment_id)
  480. segment = DocumentSegment.query.filter(
  481. DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
  482. ).first()
  483. if not segment:
  484. raise NotFound("Segment not found.")
  485. # The role of the current user in the ta table must be admin, owner, or editor
  486. if not current_user.is_editor:
  487. raise Forbidden()
  488. try:
  489. DatasetService.check_dataset_permission(dataset, current_user)
  490. except services.errors.account.NoPermissionError as e:
  491. raise Forbidden(str(e))
  492. # validate args
  493. parser = reqparse.RequestParser()
  494. parser.add_argument("chunks", type=list, required=True, nullable=False, location="json")
  495. args = parser.parse_args()
  496. try:
  497. chunks = [ChildChunkUpdateArgs(**chunk) for chunk in args.get("chunks")]
  498. child_chunks = SegmentService.update_child_chunks(chunks, segment, document, dataset)
  499. except ChildChunkIndexingServiceError as e:
  500. raise ChildChunkIndexingError(str(e))
  501. return {"data": marshal(child_chunks, child_chunk_fields)}, 200
class ChildChunkUpdateApi(Resource):
    """Delete or update a single child chunk of a segment."""

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, dataset_id, document_id, segment_id, child_chunk_id):
        """Remove one child chunk and its index entry."""
        # check dataset
        dataset_id = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
        # check segment (scoped to the current tenant to prevent cross-tenant access)
        segment_id = str(segment_id)
        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound("Segment not found.")
        # check child chunk (also tenant-scoped)
        child_chunk_id = str(child_chunk_id)
        child_chunk = ChildChunk.query.filter(
            ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id
        ).first()
        if not child_chunk:
            raise NotFound("Child chunk not found.")
        # the current user needs editor rights on the tenant
        # (the original comment said "admin or owner", which did not match the check)
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        try:
            SegmentService.delete_child_chunk(child_chunk, dataset)
        except ChildChunkDeleteIndexServiceError as e:
            raise ChildChunkDeleteIndexError(str(e))
        return {"result": "success"}, 200

    @setup_required
    @login_required
    @account_initialization_required
    @cloud_edition_billing_resource_check("vector_space")
    def patch(self, dataset_id, document_id, segment_id, child_chunk_id):
        """Replace one child chunk's content and re-index it."""
        # check dataset
        dataset_id = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound("Document not found.")
        # check segment (scoped to the current tenant to prevent cross-tenant access)
        segment_id = str(segment_id)
        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound("Segment not found.")
        # check child chunk (also tenant-scoped)
        child_chunk_id = str(child_chunk_id)
        child_chunk = ChildChunk.query.filter(
            ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id
        ).first()
        if not child_chunk:
            raise NotFound("Child chunk not found.")
        # the current user needs editor rights on the tenant
        # (the original comment said "admin or owner", which did not match the check)
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))
        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        args = parser.parse_args()
        try:
            child_chunk = SegmentService.update_child_chunk(
                args.get("content"), child_chunk, segment, document, dataset
            )
        except ChildChunkIndexingServiceError as e:
            raise ChildChunkIndexingError(str(e))
        return {"data": marshal(child_chunk, child_chunk_fields)}, 200
# Route registrations: segment CRUD, batch import, and child-chunk endpoints.
api.add_resource(DatasetDocumentSegmentListApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments")
api.add_resource(
    DatasetDocumentSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment/<string:action>"
)
api.add_resource(DatasetDocumentSegmentAddApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment")
api.add_resource(
    DatasetDocumentSegmentUpdateApi,
    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>",
)
api.add_resource(
    DatasetDocumentSegmentBatchImportApi,
    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/batch_import",
    # second route serves the status poll handled by this resource's get(job_id)
    "/datasets/batch_import_status/<uuid:job_id>",
)
api.add_resource(
    ChildChunkAddApi,
    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks",
)
api.add_resource(
    ChildChunkUpdateApi,
    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks/<uuid:child_chunk_id>",
)