|
@@ -1390,7 +1390,7 @@ class SegmentService:
|
|
|
model=dataset.embedding_model,
|
|
|
)
|
|
|
# calc embedding use tokens
|
|
|
- tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])
|
|
|
+ tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
|
|
|
lock_name = "add_segment_lock_document_id_{}".format(document.id)
|
|
|
with redis_client.lock(lock_name, timeout=600):
|
|
|
max_position = (
|
|
@@ -1467,9 +1467,12 @@ class SegmentService:
|
|
|
if dataset.indexing_technique == "high_quality" and embedding_model:
|
|
|
# calc embedding use tokens
|
|
|
if document.doc_form == "qa_model":
|
|
|
- tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment_item["answer"]])
|
|
|
+ tokens = embedding_model.get_text_embedding_num_tokens(
|
|
|
+ texts=[content + segment_item["answer"]]
|
|
|
+ )[0]
|
|
|
else:
|
|
|
- tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])
|
|
|
+ tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
|
|
|
+
|
|
|
segment_document = DocumentSegment(
|
|
|
tenant_id=current_user.current_tenant_id,
|
|
|
dataset_id=document.dataset_id,
|
|
@@ -1577,9 +1580,9 @@ class SegmentService:
|
|
|
|
|
|
# calc embedding use tokens
|
|
|
if document.doc_form == "qa_model":
|
|
|
- tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])
|
|
|
+ tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0]
|
|
|
else:
|
|
|
- tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])
|
|
|
+ tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
|
|
|
segment.content = content
|
|
|
segment.index_node_hash = segment_hash
|
|
|
segment.word_count = len(content)
|