v0.21.1-fastapi

2025-11-04 16:06:36 +08:00
parent 3e58c3d0e9
commit d57b5d76ae
218 changed files with 19617 additions and 72339 deletions
--- a/graphrag/utils.py
+++ b/graphrag/utils.py
@@ -92,10 +92,7 @@ def dict_has_keys_with_types(data: dict, expected_fields: list[tuple[str, type]]

 def get_llm_cache(llmnm, txt, history, genconf):
    hasher = xxhash.xxh64()
-    hasher.update(str(llmnm).encode("utf-8"))
-    hasher.update(str(txt).encode("utf-8"))
-    hasher.update(str(history).encode("utf-8"))
-    hasher.update(str(genconf).encode("utf-8"))
+    hasher.update((str(llmnm)+str(txt)+str(history)+str(genconf)).encode("utf-8"))

    k = hasher.hexdigest()
    bin = REDIS_CONN.get(k)
@@ -106,11 +103,7 @@ def get_llm_cache(llmnm, txt, history, genconf):

 def set_llm_cache(llmnm, txt, v, history, genconf):
    hasher = xxhash.xxh64()
-    hasher.update(str(llmnm).encode("utf-8"))
-    hasher.update(str(txt).encode("utf-8"))
-    hasher.update(str(history).encode("utf-8"))
-    hasher.update(str(genconf).encode("utf-8"))
-
+    hasher.update((str(llmnm)+str(txt)+str(history)+str(genconf)).encode("utf-8"))
    k = hasher.hexdigest()
    REDIS_CONN.set(k, v.encode("utf-8"), 24 * 3600)

@@ -341,7 +334,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):
    ents = list(set(ents))
    conds = {"fields": ["content_with_weight"], "size": size, "from_entity_kwd": ents, "to_entity_kwd": ents, "knowledge_graph_kwd": ["relation"]}
    res = []
-    es_res = settings.retrievaler.search(conds, search.index_name(tenant_id), [kb_id] if isinstance(kb_id, str) else kb_id)
+    es_res = settings.retriever.search(conds, search.index_name(tenant_id), [kb_id] if isinstance(kb_id, str) else kb_id)
    for id in es_res.ids:
        try:
            if size == 1:
@@ -398,7 +391,7 @@ async def does_graph_contains(tenant_id, kb_id, doc_id):

 async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]:
    conds = {"fields": ["source_id"], "removed_kwd": "N", "size": 1, "knowledge_graph_kwd": ["graph"]}
-    res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search(conds, search.index_name(tenant_id), [kb_id]))
+    res = await trio.to_thread.run_sync(lambda: settings.retriever.search(conds, search.index_name(tenant_id), [kb_id]))
    doc_ids = []
    if res.total == 0:
        return doc_ids
@@ -409,7 +402,7 @@ async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]:

 async def get_graph(tenant_id, kb_id, exclude_rebuild=None):
    conds = {"fields": ["content_with_weight", "removed_kwd", "source_id"], "size": 1, "knowledge_graph_kwd": ["graph"]}
-    res = await trio.to_thread.run_sync(settings.retrievaler.search, conds, search.index_name(tenant_id), [kb_id])
+    res = await trio.to_thread.run_sync(settings.retriever.search, conds, search.index_name(tenant_id), [kb_id])
    if not res.total == 0:
        for id in res.ids:
            try:
@@ -562,7 +555,7 @@ def merge_tuples(list1, list2):


 async def get_entity_type2samples(idxnms, kb_ids: list):
-    es_res = await trio.to_thread.run_sync(lambda: settings.retrievaler.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]}, idxnms, kb_ids))
+    es_res = await trio.to_thread.run_sync(lambda: settings.retriever.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]}, idxnms, kb_ids))

    res = defaultdict(list)
    for id in es_res.ids: