v0.21.1-fastapi

This commit is contained in:
2025-11-04 16:06:36 +08:00
parent 3e58c3d0e9
commit d57b5d76ae
218 changed files with 19617 additions and 72339 deletions

View File

@@ -22,7 +22,7 @@ import trio
from api.utils import get_uuid
from api.utils.base64_image import id2image, image2id
from ocr.service import get_ocr_service
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.flow.hierarchical_merger.schema import HierarchicalMergerFromUpstream
from rag.nlp import concat_img
@@ -166,24 +166,21 @@ class HierarchicalMerger(ProcessBase):
img = None
for i in path:
txt += lines[i] + "\n"
concat_img(img, id2image(section_images[i], partial(STORAGE_IMPL.get)))
concat_img(img, id2image(section_images[i], partial(STORAGE_IMPL.get, tenant_id=self._canvas._tenant_id)))
cks.append(txt)
images.append(img)
ocr_service = get_ocr_service()
processed_cks = []
for c, img in zip(cks, images):
cleaned_text = await ocr_service.remove_tag(c)
positions = await ocr_service.extract_positions(c)
processed_cks.append({
"text": cleaned_text,
cks = [
{
"text": RAGFlowPdfParser.remove_tag(c),
"image": img,
"positions": positions,
})
cks = processed_cks
"positions": RAGFlowPdfParser.extract_positions(c),
}
for c, img in zip(cks, images)
]
async with trio.open_nursery() as nursery:
for d in cks:
nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put), get_uuid())
nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
self.set_output("chunks", cks)
self.callback(1, "Done.")