将flask改成fastapi
This commit is contained in:
255
rag/llm/sequence2txt_model.py
Normal file
255
rag/llm/sequence2txt_model.py
Normal file
@@ -0,0 +1,255 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from abc import ABC
|
||||
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
from openai.lib.azure import AzureOpenAI
|
||||
|
||||
from rag.utils import num_tokens_from_string
|
||||
|
||||
|
||||
class Base(ABC):
|
||||
def __init__(self, key, model_name, **kwargs):
|
||||
"""
|
||||
Abstract base class constructor.
|
||||
Parameters are not stored; initialization is left to subclasses.
|
||||
"""
|
||||
pass
|
||||
|
||||
def transcription(self, audio_path, **kwargs):
|
||||
audio_file = open(audio_path, "rb")
|
||||
transcription = self.client.audio.transcriptions.create(model=self.model_name, file=audio_file)
|
||||
return transcription.text.strip(), num_tokens_from_string(transcription.text.strip())
|
||||
|
||||
def audio2base64(self, audio):
|
||||
if isinstance(audio, bytes):
|
||||
return base64.b64encode(audio).decode("utf-8")
|
||||
if isinstance(audio, io.BytesIO):
|
||||
return base64.b64encode(audio.getvalue()).decode("utf-8")
|
||||
raise TypeError("The input audio file should be in binary format.")
|
||||
|
||||
|
||||
class GPTSeq2txt(Base):
|
||||
_FACTORY_NAME = "OpenAI"
|
||||
|
||||
def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://api.openai.com/v1"
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
|
||||
|
||||
class QWenSeq2txt(Base):
|
||||
_FACTORY_NAME = "Tongyi-Qianwen"
|
||||
|
||||
def __init__(self, key, model_name="qwen-audio-asr", **kwargs):
|
||||
import dashscope
|
||||
|
||||
dashscope.api_key = key
|
||||
self.model_name = model_name
|
||||
|
||||
def transcription(self, audio_path):
|
||||
if "paraformer" in self.model_name or "sensevoice" in self.model_name:
|
||||
return f"**ERROR**: model {self.model_name} is not suppported yet.", 0
|
||||
|
||||
from dashscope import MultiModalConversation
|
||||
|
||||
audio_path = f"file://{audio_path}"
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"audio": audio_path}],
|
||||
}
|
||||
]
|
||||
|
||||
response = None
|
||||
full_content = ""
|
||||
try:
|
||||
response = MultiModalConversation.call(model="qwen-audio-asr", messages=messages, result_format="message", stream=True)
|
||||
for response in response:
|
||||
try:
|
||||
full_content += response["output"]["choices"][0]["message"].content[0]["text"]
|
||||
except Exception:
|
||||
pass
|
||||
return full_content, num_tokens_from_string(full_content)
|
||||
except Exception as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
|
||||
|
||||
class AzureSeq2txt(Base):
|
||||
_FACTORY_NAME = "Azure-OpenAI"
|
||||
|
||||
def __init__(self, key, model_name, lang="Chinese", **kwargs):
|
||||
self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
|
||||
self.model_name = model_name
|
||||
self.lang = lang
|
||||
|
||||
|
||||
class XinferenceSeq2txt(Base):
|
||||
_FACTORY_NAME = "Xinference"
|
||||
|
||||
def __init__(self, key, model_name="whisper-small", **kwargs):
|
||||
self.base_url = kwargs.get("base_url", None)
|
||||
self.model_name = model_name
|
||||
self.key = key
|
||||
|
||||
def transcription(self, audio, language="zh", prompt=None, response_format="json", temperature=0.7):
|
||||
if isinstance(audio, str):
|
||||
audio_file = open(audio, "rb")
|
||||
audio_data = audio_file.read()
|
||||
audio_file_name = audio.split("/")[-1]
|
||||
else:
|
||||
audio_data = audio
|
||||
audio_file_name = "audio.wav"
|
||||
|
||||
payload = {"model": self.model_name, "language": language, "prompt": prompt, "response_format": response_format, "temperature": temperature}
|
||||
|
||||
files = {"file": (audio_file_name, audio_data, "audio/wav")}
|
||||
|
||||
try:
|
||||
response = requests.post(f"{self.base_url}/v1/audio/transcriptions", files=files, data=payload)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
if "text" in result:
|
||||
transcription_text = result["text"].strip()
|
||||
return transcription_text, num_tokens_from_string(transcription_text)
|
||||
else:
|
||||
return "**ERROR**: Failed to retrieve transcription.", 0
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
return f"**ERROR**: {str(e)}", 0
|
||||
|
||||
|
||||
class TencentCloudSeq2txt(Base):
|
||||
_FACTORY_NAME = "Tencent Cloud"
|
||||
|
||||
def __init__(self, key, model_name="16k_zh", base_url="https://asr.tencentcloudapi.com"):
|
||||
from tencentcloud.asr.v20190614 import asr_client
|
||||
from tencentcloud.common import credential
|
||||
|
||||
key = json.loads(key)
|
||||
sid = key.get("tencent_cloud_sid", "")
|
||||
sk = key.get("tencent_cloud_sk", "")
|
||||
cred = credential.Credential(sid, sk)
|
||||
self.client = asr_client.AsrClient(cred, "")
|
||||
self.model_name = model_name
|
||||
|
||||
def transcription(self, audio, max_retries=60, retry_interval=5):
|
||||
import time
|
||||
|
||||
from tencentcloud.asr.v20190614 import models
|
||||
from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
|
||||
TencentCloudSDKException,
|
||||
)
|
||||
|
||||
b64 = self.audio2base64(audio)
|
||||
try:
|
||||
# dispatch disk
|
||||
req = models.CreateRecTaskRequest()
|
||||
params = {
|
||||
"EngineModelType": self.model_name,
|
||||
"ChannelNum": 1,
|
||||
"ResTextFormat": 0,
|
||||
"SourceType": 1,
|
||||
"Data": b64,
|
||||
}
|
||||
req.from_json_string(json.dumps(params))
|
||||
resp = self.client.CreateRecTask(req)
|
||||
|
||||
# loop query
|
||||
req = models.DescribeTaskStatusRequest()
|
||||
params = {"TaskId": resp.Data.TaskId}
|
||||
req.from_json_string(json.dumps(params))
|
||||
retries = 0
|
||||
while retries < max_retries:
|
||||
resp = self.client.DescribeTaskStatus(req)
|
||||
if resp.Data.StatusStr == "success":
|
||||
text = re.sub(r"\[\d+:\d+\.\d+,\d+:\d+\.\d+\]\s*", "", resp.Data.Result).strip()
|
||||
return text, num_tokens_from_string(text)
|
||||
elif resp.Data.StatusStr == "failed":
|
||||
return (
|
||||
"**ERROR**: Failed to retrieve speech recognition results.",
|
||||
0,
|
||||
)
|
||||
else:
|
||||
time.sleep(retry_interval)
|
||||
retries += 1
|
||||
return "**ERROR**: Max retries exceeded. Task may still be processing.", 0
|
||||
|
||||
except TencentCloudSDKException as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
except Exception as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
|
||||
|
||||
class GPUStackSeq2txt(Base):
|
||||
_FACTORY_NAME = "GPUStack"
|
||||
|
||||
def __init__(self, key, model_name, base_url):
|
||||
if not base_url:
|
||||
raise ValueError("url cannot be None")
|
||||
if base_url.split("/")[-1] != "v1":
|
||||
base_url = os.path.join(base_url, "v1")
|
||||
self.base_url = base_url
|
||||
self.model_name = model_name
|
||||
self.key = key
|
||||
|
||||
|
||||
class GiteeSeq2txt(Base):
|
||||
_FACTORY_NAME = "GiteeAI"
|
||||
|
||||
def __init__(self, key, model_name="whisper-1", base_url="https://ai.gitee.com/v1/", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://ai.gitee.com/v1/"
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
|
||||
|
||||
class DeepInfraSeq2txt(Base):
|
||||
_FACTORY_NAME = "DeepInfra"
|
||||
|
||||
def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://api.deepinfra.com/v1/openai"
|
||||
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
|
||||
|
||||
class CometAPISeq2txt(Base):
|
||||
_FACTORY_NAME = "CometAPI"
|
||||
|
||||
def __init__(self, key, model_name="whisper-1", base_url="https://api.cometapi.com/v1", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://api.cometapi.com/v1"
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
|
||||
class DeerAPISeq2txt(Base):
|
||||
_FACTORY_NAME = "DeerAPI"
|
||||
|
||||
def __init__(self, key, model_name="whisper-1", base_url="https://api.deerapi.com/v1", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://api.deerapi.com/v1"
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
Reference in New Issue
Block a user