第一次
This commit is contained in:
0
fst_data_pipeline/pipelines/volc/__init__.py
Normal file
0
fst_data_pipeline/pipelines/volc/__init__.py
Normal file
97
fst_data_pipeline/pipelines/volc/bag-copy.sh
Normal file
97
fst_data_pipeline/pipelines/volc/bag-copy.sh
Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
# Required environment variables (must be exported by the caller):
#   INPUT_ROOT  WORKFLOW_ID  SUB_DIR  OUTPUT_ROOT
#
# `set -u` only rejects unset variables. An exported-but-empty value would
# silently produce paths rooted at "/", so reject both cases explicitly.
: "${INPUT_ROOT:?INPUT_ROOT must be exported and non-empty}"
: "${WORKFLOW_ID:?WORKFLOW_ID must be exported and non-empty}"
: "${SUB_DIR:?SUB_DIR must be exported and non-empty}"
: "${OUTPUT_ROOT:?OUTPUT_ROOT must be exported and non-empty}"

INPUT_ROOT="${INPUT_ROOT}/${WORKFLOW_ID}"
INPUT_DIR="${INPUT_ROOT}/${SUB_DIR}"    # directory one level above the split_N folders
OUTPUT_DIR="${OUTPUT_ROOT}"             # usually /nas_perception/.../output/wfxxx
|
||||
|
||||
# Print one log line to stdout: "[YYYY-MM-DD HH:MM:SS] [PID] message".
# All arguments are joined into a single message.
log() {
    printf '[%s] [%s] %s\n' "$(date '+%F %T')" "$$" "$*"
}
|
||||
|
||||
#----------------------------------------------------------
# Shared helpers: directory sync / file copy
#----------------------------------------------------------

# sync_dir SRC_DIR DST_DIR
# Mirror SRC_DIR into DST_DIR with rsync in archive mode; entries in DST_DIR
# that are absent from SRC_DIR are deleted. Does nothing (and succeeds) when
# SRC_DIR is not a directory.
sync_dir() {
    local from="$1"
    local to="$2"

    [[ -d ${from} ]] || return 0

    log " + $(basename "${from}")/"
    mkdir -p "${to}"
    rsync -a --delete "${from}/" "${to}/"
}
|
||||
|
||||
# sync_file SRC_FILE DST_DIR
# Copy SRC_FILE into DST_DIR (created if needed), preserving mode, ownership
# and timestamps. Does nothing (and succeeds) when SRC_FILE is not a regular
# file.
sync_file() {
    local from="$1"
    local to="$2"

    [[ -f ${from} ]] || return 0

    log " + $(basename "${from}")"
    mkdir -p "${to}"
    cp -p "${from}" "${to}/"
}
|
||||
|
||||
#----------------------------------------------------------
# Main logic
#----------------------------------------------------------
log "Script started"
log "INPUT_DIR = ${INPUT_DIR}"
log "OUTPUT_DIR = ${OUTPUT_DIR}"

[[ -d ${INPUT_DIR} ]] || { log "ERROR: INPUT_DIR not found: ${INPUT_DIR}"; exit 1; }

# Every <INPUT_DIR>/<split_N>/<name>.bag.dir is processed once.
while IFS= read -r -d '' src; do
    # Drop the "<INPUT_DIR>/<split_N>/" prefix to get the bare bag.dir name.
    # INPUT_DIR is quoted so glob characters in the path are matched literally.
    rel="${src#"${INPUT_DIR}"/*/}"
    dest="${OUTPUT_DIR}/${rel}/derived/${SUB_DIR}"
    mkdir -p "${dest}"

    basename_bag=$(basename "$src")               # xxx.bag.dir
    pkgname="${basename_bag%.bag.dir}"            # xxx
    split_name=$(basename "$(dirname "$src")")    # split_0 / split_1 / ...
    truth_root="${INPUT_DIR}/${split_name}"

    log "==================== Processing ${pkgname} ==================="

    # 1. Legacy object outputs, synced under their new names
    sync_dir "${src}/object_det_ep20" "${dest}/object_det_al"
    sync_dir "${src}/object_tracking" "${dest}/object_tracking_al"

    # 2. Directory items: slam, lidar_gt and the object-related outputs
    for item in slam_lidar_ground slam_lidar_none_ground \
                lidar_gt_pandar128_5f_front lidar_gt_pandar128_5f_rear \
                object_det_ep20_lrgt_front object_det_ep20_lrgt_rear \
                object_lrgt_filter object_postprocess; do
        sync_dir "${src}/${item}" "${dest}/${item}"
    done

    # 3. File items
    for item in bev_image_ground.png ego_motion_slam_lidar.csv; do
        sync_file "${src}/${item}" "${dest}"
    done

    # 4. osm & split_json (sources live under <INPUT_DIR>/<split_N>/).
    #    Each is first copied into the input bag.dir, then on to the output.
    osm_src="${truth_root}/osm_out/${pkgname}.osm"
    if [[ -f ${osm_src} ]]; then
        log " + ${pkgname}.osm -> input bag.dir"
        cp -p "${osm_src}" "${src}/"
        log " + ${pkgname}.osm -> output"
        cp -p "${src}/${pkgname}.osm" "${dest}/"
    fi

    split_src="${truth_root}/split_json/${pkgname}"
    if [[ -d ${split_src} ]]; then
        log " + split_json/ -> input bag.dir"
        rsync -a --delete "${split_src}/" "${src}/split_json/"
        log " + split_json/ -> output"
        sync_dir "${src}/split_json" "${dest}/split_json"
    fi

    # 5. 2dseg and occ
    # This step targets ${dest} itself, so it must NOT use --delete: doing so
    # would remove everything steps 1-4 just placed under ${dest}.
    sub_src="${src}/${SUB_DIR}"
    if [[ -d ${sub_src} ]]; then
        log " + $(basename "${sub_src}")/"
        rsync -a "${sub_src}/" "${dest}/"
    fi

done < <(find "${INPUT_DIR}" -mindepth 2 -maxdepth 2 -type d -name '*.bag.dir' -print0)

log "============================================================"
log "All done, success!"
|
||||
139
fst_data_pipeline/pipelines/volc/bag_operation/bag_scanner.py
Normal file
139
fst_data_pipeline/pipelines/volc/bag_operation/bag_scanner.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
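"""
check_bags.py

Environment variables:
    BAG_DIR        required  root directory that contains the *.bag.dir folders
    GT_API_URL     optional  endpoint that returns the pipeline paths;
                             defaults to http://10.204.22.135:30000/api/gt/types
    OUTPUT_PREFIX  optional  prefix prepended verbatim to every output entry
    others         ground-truth switches, for example:
                       OBJECT_DETECTION=true
                       LANE_DETECTION=false
                       SLAM_GROUND=true
                       ...
        A switch takes part in the check only when its value is true/false
        (case-insensitive):
            true   -> the path must exist (a directory must also be non-empty)
            false  -> the path must not exist
            any other value, or unset -> ignored

Results are meant to go to both stdout and list.txt, one entry per line
(OUTPUT_PREFIX followed by the bag directory name).

Additions:
    - abort before scanning when BAG_DIR contains no *.bag.dir directory
    - report statistics: total bags, rules, passed, failed
    - when a required directory exists, additionally require it to be non-empty
"""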
|
||||
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# ---------- logging ----------
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="[%(asctime)s][%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("check_bags")

# ---------- 1. base directory ----------
# BAG_DIR must be set and must name an existing directory; otherwise abort.
BASE = os.getenv("BAG_DIR")
if not (BASE and os.path.isdir(BASE)):
    log.error("BAG_DIR not set or not a directory")
    sys.exit(1)

# Optional prefix that is prepended verbatim to every output entry.
PREFIX = os.getenv("OUTPUT_PREFIX", "")

# Endpoint that lists the pipeline definitions; trailing whitespace is removed.
GT_API_URL = os.getenv("GT_API_URL", "http://10.204.22.135:30000/api/gt/types").rstrip()
log.info("GT_API_URL = %s", GT_API_URL)
|
||||
|
||||
|
||||
# ---------- 2. fetch the pipeline list from the API ----------
try:
    log.info("fetching pipeline list from %s", GT_API_URL)
    response = requests.get(GT_API_URL, timeout=10)
    # Fail on HTTP 4xx/5xx instead of treating an error payload as the
    # pipeline list.
    response.raise_for_status()
    api = response.json()
    log.info("got %d items from API", len(api))
except Exception as e:
    # Top-level boundary: any network, HTTP or decoding problem aborts the run.
    log.error("API unreachable: %s", e)
    sys.exit(1)
|
||||
|
||||
|
||||
# ---------- 3. collect check rules ----------
# Each rule is a (relative_path, must_exist) pair. Only API items of type
# "pipeline" whose same-named environment variable is "true"/"false"
# (case-insensitive) produce a rule.
checks = []
for entry in api:
    if entry.get("type") == "pipeline":
        name = entry["name"]
        flag = os.environ.get(name, "").lower()
        if flag not in ("true", "false"):
            continue
        # Strip leading slashes so the path is joined relative to each bag dir.
        path = entry["path"].lstrip("/")
        must_exist = flag == "true"
        checks.append((path, must_exist))
        log.info("check rule: %-30s must_exist=%-5s path=%s", name, must_exist, path)

if len(checks) == 0:
    log.error("No pipeline paths enabled for check")
    sys.exit(1)
|
||||
|
||||
|
||||
# ---------- 4. walk the bags + statistics ----------
def _empty_dir(p: str) -> bool:
    """Return True when *p* is an existing directory that has no entries."""
    if not os.path.isdir(p):
        return False
    return len(os.listdir(p)) == 0
|
||||
|
||||
|
||||
valid_cnt = invalid_cnt = 0

# Candidate bags are the direct children of BASE named "*.bag.dir".
# Sorted because os.listdir() returns entries in arbitrary order; this keeps
# the scan order and the resulting list reproducible.
bag_dirs = sorted(
    d
    for d in os.listdir(BASE)
    if d.endswith(".bag.dir") and os.path.isdir(os.path.join(BASE, d))
)
if not bag_dirs:
    log.error("No *.bag.dir found under BAG_DIR (%s), aborting", BASE)
    sys.exit(1)

total_bag = len(bag_dirs)
log.info("start scanning %d bag(s) against %d rule(s)", total_bag, len(checks))

# Full paths of the bags that satisfy every rule.
valid = []
for bag in bag_dirs:
    bag_path = os.path.join(BASE, bag)
    ok = True
    for rel, must_exist in checks:
        full = os.path.join(bag_path, rel)
        exists = os.path.exists(full)
        # A required directory that exists but is empty counts as missing.
        if must_exist and _empty_dir(full):
            exists = False
        if exists != must_exist:
            # Logged at WARNING so the rejection reason is visible at the
            # configured INFO level.
            log.warning(
                "bag %s failed: %s exists=%s required=%s",
                bag,
                rel,
                exists,
                must_exist,
            )
            ok = False
            break  # the first violated rule rejects the bag
    if ok:
        valid_cnt += 1
        valid.append(bag_path)
        log.info("valid bag: %s", bag)
    else:
        invalid_cnt += 1
|
||||
|
||||
|
||||
# ---------- 5. write results & statistics ----------
out_file = "list.txt"
with open(out_file, "w", encoding="utf-8") as f:
    for bag_path in valid:
        # One entry per line: OUTPUT_PREFIX followed by the bag directory name.
        line = f"{PREFIX}{os.path.basename(bag_path)}"
        f.write(line + "\n")
        # Mirror the entry on stdout; log records go to stderr, so stdout
        # carries only the result list.
        print(line)

log.info("==== summary ====")
log.info("total bags : %d", total_bag)
log.info("rules : %d", len(checks))
log.info("passed : %d", valid_cnt)
log.info("failed : %d", invalid_cnt)
log.info("wrote %d bags to %s and stdout", len(valid), out_file)
|
||||
113
fst_data_pipeline/pipelines/volc/bag_operation/merge_rosbag.py
Normal file
113
fst_data_pipeline/pipelines/volc/bag_operation/merge_rosbag.py
Normal file
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
|
||||
import requests
|
||||
import tos
|
||||
import psycopg2
|
||||
from tqdm import tqdm
|
||||
|
||||
# ---------- logging ----------
# Records go to both bag_merge.log and the console.
logging.basicConfig(
    handlers=[logging.FileHandler("bag_merge.log"), logging.StreamHandler()],
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# ---------- environment variables ----------
API_URL = os.environ.get("API_URL")                # endpoint queried by fetch_mapping()
TOS_ENDPOINT = os.environ.get("TOS_ENDPOINT")
TOS_REGION = os.environ.get("TOS_REGION")
TOS_BUCKET = os.environ.get("TOS_BUCKET")
TOS_AK = os.environ.get("TOS_ACCESS_KEY")
TOS_SK = os.environ.get("TOS_SECRET_KEY")
PG_DSN = os.environ.get("PG_DSN")                  # passed to psycopg2.connect()
# Scratch directory; each parent bag gets its own sub-directory.
TEMP_ROOT = Path(os.environ.get("TEMP_ROOT", "/tmp/bag_merge"))

# ---------- TOS client ----------
tos_client = tos.TosClientV2(TOS_AK, TOS_SK, TOS_ENDPOINT, TOS_REGION)
|
||||
|
||||
|
||||
# ---------- atomic functions ----------
def fetch_mapping() -> dict:
    """POST a wildcard bag-name query to API_URL and return the decoded JSON.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    log.info("POST %s", API_URL)
    payload = {"bag_names": ["*"]}
    response = requests.post(
        API_URL,
        json=payload,
        headers={"Content-Type": "application/json"},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def download_file(key: str, local: Path):
    """Download object *key* from TOS_BUCKET to *local* with a progress bar.

    The object size is read with a HEAD request first so the bar has a total.

    Args:
        key: object key inside TOS_BUCKET.
        local: destination path on the local file system.
    """
    meta = tos_client.head_object(TOS_BUCKET, key)
    total = int(meta.content_length)
    with tqdm(total=total, unit="B", unit_scale=True, desc=f"↓ {key}") as bar:

        def _advance(consumed, *_):
            # tqdm.update() takes an increment, not a remaining or absolute
            # count, so move the bar by the bytes received since the last
            # callback. Extra positional arguments are accepted and ignored.
            bar.update(consumed - bar.n)

        # NOTE(review): assumes the first callback argument is the cumulative
        # number of bytes consumed. Confirm the keyword name and callback
        # signature against the installed TOS SDK (its documentation describes
        # a `data_transfer_listener` taking four arguments).
        tos_client.get_object_to_file(
            TOS_BUCKET,
            key,
            str(local),
            progress_callback=_advance,
        )
|
||||
|
||||
|
||||
def upload_file(local: Path, key: str) -> str:
    """Upload *local* to TOS_BUCKET under *key* and return its HTTPS URL.

    The URL is built as https://<TOS_BUCKET>.<TOS_ENDPOINT>/<key>.
    """
    tos_client.put_object_from_file(TOS_BUCKET, key, str(local))
    url = "https://{}.{}/{}".format(TOS_BUCKET, TOS_ENDPOINT, key)
    return url
|
||||
|
||||
|
||||
def merge_bags(inputs: list[Path], output: Path):
    """Run the external ``rosbag-merge`` command to merge *inputs* into *output*.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    command = ["rosbag-merge", "-o", str(output)]
    command.extend(str(bag) for bag in inputs)
    subprocess.check_call(command)
|
||||
|
||||
|
||||
def update_db(parent: str, tos_url: str):
    """Store *tos_url* as tos_path on the bag_task rows of *parent*.

    Args:
        parent: value matched against bag_task.parent_bag.
        tos_url: value written to bag_task.tos_path.
    """
    sql = "UPDATE bag_task SET tos_path = %s WHERE parent_bag = %s"
    conn = psycopg2.connect(PG_DSN)
    try:
        # psycopg2's connection context manager only ends the transaction
        # (commit on success, rollback on error); it does not close the
        # connection, hence the explicit close() below.
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql, (tos_url, parent))
                updated = cur.rowcount
    finally:
        conn.close()
    if updated == 0:
        log.warning("[DB] no bag_task row matched parent_bag=%s", parent)
    log.info("[DB] %s tos_path ⇢ %s", parent, tos_url)
|
||||
|
||||
|
||||
def work_one(parent: str, children: list[str]) -> str:
    """Download *children*, merge them into *parent*, upload it and update the DB.

    All intermediate files live in TEMP_ROOT/<parent>, which is removed when
    the function returns, whether it succeeded or failed.

    Args:
        parent: name of the merged bag; also used as the upload key.
        children: object keys of the bags to merge.

    Returns:
        The URL of the uploaded merged bag.
    """
    log.info("start parent=%s children=%d", parent, len(children))
    wd = TEMP_ROOT / parent
    wd.mkdir(parents=True, exist_ok=True)
    try:
        # Strip leading slashes so an absolute-looking key cannot make the
        # local path escape the working directory.
        subs = [wd / c.lstrip("/") for c in children]
        for c, s in zip(children, subs):
            # Keys may contain "/" and therefore need intermediate directories.
            s.parent.mkdir(parents=True, exist_ok=True)
            download_file(c, s)

        out = wd / parent.lstrip("/")
        out.parent.mkdir(parents=True, exist_ok=True)
        merge_bags(subs, out)

        url = upload_file(out, parent)
        update_db(parent, url)
    finally:
        # Always reclaim the scratch space; ignore cleanup errors so they do
        # not mask an exception raised by the steps above.
        shutil.rmtree(wd, ignore_errors=True)
    log.info("finish parent=%s", parent)
    return url
|
||||
|
||||
|
||||
# ---------- entry point ----------
def main():
    """Merge every parent bag returned by the API, one worker process per task.

    A failing parent is logged and does not stop the remaining ones from
    being reported.

    Raises:
        SystemExit: with status 1 when at least one parent failed.
    """
    TEMP_ROOT.mkdir(parents=True, exist_ok=True)
    mapping = fetch_mapping()
    failed = []
    with ProcessPoolExecutor() as pool:
        futures = {pool.submit(work_one, p, c): p for p, c in mapping.items()}
        for fu in as_completed(futures):
            parent = futures[fu]
            try:
                url = fu.result()
            except Exception:
                # Keep going so one bad bag does not hide the other results.
                log.exception("failed %s", parent)
                failed.append(parent)
            else:
                log.info("done %s -> %s", parent, url)
    if failed:
        log.error("%d of %d parent bag(s) failed: %s", len(failed), len(futures), failed)
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user