第一次

This commit is contained in:
ZhuJW
2026-04-16 15:44:32 +08:00
commit 5a98242f2f
171 changed files with 42954 additions and 0 deletions

View File

@@ -0,0 +1,97 @@
#!/usr/bin/env bash
set -euo pipefail
# The caller must export all of:
#   INPUT_ROOT WORKFLOW_ID SUB_DIR OUTPUT_ROOT
# Fail fast with a clear message when any of them is unset or empty: an empty
# value would otherwise silently produce paths rooted at "/" that are later
# handed to `rsync --delete`.
: "${INPUT_ROOT:?INPUT_ROOT must be exported and non-empty}"
: "${WORKFLOW_ID:?WORKFLOW_ID must be exported and non-empty}"
: "${SUB_DIR:?SUB_DIR must be exported and non-empty}"
: "${OUTPUT_ROOT:?OUTPUT_ROOT must be exported and non-empty}"
# INPUT_ROOT is narrowed to this workflow's sub-tree.
INPUT_ROOT="${INPUT_ROOT}/${WORKFLOW_ID}"
INPUT_DIR="${INPUT_ROOT}/${SUB_DIR}"   # actually points at the level above bagdir_splits
OUTPUT_DIR="${OUTPUT_ROOT}"            # usually /nas_perception/.../output/wfxxx
# Print all arguments as one line on stdout, prefixed with the current
# timestamp (date '+%F %T') and this shell's PID.
log() {
  printf '[%s] [%s] %s\n' "$(date '+%F %T')" "$$" "$*"
}
#----------------------------------------------------------
# Shared wrappers: directory sync / file copy
#----------------------------------------------------------
# sync_dir SRC_DIR DST_DIR
#   If SRC_DIR is a directory, log its base name, create DST_DIR and mirror
#   SRC_DIR's contents into it (rsync -a --delete: entries in DST_DIR that are
#   not in SRC_DIR are removed). Does nothing when SRC_DIR is absent.
sync_dir() {
  local from="$1" to="$2"
  [[ -d ${from} ]] || return 0
  log " + $(basename "${from}")/"
  mkdir -p "${to}"
  rsync -a --delete "${from}/" "${to}/"
}
# sync_file SRC_FILE DST_DIR
#   If SRC_FILE is a regular file, log its base name, create DST_DIR and copy
#   the file into it, preserving mode/ownership/timestamps (cp -p).
#   Does nothing when SRC_FILE is absent.
sync_file() {
  local from="$1" to="$2"
  [[ -f ${from} ]] || return 0
  log " + $(basename "${from}")"
  mkdir -p "${to}"
  cp -p "${from}" "${to}/"
}
#----------------------------------------------------------
# Main logic
#----------------------------------------------------------
log "Script started"
log "INPUT_DIR = ${INPUT_DIR}"
log "OUTPUT_DIR = ${OUTPUT_DIR}"
[[ -d ${INPUT_DIR} ]] || { log "ERROR: INPUT_DIR not found: ${INPUT_DIR}"; exit 1; }

# Visit every <INPUT_DIR>/<split_N>/<name>.bag.dir directory (exactly two levels
# below INPUT_DIR). Names are NUL-delimited so unusual characters are safe.
while IFS= read -r -d '' src; do
  # Strip the "<INPUT_DIR>/<split_N>/" prefix, leaving just the bag.dir name.
  # INPUT_DIR is quoted inside the pattern so that any glob characters in the
  # path are matched literally.
  rel="${src#"${INPUT_DIR}"/*/}"
  dest="${OUTPUT_DIR}/${rel}/derived/${SUB_DIR}"
  mkdir -p "${dest}"
  basename_bag=$(basename "$src")               # xxx.bag.dir
  pkgname="${basename_bag%.bag.dir}"            # xxx
  split_name=$(basename "$(dirname "$src")")    # split_0 / split_1 / ...
  truth_root="${INPUT_DIR}/${split_name}"
  log "==================== Processing ${pkgname} ==================="

  # 1. Legacy object outputs, synced under their new names
  sync_dir "${src}/object_det_ep20" "${dest}/object_det_al"
  sync_dir "${src}/object_tracking" "${dest}/object_tracking_al"

  # 2. Directories: slam + lidar_gt + the other result directories
  for item in slam_lidar_ground slam_lidar_none_ground \
              lidar_gt_pandar128_5f_front lidar_gt_pandar128_5f_rear \
              object_det_ep20_lrgt_front object_det_ep20_lrgt_rear \
              object_lrgt_filter object_postprocess; do
    sync_dir "${src}/${item}" "${dest}/${item}"
  done

  # 3. Plain files
  for item in bev_image_ground.png ego_motion_slam_lidar.csv; do
    sync_file "${src}/${item}" "${dest}"
  done

  # 4. osm & split_json: the sources live in <INPUT_DIR>/<split_N>/ and are
  #    copied first into the input bag.dir, then on into the output.
  osm_src="${truth_root}/osm_out/${pkgname}.osm"
  if [[ -f ${osm_src} ]]; then
    log " + ${pkgname}.osm -> input bag.dir"
    cp -p "${osm_src}" "${src}/"
    log " + ${pkgname}.osm -> output"
    cp -p "${src}/${pkgname}.osm" "${dest}/"
  fi
  split_src="${truth_root}/split_json/${pkgname}"
  if [[ -d ${split_src} ]]; then
    log " + split_json/ -> input bag.dir"
    rsync -a --delete "${split_src}/" "${src}/split_json/"
    log " + split_json/ -> output"
    sync_dir "${src}/split_json" "${dest}/split_json"
  fi

  # 5. 2dseg and occ: merge <src>/<SUB_DIR>/ into dest.
  #    This must NOT go through sync_dir: its `rsync --delete` would mirror
  #    <src>/<SUB_DIR>/ onto ${dest} and erase everything steps 1-4 just
  #    placed there. A plain `rsync -a` adds/updates without deleting.
  extra_src="${src}/${SUB_DIR}"
  if [[ -d ${extra_src} ]]; then
    log " + $(basename "${extra_src}")/"
    rsync -a "${extra_src}/" "${dest}/"
  fi
done < <(find "${INPUT_DIR}" -mindepth 2 -maxdepth 2 -type d -name '*.bag.dir' -print0)
log "============================================================"
log "All done, success!"

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
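"""
check_bags.py

Environment variables:
    BAG_DIR        required  Root directory containing the bags.
    GT_API_URL     optional  Endpoint that returns the pipeline paths;
                             defaults to http://10.204.22.135:30000/api/gt/types
    OUTPUT_PREFIX  optional  Output prefix (concatenated directly, no separator).
    other vars               Ground-truth switches, for example:
                                 OBJECT_DETECTION=true
                                 LANE_DETECTION=false
                                 SLAM_GROUND=true

A variable takes part in the check only when its value is true or false:
    true   -> the path must exist (and, if it is a directory, must not be empty)
    false  -> the path must not exist
    any other value, or unset -> ignored

Results are written to both stdout and list.txt, one fully concatenated path
per line.

Additions:
    - Check whether BAG_DIR is empty before scanning.
    - Statistics: total bags, number of rules, passed, failed.
    - When a directory exists, additionally check that it is non-empty.
"""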
import os
import requests
import logging
import sys
# ---------- Logging ----------
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="[%(asctime)s][%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("check_bags")

# ---------- 1. Base directory and settings ----------
# BAG_DIR must name an existing directory; otherwise the script exits with 1.
BASE = os.getenv("BAG_DIR")
if not (BASE and os.path.isdir(BASE)):
    log.error("BAG_DIR not set or not a directory")
    sys.exit(1)

# Prefix prepended verbatim to every output line (empty by default).
PREFIX = os.getenv("OUTPUT_PREFIX", "")

# Endpoint that lists the pipeline paths; trailing whitespace is stripped.
GT_API_URL = os.getenv("GT_API_URL", "http://10.204.22.135:30000/api/gt/types")
GT_API_URL = GT_API_URL.rstrip()
log.info("GT_API_URL = %s", GT_API_URL)
# ---------- 2. Fetch the pipeline list from the API ----------
# Any failure here (network error, HTTP error status, invalid JSON, or a
# payload that is not a list) is fatal: the rules cannot be built without it.
try:
    log.info("fetching pipeline list from %s", GT_API_URL)
    resp = requests.get(GT_API_URL, timeout=10)
    # Reject 4xx/5xx responses instead of trying to parse an error body.
    resp.raise_for_status()
    api = resp.json()
    # The rule-collection step iterates over dict items; fail here with a
    # clear message rather than later with an AttributeError.
    if not isinstance(api, list):
        raise ValueError(f"expected a JSON list, got {type(api).__name__}")
    log.info("got %d items from API", len(api))
except Exception as e:
    log.error("API unreachable: %s", e)
    sys.exit(1)
# ---------- 3. Collect the check rules ----------
# Each rule is a (relative_path, must_exist) tuple. Only API items of type
# "pipeline" are considered, and only when the environment variable named
# after the item is "true" or "false" (case-insensitive).
_FLAG_VALUES = {"true": True, "false": False}
checks = []
for entry in api:
    if entry.get("type") != "pipeline":
        continue
    rule_name = entry["name"]
    required = _FLAG_VALUES.get(os.environ.get(rule_name, "").lower())
    if required is None:
        # Variable unset or set to something other than true/false: ignore.
        continue
    # Drop leading slashes so os.path.join later treats the path as relative.
    rel_path = entry["path"].lstrip("/")
    checks.append((rel_path, required))
    log.info(
        "check rule: %-30s must_exist=%-5s path=%s", rule_name, required, rel_path
    )
if not checks:
    log.error("No pipeline paths enabled for check")
    sys.exit(1)
# ---------- 4. 遍历 bag + 统计 ----------
def _empty_dir(p: str) -> bool:
    """Return True only when *p* is an existing directory with no entries."""
    if not os.path.isdir(p):
        return False
    return len(os.listdir(p)) == 0
# Counters reported in the final summary.
valid_cnt = invalid_cnt = 0

# Candidate bags: immediate sub-directories of BASE whose name ends in ".bag.dir".
bag_dirs = [
    d
    for d in os.listdir(BASE)
    if d.endswith(".bag.dir") and os.path.isdir(os.path.join(BASE, d))
]
if not bag_dirs:
    log.error("No *.bag.dir found under BAG_DIR (%s), aborting", BASE)
    sys.exit(1)
total_bag = len(bag_dirs)
log.info("start scanning %d bag(s) against %d rule(s)", total_bag, len(checks))

# Full paths of the bags that satisfy every rule.
valid = []
for bag in bag_dirs:
    bag_path = os.path.join(BASE, bag)
    ok = True
    for rel, must_exist in checks:
        full = os.path.join(bag_path, rel)
        exists = os.path.exists(full)
        # Key point: a required directory that is empty counts as missing.
        # (_empty_dir already returns False for anything that is not a directory.)
        if must_exist and _empty_dir(full):
            exists = False
        if exists != must_exist:
            # Logged at WARNING: logging is configured at INFO, so a DEBUG
            # record would never be shown and rejected bags would go
            # unexplained.
            log.warning(
                "bag %s failed: %s exists=%s required=%s",
                bag,
                rel,
                exists,
                must_exist,
            )
            ok = False
            break  # the first violated rule is enough to reject the bag
    if ok:
        valid_cnt += 1
        valid.append(bag_path)
        log.info("valid bag: %s", bag)
    else:
        invalid_cnt += 1
# ---------- 5. Emit results & summary ----------
# One line per passing bag: OUTPUT_PREFIX immediately followed by the bag
# directory name.
out_file = "list.txt"
result_lines = [f"{PREFIX}{os.path.basename(bag_path)}" for bag_path in valid]
with open(out_file, "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in result_lines)
# The results are documented (and logged below) as also going to stdout, but
# logging writes to stderr by default, so print them explicitly.
for line in result_lines:
    print(line)
log.info("==== summary ====")
log.info("total bags : %d", total_bag)
log.info("rules : %d", len(checks))
log.info("passed : %d", valid_cnt)
log.info("failed : %d", invalid_cnt)
log.info("wrote %d bags to %s and stdout", len(valid), out_file)

View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import shutil
import subprocess
import logging
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
import requests
import tos
import psycopg2
from tqdm import tqdm
# ---------- Logging ----------
# Records go both to bag_merge.log (in the current directory) and to the
# default stream handler.
logging.basicConfig(
    handlers=[logging.FileHandler("bag_merge.log"), logging.StreamHandler()],
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# ---------- Environment variables ----------
# Each value is None when the variable is not set.
API_URL = os.environ.get("API_URL")
TOS_ENDPOINT = os.environ.get("TOS_ENDPOINT")
TOS_REGION = os.environ.get("TOS_REGION")
TOS_BUCKET = os.environ.get("TOS_BUCKET")
TOS_AK = os.environ.get("TOS_ACCESS_KEY")
TOS_SK = os.environ.get("TOS_SECRET_KEY")
PG_DSN = os.environ.get("PG_DSN")
# Scratch area for downloads and merge output.
TEMP_ROOT = Path(os.environ.get("TEMP_ROOT", "/tmp/bag_merge"))

# ---------- TOS client ----------
# Created once at import time and shared by the helpers below.
tos_client = tos.TosClientV2(TOS_AK, TOS_SK, TOS_ENDPOINT, TOS_REGION)
# ---------- 原子函数 ----------
def fetch_mapping() -> dict:
    """POST a wildcard bag-name query to API_URL and return the decoded JSON.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    log.info("POST %s", API_URL)
    request_options = {
        "json": {"bag_names": ["*"]},
        "headers": {"Content-Type": "application/json"},
        "timeout": 30,
    }
    response = requests.post(API_URL, **request_options)
    response.raise_for_status()
    payload = response.json()
    return payload
def download_file(key: str, local: Path):
    """Download object *key* from TOS_BUCKET into *local* with a progress bar.

    Args:
        key: Object key inside TOS_BUCKET.
        local: Destination file path.
    """
    # The object size is fetched first so the progress bar knows its total.
    meta = tos_client.head_object(TOS_BUCKET, key)
    total = int(meta.content_length)
    with tqdm(total=total, unit="B", unit_scale=True, desc=f"{key}") as bar:

        def _on_progress(consumed_bytes, total_bytes, rw_once_bytes=0, transfer_type=None):
            # tqdm.update() expects an *increment*. The callback reports the
            # cumulative byte count, so advance the bar by the difference from
            # its current position (never backwards). Passing
            # total - consumed, as before, fed the bar the remaining bytes.
            bar.update(max(0, consumed_bytes - bar.n))

        # TosClientV2 takes its progress callback as `data_transfer_listener`,
        # invoked with (consumed_bytes, total_bytes, rw_once_bytes, type).
        tos_client.get_object_to_file(
            TOS_BUCKET,
            key,
            str(local),
            data_transfer_listener=_on_progress,
        )
def upload_file(local: Path, key: str) -> str:
    """Upload *local* to TOS_BUCKET under *key* and return the object's URL.

    The URL is assembled as https://<TOS_BUCKET>.<TOS_ENDPOINT>/<key>.
    """
    source_path = str(local)
    tos_client.put_object_from_file(TOS_BUCKET, key, source_path)
    return "https://{}.{}/{}".format(TOS_BUCKET, TOS_ENDPOINT, key)
def merge_bags(inputs: list[Path], output: Path):
    """Run the external `rosbag-merge` tool to merge *inputs* into *output*.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    command = ["rosbag-merge", "-o", str(output)]
    command.extend(str(bag) for bag in inputs)
    subprocess.check_call(command)
def update_db(parent: str, tos_url: str):
    """Store *tos_url* as the tos_path of the bag_task row(s) for *parent*.

    Args:
        parent: Value matched against bag_task.parent_bag.
        tos_url: URL written to bag_task.tos_path.
    """
    sql = "UPDATE bag_task SET tos_path = %s WHERE parent_bag = %s"
    conn = psycopg2.connect(PG_DSN)
    try:
        # In psycopg2, `with conn` only scopes the transaction (commit on
        # success, rollback on error); it does NOT close the connection, so
        # it is closed explicitly in `finally` to avoid leaking one per call.
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql, (tos_url, parent))
    finally:
        conn.close()
    log.info("[DB] %s tos_path ⇢ %s", parent, tos_url)
def work_one(parent: str, children: list[str]) -> str:
    """Download *children*, merge them into *parent*, upload it and record the URL.

    Args:
        parent: Name of the merged bag; also used as the upload key and as the
            name of the working directory under TEMP_ROOT.
        children: TOS object keys of the bags to merge.

    Returns:
        The URL of the uploaded merged bag.
    """
    log.info("start parent=%s children=%d", parent, len(children))
    wd = TEMP_ROOT / parent
    wd.mkdir(parents=True, exist_ok=True)
    try:
        subs = [wd / c for c in children]
        for c, s in zip(children, subs):
            # Keys may contain "/" and therefore map to nested paths.
            s.parent.mkdir(parents=True, exist_ok=True)
            download_file(c, s)
        out = wd / parent
        out.parent.mkdir(parents=True, exist_ok=True)
        merge_bags(subs, out)
        url = upload_file(out, parent)
        update_db(parent, url)
    finally:
        # Always reclaim the scratch space, including when a step above
        # fails; ignore_errors keeps a cleanup problem from masking the
        # original exception.
        shutil.rmtree(wd, ignore_errors=True)
    log.info("finish parent=%s", parent)
    return url
# ---------- Entry point ----------
def main():
    """Merge every parent bag returned by the mapping API, in parallel.

    Each parent is processed by work_one in a worker process. A failure in one
    parent is logged and does not stop the others from being reported.

    Raises:
        RuntimeError: after all parents have finished, if any of them failed.
    """
    TEMP_ROOT.mkdir(parents=True, exist_ok=True)
    mapping = fetch_mapping()
    failed = []
    with ProcessPoolExecutor() as pool:
        futures = {pool.submit(work_one, p, c): p for p, c in mapping.items()}
        for fu in as_completed(futures):
            parent = futures[fu]
            try:
                url = fu.result()
            except Exception:
                # Keep draining the remaining futures so every parent's
                # outcome is reported, not just those before the first error.
                log.exception("failed %s", parent)
                failed.append(parent)
            else:
                log.info("done %s -> %s", parent, url)
    if failed:
        raise RuntimeError(
            f"{len(failed)} of {len(mapping)} parent bag(s) failed: {failed}"
        )


if __name__ == "__main__":
    main()