commit 105ccf145cd9105e53aa6966af2d133376e0e6d4 Author: wangwei Date: Fri Apr 17 11:41:22 2026 +0800 first commit diff --git a/GB_Doc/GB 11551-2014 汽车正面碰撞的乘员保护.pdf b/GB_Doc/GB 11551-2014 汽车正面碰撞的乘员保护.pdf new file mode 100644 index 0000000..f9d62c1 Binary files /dev/null and b/GB_Doc/GB 11551-2014 汽车正面碰撞的乘员保护.pdf differ diff --git a/GB_Doc/GB 11564-2024 机动车回复反射装置.pdf b/GB_Doc/GB 11564-2024 机动车回复反射装置.pdf new file mode 100644 index 0000000..ea1ca28 Binary files /dev/null and b/GB_Doc/GB 11564-2024 机动车回复反射装置.pdf differ diff --git a/GB_Doc/GB 11566-2024 乘用车外部凸出物.pdf b/GB_Doc/GB 11566-2024 乘用车外部凸出物.pdf new file mode 100644 index 0000000..282a0d1 Binary files /dev/null and b/GB_Doc/GB 11566-2024 乘用车外部凸出物.pdf differ diff --git a/GB_Doc/GB 11567-2017 汽车及挂车侧面和后下部防护要求.pdf b/GB_Doc/GB 11567-2017 汽车及挂车侧面和后下部防护要求.pdf new file mode 100644 index 0000000..08bbc7b Binary files /dev/null and b/GB_Doc/GB 11567-2017 汽车及挂车侧面和后下部防护要求.pdf differ diff --git a/GB_Doc/GB 12995-2006 机动轮椅车.pdf b/GB_Doc/GB 12995-2006 机动轮椅车.pdf new file mode 100644 index 0000000..ccf4930 Binary files /dev/null and b/GB_Doc/GB 12995-2006 机动轮椅车.pdf differ diff --git a/GB_Doc/GB 13057-2023 客车座椅及其车辆固定件的强度.pdf b/GB_Doc/GB 13057-2023 客车座椅及其车辆固定件的强度.pdf new file mode 100644 index 0000000..3cf65a8 Binary files /dev/null and b/GB_Doc/GB 13057-2023 客车座椅及其车辆固定件的强度.pdf differ diff --git a/GB_Doc/GB 13094-2017 客车结构安全要求.pdf b/GB_Doc/GB 13094-2017 客车结构安全要求.pdf new file mode 100644 index 0000000..7308cb3 Binary files /dev/null and b/GB_Doc/GB 13094-2017 客车结构安全要求.pdf differ diff --git a/GB_Doc/GB 13365-2005 机动车排气火花熄灭器.pdf b/GB_Doc/GB 13365-2005 机动车排气火花熄灭器.pdf new file mode 100644 index 0000000..c063350 Binary files /dev/null and b/GB_Doc/GB 13365-2005 机动车排气火花熄灭器.pdf differ diff --git a/GB_Doc/GB 13392-2023 道路运输危险货物车辆标志.pdf b/GB_Doc/GB 13392-2023 道路运输危险货物车辆标志.pdf new file mode 100644 index 0000000..0ab7315 Binary files /dev/null and b/GB_Doc/GB 13392-2023 道路运输危险货物车辆标志.pdf differ diff --git a/GB_Doc/GB 13895-2018 重负荷车辆齿轮油(GL-5).pdf 
b/GB_Doc/GB 13895-2018 重负荷车辆齿轮油(GL-5).pdf new file mode 100644 index 0000000..4dff415 Binary files /dev/null and b/GB_Doc/GB 13895-2018 重负荷车辆齿轮油(GL-5).pdf differ diff --git a/GB_Doc/GB 13954-2009 警车、消防车、救护车、工程救险车标志灯具.pdf b/GB_Doc/GB 13954-2009 警车、消防车、救护车、工程救险车标志灯具.pdf new file mode 100644 index 0000000..5b05d51 Binary files /dev/null and b/GB_Doc/GB 13954-2009 警车、消防车、救护车、工程救险车标志灯具.pdf differ diff --git a/GB_Doc/GB 14166-2024 机动车乘员用安全带和约束系统.pdf b/GB_Doc/GB 14166-2024 机动车乘员用安全带和约束系统.pdf new file mode 100644 index 0000000..0eef7a8 Binary files /dev/null and b/GB_Doc/GB 14166-2024 机动车乘员用安全带和约束系统.pdf differ diff --git a/GB_Doc/GB 14167-2024 机动车乘员用安全带和约束系统安装固定点.pdf b/GB_Doc/GB 14167-2024 机动车乘员用安全带和约束系统安装固定点.pdf new file mode 100644 index 0000000..b8a1a7b Binary files /dev/null and b/GB_Doc/GB 14167-2024 机动车乘员用安全带和约束系统安装固定点.pdf differ diff --git a/GB_Doc/GB 146.1-2020 标准轨距铁路限界 第1部分:机车车辆限界.pdf b/GB_Doc/GB 146.1-2020 标准轨距铁路限界 第1部分:机车车辆限界.pdf new file mode 100644 index 0000000..1739a91 Binary files /dev/null and b/GB_Doc/GB 146.1-2020 标准轨距铁路限界 第1部分:机车车辆限界.pdf differ diff --git a/GB_Doc/GB 14681.2-2006 机车船舶用电加温玻璃 第2部分:机车电加温玻璃.pdf b/GB_Doc/GB 14681.2-2006 机车船舶用电加温玻璃 第2部分:机车电加温玻璃.pdf new file mode 100644 index 0000000..ed10aa0 Binary files /dev/null and b/GB_Doc/GB 14681.2-2006 机车船舶用电加温玻璃 第2部分:机车电加温玻璃.pdf differ diff --git a/GB_Doc/GB 14747-2006 儿童三轮车安全要求.pdf b/GB_Doc/GB 14747-2006 儿童三轮车安全要求.pdf new file mode 100644 index 0000000..c5d302e Binary files /dev/null and b/GB_Doc/GB 14747-2006 儿童三轮车安全要求.pdf differ diff --git a/GB_Doc/GB 14748-2006 儿童推车安全要求.pdf b/GB_Doc/GB 14748-2006 儿童推车安全要求.pdf new file mode 100644 index 0000000..c1d3867 Binary files /dev/null and b/GB_Doc/GB 14748-2006 儿童推车安全要求.pdf differ diff --git a/GB_Doc/GB 14749-2006 婴儿学步车安全要求.pdf b/GB_Doc/GB 14749-2006 婴儿学步车安全要求.pdf new file mode 100644 index 0000000..c700a53 Binary files /dev/null and b/GB_Doc/GB 14749-2006 婴儿学步车安全要求.pdf differ diff --git a/GB_Doc/GB 14892-2006 城市轨道交通列车噪声限值和测量方法.pdf b/GB_Doc/GB 14892-2006 
城市轨道交通列车噪声限值和测量方法.pdf new file mode 100644 index 0000000..5e9f56e Binary files /dev/null and b/GB_Doc/GB 14892-2006 城市轨道交通列车噪声限值和测量方法.pdf differ diff --git a/GB_Doc/GB 15082-2008 汽车用车速表.pdf b/GB_Doc/GB 15082-2008 汽车用车速表.pdf new file mode 100644 index 0000000..71bb6e1 Binary files /dev/null and b/GB_Doc/GB 15082-2008 汽车用车速表.pdf differ diff --git a/GB_Doc/GB 15083-2019 汽车座椅、座椅固定装置及头枕强度要求和试验方法.pdf b/GB_Doc/GB 15083-2019 汽车座椅、座椅固定装置及头枕强度要求和试验方法.pdf new file mode 100644 index 0000000..3e303d3 Binary files /dev/null and b/GB_Doc/GB 15083-2019 汽车座椅、座椅固定装置及头枕强度要求和试验方法.pdf differ diff --git a/GB_Doc/GB 15084-2022 机动车辆 间接视野装置 性能和安装要求.pdf b/GB_Doc/GB 15084-2022 机动车辆 间接视野装置 性能和安装要求.pdf new file mode 100644 index 0000000..62b3c11 Binary files /dev/null and b/GB_Doc/GB 15084-2022 机动车辆 间接视野装置 性能和安装要求.pdf differ diff --git a/GB_Doc/GB 15606-2008 木工(材)车间安全生产通则.pdf b/GB_Doc/GB 15606-2008 木工(材)车间安全生产通则.pdf new file mode 100644 index 0000000..d48bbfc Binary files /dev/null and b/GB_Doc/GB 15606-2008 木工(材)车间安全生产通则.pdf differ diff --git a/GB_Doc/GB 15740-2024 汽车防盗装置.pdf b/GB_Doc/GB 15740-2024 汽车防盗装置.pdf new file mode 100644 index 0000000..106de15 Binary files /dev/null and b/GB_Doc/GB 15740-2024 汽车防盗装置.pdf differ diff --git a/GB_Doc/GB 15742-2019 机动车用喇叭的性能要求及试验方法.pdf b/GB_Doc/GB 15742-2019 机动车用喇叭的性能要求及试验方法.pdf new file mode 100644 index 0000000..80ed5fd Binary files /dev/null and b/GB_Doc/GB 15742-2019 机动车用喇叭的性能要求及试验方法.pdf differ diff --git a/GB_Doc/GB 15744-2019 摩托车和轻便摩托车燃油消耗量限值及测量方法.pdf b/GB_Doc/GB 15744-2019 摩托车和轻便摩托车燃油消耗量限值及测量方法.pdf new file mode 100644 index 0000000..940a367 Binary files /dev/null and b/GB_Doc/GB 15744-2019 摩托车和轻便摩托车燃油消耗量限值及测量方法.pdf differ diff --git a/GB_Doc/GB 1589-2016 汽车、挂车及汽车列车外廓尺寸、 轴荷及质量限值.pdf b/GB_Doc/GB 1589-2016 汽车、挂车及汽车列车外廓尺寸、 轴荷及质量限值.pdf new file mode 100644 index 0000000..9c3deb1 Binary files /dev/null and b/GB_Doc/GB 1589-2016 汽车、挂车及汽车列车外廓尺寸、 轴荷及质量限值.pdf differ diff --git a/GB_Doc/GB 16151.5-2008 农业机械运行安全技术条件 第5部分:挂车.pdf b/GB_Doc/GB 16151.5-2008 
农业机械运行安全技术条件 第5部分:挂车.pdf new file mode 100644 index 0000000..d0181f6 Binary files /dev/null and b/GB_Doc/GB 16151.5-2008 农业机械运行安全技术条件 第5部分:挂车.pdf differ diff --git a/GB_Doc/GB 16735-2019 道路车辆 车辆识别代号(VIN).pdf b/GB_Doc/GB 16735-2019 道路车辆 车辆识别代号(VIN).pdf new file mode 100644 index 0000000..beaddf3 Binary files /dev/null and b/GB_Doc/GB 16735-2019 道路车辆 车辆识别代号(VIN).pdf differ diff --git a/GB_Doc/GB 16737-2019 道路车辆 世界制造厂识别代号(WMI).pdf b/GB_Doc/GB 16737-2019 道路车辆 世界制造厂识别代号(WMI).pdf new file mode 100644 index 0000000..5c9ba8b Binary files /dev/null and b/GB_Doc/GB 16737-2019 道路车辆 世界制造厂识别代号(WMI).pdf differ diff --git a/GB_Doc/GB 17353-2024 摩托车和轻便摩托车防盗装置.pdf b/GB_Doc/GB 17353-2024 摩托车和轻便摩托车防盗装置.pdf new file mode 100644 index 0000000..df73090 Binary files /dev/null and b/GB_Doc/GB 17353-2024 摩托车和轻便摩托车防盗装置.pdf differ diff --git a/GB_Doc/GB 17354-2024 乘用车前后端保护装置.pdf b/GB_Doc/GB 17354-2024 乘用车前后端保护装置.pdf new file mode 100644 index 0000000..6772f2b Binary files /dev/null and b/GB_Doc/GB 17354-2024 乘用车前后端保护装置.pdf differ diff --git a/GB_Doc/GB 17675-2021 汽车转向系 基本要求.pdf b/GB_Doc/GB 17675-2021 汽车转向系 基本要求.pdf new file mode 100644 index 0000000..138433f Binary files /dev/null and b/GB_Doc/GB 17675-2021 汽车转向系 基本要求.pdf differ diff --git a/GB_Doc/GB 17907-2010 机械式停车设备 通用安全要求.pdf b/GB_Doc/GB 17907-2010 机械式停车设备 通用安全要求.pdf new file mode 100644 index 0000000..445c2ad Binary files /dev/null and b/GB_Doc/GB 17907-2010 机械式停车设备 通用安全要求.pdf differ diff --git a/GB_Doc/GB 17930-2016 车用汽油.pdf b/GB_Doc/GB 17930-2016 车用汽油.pdf new file mode 100644 index 0000000..4af6645 Binary files /dev/null and b/GB_Doc/GB 17930-2016 车用汽油.pdf differ diff --git a/GB_Doc/GB 18047-2017 车用压缩天然气.pdf b/GB_Doc/GB 18047-2017 车用压缩天然气.pdf new file mode 100644 index 0000000..7845933 Binary files /dev/null and b/GB_Doc/GB 18047-2017 车用压缩天然气.pdf differ diff --git a/GB_Doc/GB 18296-2019 汽车燃油箱及其安装的安全性能要求和试验方法.pdf b/GB_Doc/GB 18296-2019 汽车燃油箱及其安装的安全性能要求和试验方法.pdf new file mode 100644 index 0000000..5a5f9d6 Binary files /dev/null and 
b/GB_Doc/GB 18296-2019 汽车燃油箱及其安装的安全性能要求和试验方法.pdf differ diff --git a/GB_Doc/GB 18320-2008 三轮汽车和低速货车 安全技术要求.pdf b/GB_Doc/GB 18320-2008 三轮汽车和低速货车 安全技术要求.pdf new file mode 100644 index 0000000..30e3b43 Binary files /dev/null and b/GB_Doc/GB 18320-2008 三轮汽车和低速货车 安全技术要求.pdf differ diff --git a/GB_Doc/GB 18321-2001 农用运输车 噪声限值.pdf b/GB_Doc/GB 18321-2001 农用运输车 噪声限值.pdf new file mode 100644 index 0000000..5ec713e Binary files /dev/null and b/GB_Doc/GB 18321-2001 农用运输车 噪声限值.pdf differ diff --git a/GB_Doc/GB 18351-2017 车用乙醇汽油(E10).pdf b/GB_Doc/GB 18351-2017 车用乙醇汽油(E10).pdf new file mode 100644 index 0000000..2a59ade Binary files /dev/null and b/GB_Doc/GB 18351-2017 车用乙醇汽油(E10).pdf differ diff --git a/GB_Doc/GB 18384-2020 电动汽车安全要求.pdf b/GB_Doc/GB 18384-2020 电动汽车安全要求.pdf new file mode 100644 index 0000000..7f88d08 Binary files /dev/null and b/GB_Doc/GB 18384-2020 电动汽车安全要求.pdf differ diff --git a/GB_Doc/GB 18436-2022 轮机日志和车钟记录簿.pdf b/GB_Doc/GB 18436-2022 轮机日志和车钟记录簿.pdf new file mode 100644 index 0000000..f9366dd Binary files /dev/null and b/GB_Doc/GB 18436-2022 轮机日志和车钟记录簿.pdf differ diff --git a/GB_Doc/GB 18564.1-2019 道路运输液体危险货物罐式车辆 第1部分:金属常压罐体技术要求.pdf b/GB_Doc/GB 18564.1-2019 道路运输液体危险货物罐式车辆 第1部分:金属常压罐体技术要求.pdf new file mode 100644 index 0000000..fd0c37e Binary files /dev/null and b/GB_Doc/GB 18564.1-2019 道路运输液体危险货物罐式车辆 第1部分:金属常压罐体技术要求.pdf differ diff --git a/GB_Doc/GB 18564.2-2008 道路运输液体危险货物罐式车辆 第2部分:非金属常压罐体技术要求.pdf b/GB_Doc/GB 18564.2-2008 道路运输液体危险货物罐式车辆 第2部分:非金属常压罐体技术要求.pdf new file mode 100644 index 0000000..fb3c28c Binary files /dev/null and b/GB_Doc/GB 18564.2-2008 道路运输液体危险货物罐式车辆 第2部分:非金属常压罐体技术要求.pdf differ diff --git a/GB_Doc/GB 19079.2-2005 体育场所开放条件与技术要求 第2部分 卡丁车场所.pdf b/GB_Doc/GB 19079.2-2005 体育场所开放条件与技术要求 第2部分 卡丁车场所.pdf new file mode 100644 index 0000000..4a8ff43 Binary files /dev/null and b/GB_Doc/GB 19079.2-2005 体育场所开放条件与技术要求 第2部分 卡丁车场所.pdf differ diff --git a/GB_Doc/GB 19079.2-2026 体育场所开放条件与技术要求 第2部分:卡丁车场所.pdf b/GB_Doc/GB 19079.2-2026 体育场所开放条件与技术要求 第2部分:卡丁车场所.pdf 
new file mode 100644 index 0000000..b32934e Binary files /dev/null and b/GB_Doc/GB 19079.2-2026 体育场所开放条件与技术要求 第2部分:卡丁车场所.pdf differ diff --git a/GB_Doc/GB 19147-2016 车用柴油.pdf b/GB_Doc/GB 19147-2016 车用柴油.pdf new file mode 100644 index 0000000..96a3465 Binary files /dev/null and b/GB_Doc/GB 19147-2016 车用柴油.pdf differ diff --git a/GB_Doc/GB 19194-2003 竞赛类卡丁车通用技术条件.pdf b/GB_Doc/GB 19194-2003 竞赛类卡丁车通用技术条件.pdf new file mode 100644 index 0000000..2b7be9a Binary files /dev/null and b/GB_Doc/GB 19194-2003 竞赛类卡丁车通用技术条件.pdf differ diff --git a/GB_Doc/GB 19197-2003 卡丁车场建设规范.pdf b/GB_Doc/GB 19197-2003 卡丁车场建设规范.pdf new file mode 100644 index 0000000..c0ca200 Binary files /dev/null and b/GB_Doc/GB 19197-2003 卡丁车场建设规范.pdf differ diff --git a/GB_Doc/GB 19239-2022 燃气汽车燃气系统安装规范.pdf b/GB_Doc/GB 19239-2022 燃气汽车燃气系统安装规范.pdf new file mode 100644 index 0000000..fe50dc4 Binary files /dev/null and b/GB_Doc/GB 19239-2022 燃气汽车燃气系统安装规范.pdf differ diff --git a/GB_Doc/GB 19260-2016 低地板及低入口城市客车结构要求.pdf b/GB_Doc/GB 19260-2016 低地板及低入口城市客车结构要求.pdf new file mode 100644 index 0000000..c89f399 Binary files /dev/null and b/GB_Doc/GB 19260-2016 低地板及低入口城市客车结构要求.pdf differ diff --git a/GB_Doc/GB 19402-2012 客运地面缆车安全要求.pdf b/GB_Doc/GB 19402-2012 客运地面缆车安全要求.pdf new file mode 100644 index 0000000..c1dcc93 Binary files /dev/null and b/GB_Doc/GB 19402-2012 客运地面缆车安全要求.pdf differ diff --git a/GB_Doc/GB 19522-2024 车辆驾驶人员血液、呼气酒精含量阈值与检验.pdf b/GB_Doc/GB 19522-2024 车辆驾驶人员血液、呼气酒精含量阈值与检验.pdf new file mode 100644 index 0000000..0891b12 Binary files /dev/null and b/GB_Doc/GB 19522-2024 车辆驾驶人员血液、呼气酒精含量阈值与检验.pdf differ diff --git a/GB_Doc/GB 19578-2024 乘用车燃料消耗量限值.pdf b/GB_Doc/GB 19578-2024 乘用车燃料消耗量限值.pdf new file mode 100644 index 0000000..fd62143 Binary files /dev/null and b/GB_Doc/GB 19578-2024 乘用车燃料消耗量限值.pdf differ diff --git a/GB_Doc/GB 19592-2019 车用汽油清净剂.pdf b/GB_Doc/GB 19592-2019 车用汽油清净剂.pdf new file mode 100644 index 0000000..e20e13a Binary files /dev/null and b/GB_Doc/GB 19592-2019 车用汽油清净剂.pdf differ diff --git 
a/GB_Doc/GB 20072-2024 乘用车后碰撞安全要求.pdf b/GB_Doc/GB 20072-2024 乘用车后碰撞安全要求.pdf new file mode 100644 index 0000000..ffd4487 Binary files /dev/null and b/GB_Doc/GB 20072-2024 乘用车后碰撞安全要求.pdf differ diff --git a/GB_Doc/GB 20073-2018 摩托车和轻便摩托车制动性能要求及试验方法.pdf b/GB_Doc/GB 20073-2018 摩托车和轻便摩托车制动性能要求及试验方法.pdf new file mode 100644 index 0000000..d0e8641 Binary files /dev/null and b/GB_Doc/GB 20073-2018 摩托车和轻便摩托车制动性能要求及试验方法.pdf differ diff --git a/GB_Doc/GB 20073-2025 摩托车和轻便摩托车制动性能要求及试验方法.pdf b/GB_Doc/GB 20073-2025 摩托车和轻便摩托车制动性能要求及试验方法.pdf new file mode 100644 index 0000000..8e0a843 Binary files /dev/null and b/GB_Doc/GB 20073-2025 摩托车和轻便摩托车制动性能要求及试验方法.pdf differ diff --git a/GB_Doc/GB 20074-2017 摩托车和轻便摩托车外部凸出物.pdf b/GB_Doc/GB 20074-2017 摩托车和轻便摩托车外部凸出物.pdf new file mode 100644 index 0000000..e3c17d7 Binary files /dev/null and b/GB_Doc/GB 20074-2017 摩托车和轻便摩托车外部凸出物.pdf differ diff --git a/GB_Doc/GB 20075-2020 摩托车乘员扶手和脚踏.pdf b/GB_Doc/GB 20075-2020 摩托车乘员扶手和脚踏.pdf new file mode 100644 index 0000000..7dab893 Binary files /dev/null and b/GB_Doc/GB 20075-2020 摩托车乘员扶手和脚踏.pdf differ diff --git a/GB_Doc/GB 20180-2006 矿用辅助绞车 安全要求.pdf b/GB_Doc/GB 20180-2006 矿用辅助绞车 安全要求.pdf new file mode 100644 index 0000000..b38a2cb Binary files /dev/null and b/GB_Doc/GB 20180-2006 矿用辅助绞车 安全要求.pdf differ diff --git a/GB_Doc/GB 20181-2006 矿井提升机和矿用提升绞车 安全要求.pdf b/GB_Doc/GB 20181-2006 矿井提升机和矿用提升绞车 安全要求.pdf new file mode 100644 index 0000000..99176be Binary files /dev/null and b/GB_Doc/GB 20181-2006 矿井提升机和矿用提升绞车 安全要求.pdf differ diff --git a/GB_Doc/GB 20182-2024 商用车驾驶室外部凸出物.pdf b/GB_Doc/GB 20182-2024 商用车驾驶室外部凸出物.pdf new file mode 100644 index 0000000..f36108a Binary files /dev/null and b/GB_Doc/GB 20182-2024 商用车驾驶室外部凸出物.pdf differ diff --git a/GB_Doc/GB 20300-2018 道路运输爆炸品和剧毒化学品车辆安全技术条件.pdf b/GB_Doc/GB 20300-2018 道路运输爆炸品和剧毒化学品车辆安全技术条件.pdf new file mode 100644 index 0000000..2a4a5de Binary files /dev/null and b/GB_Doc/GB 20300-2018 道路运输爆炸品和剧毒化学品车辆安全技术条件.pdf differ diff --git a/GB_Doc/GB 20997-2024 
轻型商用车辆燃料消耗量限值及评价指标.pdf b/GB_Doc/GB 20997-2024 轻型商用车辆燃料消耗量限值及评价指标.pdf new file mode 100644 index 0000000..a6f0fa1 Binary files /dev/null and b/GB_Doc/GB 20997-2024 轻型商用车辆燃料消耗量限值及评价指标.pdf differ diff --git a/GB_Doc/GB 21011-2007 矿用人车 安全要求.pdf b/GB_Doc/GB 21011-2007 矿用人车 安全要求.pdf new file mode 100644 index 0000000..f3869d3 Binary files /dev/null and b/GB_Doc/GB 21011-2007 矿用人车 安全要求.pdf differ diff --git a/GB_Doc/GB 21377-2015 三轮汽车 燃料消耗量限值及测量方法.pdf b/GB_Doc/GB 21377-2015 三轮汽车 燃料消耗量限值及测量方法.pdf new file mode 100644 index 0000000..ca2f862 Binary files /dev/null and b/GB_Doc/GB 21377-2015 三轮汽车 燃料消耗量限值及测量方法.pdf differ diff --git a/GB_Doc/GB 21378-2015 低速货车 燃料消耗量限值及测量方法.pdf b/GB_Doc/GB 21378-2015 低速货车 燃料消耗量限值及测量方法.pdf new file mode 100644 index 0000000..522d2e3 Binary files /dev/null and b/GB_Doc/GB 21378-2015 低速货车 燃料消耗量限值及测量方法.pdf differ diff --git a/GB_Doc/GB 21500-2008 地下矿用无轨轮胎式运矿车 安全要求.pdf b/GB_Doc/GB 21500-2008 地下矿用无轨轮胎式运矿车 安全要求.pdf new file mode 100644 index 0000000..5f2ddf2 Binary files /dev/null and b/GB_Doc/GB 21500-2008 地下矿用无轨轮胎式运矿车 安全要求.pdf differ diff --git a/GB_Doc/GB 22030-2017 车用乙醇汽油调合组分油.pdf b/GB_Doc/GB 22030-2017 车用乙醇汽油调合组分油.pdf new file mode 100644 index 0000000..db1b5cb Binary files /dev/null and b/GB_Doc/GB 22030-2017 车用乙醇汽油调合组分油.pdf differ diff --git a/GB_Doc/GB 22128-2019 报废机动车回收拆解企业技术规范.pdf b/GB_Doc/GB 22128-2019 报废机动车回收拆解企业技术规范.pdf new file mode 100644 index 0000000..51be9b6 Binary files /dev/null and b/GB_Doc/GB 22128-2019 报废机动车回收拆解企业技术规范.pdf differ diff --git a/GB_Doc/GB 22757.1-2023 轻型汽车能源消耗量标识 第1部分:汽油和柴油汽车.pdf b/GB_Doc/GB 22757.1-2023 轻型汽车能源消耗量标识 第1部分:汽油和柴油汽车.pdf new file mode 100644 index 0000000..b3c5fc1 Binary files /dev/null and b/GB_Doc/GB 22757.1-2023 轻型汽车能源消耗量标识 第1部分:汽油和柴油汽车.pdf differ diff --git a/GB_Doc/GB 22757.2-2023 轻型汽车能源消耗量标识 第2部分:可外接充电式混合动力电动汽车和纯电动汽车.pdf b/GB_Doc/GB 22757.2-2023 轻型汽车能源消耗量标识 第2部分:可外接充电式混合动力电动汽车和纯电动汽车.pdf new file mode 100644 index 0000000..6d9a04a Binary files /dev/null and b/GB_Doc/GB 22757.2-2023 轻型汽车能源消耗量标识 
第2部分:可外接充电式混合动力电动汽车和纯电动汽车.pdf differ diff --git a/GB_Doc/GB 24155-2020 电动摩托车和电动轻便摩托车安全要求.pdf b/GB_Doc/GB 24155-2020 电动摩托车和电动轻便摩托车安全要求.pdf new file mode 100644 index 0000000..8c498a2 Binary files /dev/null and b/GB_Doc/GB 24155-2020 电动摩托车和电动轻便摩托车安全要求.pdf differ diff --git a/GB_Doc/GB 24315-2009 校车标识.pdf b/GB_Doc/GB 24315-2009 校车标识.pdf new file mode 100644 index 0000000..6ca3d06 Binary files /dev/null and b/GB_Doc/GB 24315-2009 校车标识.pdf differ diff --git a/GB_Doc/GB 24406-2024 专用校车学生座椅及其车辆固定件的强度.pdf b/GB_Doc/GB 24406-2024 专用校车学生座椅及其车辆固定件的强度.pdf new file mode 100644 index 0000000..6b8d6d2 Binary files /dev/null and b/GB_Doc/GB 24406-2024 专用校车学生座椅及其车辆固定件的强度.pdf differ diff --git a/GB_Doc/GB 24407-2012 专用校车安全技术条件.pdf b/GB_Doc/GB 24407-2012 专用校车安全技术条件.pdf new file mode 100644 index 0000000..9d136da Binary files /dev/null and b/GB_Doc/GB 24407-2012 专用校车安全技术条件.pdf differ diff --git a/GB_Doc/GB 24407-2025 专用校车安全技术条件.pdf b/GB_Doc/GB 24407-2025 专用校车安全技术条件.pdf new file mode 100644 index 0000000..3d8017a Binary files /dev/null and b/GB_Doc/GB 24407-2025 专用校车安全技术条件.pdf differ diff --git a/GB_Doc/GB 24409-2020 车辆涂料中有害物质限量.pdf b/GB_Doc/GB 24409-2020 车辆涂料中有害物质限量.pdf new file mode 100644 index 0000000..1e27a1d Binary files /dev/null and b/GB_Doc/GB 24409-2020 车辆涂料中有害物质限量.pdf differ diff --git a/GB_Doc/GB 24429-2009 运动头盔 自行车、滑板、轮滑运动头盔的安全要求和试验方法.pdf b/GB_Doc/GB 24429-2009 运动头盔 自行车、滑板、轮滑运动头盔的安全要求和试验方法.pdf new file mode 100644 index 0000000..28d4436 Binary files /dev/null and b/GB_Doc/GB 24429-2009 运动头盔 自行车、滑板、轮滑运动头盔的安全要求和试验方法.pdf differ diff --git a/GB_Doc/GB 24545-2019 车辆车速限制系统技术要求及试验方法.pdf b/GB_Doc/GB 24545-2019 车辆车速限制系统技术要求及试验方法.pdf new file mode 100644 index 0000000..1d80f82 Binary files /dev/null and b/GB_Doc/GB 24545-2019 车辆车速限制系统技术要求及试验方法.pdf differ diff --git a/GB_Doc/GB 24550-2024 汽车对行人的碰撞保护.pdf b/GB_Doc/GB 24550-2024 汽车对行人的碰撞保护.pdf new file mode 100644 index 0000000..da56814 Binary files /dev/null and b/GB_Doc/GB 24550-2024 汽车对行人的碰撞保护.pdf differ diff --git a/GB_Doc/GB 
24727-2009 非公路旅游观光车安全使用规范.pdf b/GB_Doc/GB 24727-2009 非公路旅游观光车安全使用规范.pdf new file mode 100644 index 0000000..a6f483e Binary files /dev/null and b/GB_Doc/GB 24727-2009 非公路旅游观光车安全使用规范.pdf differ diff --git a/GB_Doc/GB 24929-2010 全地形车加速行驶噪声限值及测量方法.pdf b/GB_Doc/GB 24929-2010 全地形车加速行驶噪声限值及测量方法.pdf new file mode 100644 index 0000000..dad63b3 Binary files /dev/null and b/GB_Doc/GB 24929-2010 全地形车加速行驶噪声限值及测量方法.pdf differ diff --git a/GB_Doc/GB 24943-2010 三轮汽车和低速货车用安全标志.pdf b/GB_Doc/GB 24943-2010 三轮汽车和低速货车用安全标志.pdf new file mode 100644 index 0000000..40a23f8 Binary files /dev/null and b/GB_Doc/GB 24943-2010 三轮汽车和低速货车用安全标志.pdf differ diff --git a/GB_Doc/GB 25527-2010 矿用混装炸药车 安全要求.pdf b/GB_Doc/GB 25527-2010 矿用混装炸药车 安全要求.pdf new file mode 100644 index 0000000..5c480a1 Binary files /dev/null and b/GB_Doc/GB 25527-2010 矿用混装炸药车 安全要求.pdf differ diff --git a/GB_Doc/GB 26134-2024 乘用车顶部抗压强度.pdf b/GB_Doc/GB 26134-2024 乘用车顶部抗压强度.pdf new file mode 100644 index 0000000..7e663ae Binary files /dev/null and b/GB_Doc/GB 26134-2024 乘用车顶部抗压强度.pdf differ diff --git a/GB_Doc/GB 26149-2017 乘用车轮胎气压监测系统的性能要求和试验方法.pdf b/GB_Doc/GB 26149-2017 乘用车轮胎气压监测系统的性能要求和试验方法.pdf new file mode 100644 index 0000000..bc51046 Binary files /dev/null and b/GB_Doc/GB 26149-2017 乘用车轮胎气压监测系统的性能要求和试验方法.pdf differ diff --git a/GB_Doc/GB 26512-2021 商用车驾驶室乘员保护.pdf b/GB_Doc/GB 26512-2021 商用车驾驶室乘员保护.pdf new file mode 100644 index 0000000..06f6d05 Binary files /dev/null and b/GB_Doc/GB 26512-2021 商用车驾驶室乘员保护.pdf differ diff --git a/GB_Doc/GB 27695-2011 汽车举升机安全规程.pdf b/GB_Doc/GB 27695-2011 汽车举升机安全规程.pdf new file mode 100644 index 0000000..b240dcf Binary files /dev/null and b/GB_Doc/GB 27695-2011 汽车举升机安全规程.pdf differ diff --git a/GB_Doc/GB 27887-2024 机动车儿童乘员用约束系统.pdf b/GB_Doc/GB 27887-2024 机动车儿童乘员用约束系统.pdf new file mode 100644 index 0000000..a34eee9 Binary files /dev/null and b/GB_Doc/GB 27887-2024 机动车儿童乘员用约束系统.pdf differ diff --git a/GB_Doc/GB 29743.1-2022 机动车冷却液 第1部分:燃油汽车发动机冷却液.pdf b/GB_Doc/GB 29743.1-2022 机动车冷却液 
第1部分:燃油汽车发动机冷却液.pdf new file mode 100644 index 0000000..40af3cc Binary files /dev/null and b/GB_Doc/GB 29743.1-2022 机动车冷却液 第1部分:燃油汽车发动机冷却液.pdf differ diff --git a/GB_Doc/GB 29753-2023 道路运输 易腐食品与生物制品 冷藏车安全要求及试验方法.pdf b/GB_Doc/GB 29753-2023 道路运输 易腐食品与生物制品 冷藏车安全要求及试验方法.pdf new file mode 100644 index 0000000..f2a6083 Binary files /dev/null and b/GB_Doc/GB 29753-2023 道路运输 易腐食品与生物制品 冷藏车安全要求及试验方法.pdf differ diff --git a/GB_Doc/GB 30005-2013 独轮车安全要求.pdf b/GB_Doc/GB 30005-2013 独轮车安全要求.pdf new file mode 100644 index 0000000..bdf7146 Binary files /dev/null and b/GB_Doc/GB 30005-2013 独轮车安全要求.pdf differ diff --git a/GB_Doc/GB 30510-2024 重型商用车辆燃料消耗量限值.pdf b/GB_Doc/GB 30510-2024 重型商用车辆燃料消耗量限值.pdf new file mode 100644 index 0000000..c3f5f73 Binary files /dev/null and b/GB_Doc/GB 30510-2024 重型商用车辆燃料消耗量限值.pdf differ diff --git a/GB_Doc/GB 30678-2014 客车用安全标志和信息符号.pdf b/GB_Doc/GB 30678-2014 客车用安全标志和信息符号.pdf new file mode 100644 index 0000000..306152e Binary files /dev/null and b/GB_Doc/GB 30678-2014 客车用安全标志和信息符号.pdf differ diff --git a/GB_Doc/GB 32087-2015 轻型汽车牵引装置.pdf b/GB_Doc/GB 32087-2015 轻型汽车牵引装置.pdf new file mode 100644 index 0000000..14da9eb Binary files /dev/null and b/GB_Doc/GB 32087-2015 轻型汽车牵引装置.pdf differ diff --git a/GB_Doc/GB 32157-2015 消防车用功率输出装置.pdf b/GB_Doc/GB 32157-2015 消防车用功率输出装置.pdf new file mode 100644 index 0000000..a2322f9 Binary files /dev/null and b/GB_Doc/GB 32157-2015 消防车用功率输出装置.pdf differ diff --git a/GB_Doc/GB 34655-2017 客车灭火装备配置要求.pdf b/GB_Doc/GB 34655-2017 客车灭火装备配置要求.pdf new file mode 100644 index 0000000..1f4895f Binary files /dev/null and b/GB_Doc/GB 34655-2017 客车灭火装备配置要求.pdf differ diff --git a/GB_Doc/GB 34659-2017 汽车和挂车防飞溅系统性能要求和测量方法.pdf b/GB_Doc/GB 34659-2017 汽车和挂车防飞溅系统性能要求和测量方法.pdf new file mode 100644 index 0000000..ba8adeb Binary files /dev/null and b/GB_Doc/GB 34659-2017 汽车和挂车防飞溅系统性能要求和测量方法.pdf differ diff --git a/GB_Doc/GB 34660-2017 道路车辆 电磁兼容性要求和试验方法.pdf b/GB_Doc/GB 34660-2017 道路车辆 电磁兼容性要求和试验方法.pdf new file mode 100644 index 0000000..5b056b2 
Binary files /dev/null and b/GB_Doc/GB 34660-2017 道路车辆 电磁兼容性要求和试验方法.pdf differ diff --git a/GB_Doc/GB 34660-2026 道路车辆 电磁兼容性要求和试验方法.pdf b/GB_Doc/GB 34660-2026 道路车辆 电磁兼容性要求和试验方法.pdf new file mode 100644 index 0000000..25cf2d8 Binary files /dev/null and b/GB_Doc/GB 34660-2026 道路车辆 电磁兼容性要求和试验方法.pdf differ diff --git a/GB_Doc/GB 34668-2024 电动平衡车安全技术规范.pdf b/GB_Doc/GB 34668-2024 电动平衡车安全技术规范.pdf new file mode 100644 index 0000000..1ae1deb Binary files /dev/null and b/GB_Doc/GB 34668-2024 电动平衡车安全技术规范.pdf differ diff --git a/GB_Doc/GB 35793-2018 车用乙醇汽油E85.pdf b/GB_Doc/GB 35793-2018 车用乙醇汽油E85.pdf new file mode 100644 index 0000000..24de71d Binary files /dev/null and b/GB_Doc/GB 35793-2018 车用乙醇汽油E85.pdf differ diff --git a/GB_Doc/GB 36220-2018 运油车辆和加油车辆安全技术条件.pdf b/GB_Doc/GB 36220-2018 运油车辆和加油车辆安全技术条件.pdf new file mode 100644 index 0000000..00d9b72 Binary files /dev/null and b/GB_Doc/GB 36220-2018 运油车辆和加油车辆安全技术条件.pdf differ diff --git a/GB_Doc/GB 36581-2018 汽车车轮安全性能要求及试验方法.pdf b/GB_Doc/GB 36581-2018 汽车车轮安全性能要求及试验方法.pdf new file mode 100644 index 0000000..f54f814 Binary files /dev/null and b/GB_Doc/GB 36581-2018 汽车车轮安全性能要求及试验方法.pdf differ diff --git a/GB_Doc/GB 38031-2020 电动汽车用动力蓄电池安全要求.pdf b/GB_Doc/GB 38031-2020 电动汽车用动力蓄电池安全要求.pdf new file mode 100644 index 0000000..5c29e74 Binary files /dev/null and b/GB_Doc/GB 38031-2020 电动汽车用动力蓄电池安全要求.pdf differ diff --git a/GB_Doc/GB 38032-2020 电动客车安全要求.pdf b/GB_Doc/GB 38032-2020 电动客车安全要求.pdf new file mode 100644 index 0000000..f188ff6 Binary files /dev/null and b/GB_Doc/GB 38032-2020 电动客车安全要求.pdf differ diff --git a/GB_Doc/GB 38262-2019 客车内饰材料的燃烧特性.pdf b/GB_Doc/GB 38262-2019 客车内饰材料的燃烧特性.pdf new file mode 100644 index 0000000..4334f78 Binary files /dev/null and b/GB_Doc/GB 38262-2019 客车内饰材料的燃烧特性.pdf differ diff --git a/GB_Doc/GB 38900-2020 机动车安全技术检验项目和方法.pdf b/GB_Doc/GB 38900-2020 机动车安全技术检验项目和方法.pdf new file mode 100644 index 0000000..7e5af13 Binary files /dev/null and b/GB_Doc/GB 38900-2020 机动车安全技术检验项目和方法.pdf differ diff --git 
a/GB_Doc/GB 39732-2020 汽车事件数据记录系统.pdf b/GB_Doc/GB 39732-2020 汽车事件数据记录系统.pdf new file mode 100644 index 0000000..ea204a6 Binary files /dev/null and b/GB_Doc/GB 39732-2020 汽车事件数据记录系统.pdf differ diff --git a/GB_Doc/GB 39752-2024 电动汽车供电设备安全要求.pdf b/GB_Doc/GB 39752-2024 电动汽车供电设备安全要求.pdf new file mode 100644 index 0000000..c65aa2d Binary files /dev/null and b/GB_Doc/GB 39752-2024 电动汽车供电设备安全要求.pdf differ diff --git a/GB_Doc/GB 39800.9-2024 个体防护装备配备规范 第9部分:汽车.pdf b/GB_Doc/GB 39800.9-2024 个体防护装备配备规范 第9部分:汽车.pdf new file mode 100644 index 0000000..c261f66 Binary files /dev/null and b/GB_Doc/GB 39800.9-2024 个体防护装备配备规范 第9部分:汽车.pdf differ diff --git a/GB_Doc/GB 40164-2021 汽车和挂车 制动器用零部件技术要求及试验方法.pdf b/GB_Doc/GB 40164-2021 汽车和挂车 制动器用零部件技术要求及试验方法.pdf new file mode 100644 index 0000000..943741d Binary files /dev/null and b/GB_Doc/GB 40164-2021 汽车和挂车 制动器用零部件技术要求及试验方法.pdf differ diff --git a/GB_Doc/GB 40559-2024 电动平衡车、滑板车用锂离子电池和电池组 安全技术规范.pdf b/GB_Doc/GB 40559-2024 电动平衡车、滑板车用锂离子电池和电池组 安全技术规范.pdf new file mode 100644 index 0000000..69053d7 Binary files /dev/null and b/GB_Doc/GB 40559-2024 电动平衡车、滑板车用锂离子电池和电池组 安全技术规范.pdf differ diff --git a/GB_Doc/GB 4094-2016 汽车操纵件、指示器及信号装置的标志.pdf b/GB_Doc/GB 4094-2016 汽车操纵件、指示器及信号装置的标志.pdf new file mode 100644 index 0000000..87521b6 Binary files /dev/null and b/GB_Doc/GB 4094-2016 汽车操纵件、指示器及信号装置的标志.pdf differ diff --git a/GB_Doc/GB 42295-2022 电动自行车电气安全要求.pdf b/GB_Doc/GB 42295-2022 电动自行车电气安全要求.pdf new file mode 100644 index 0000000..fb94b01 Binary files /dev/null and b/GB_Doc/GB 42295-2022 电动自行车电气安全要求.pdf differ diff --git a/GB_Doc/GB 42296-2022 电动自行车用充电器安全技术要求.pdf b/GB_Doc/GB 42296-2022 电动自行车用充电器安全技术要求.pdf new file mode 100644 index 0000000..3e01417 Binary files /dev/null and b/GB_Doc/GB 42296-2022 电动自行车用充电器安全技术要求.pdf differ diff --git a/GB_Doc/GB 43068-2023 煤矿用跑车防护装置安全技术要求.pdf b/GB_Doc/GB 43068-2023 煤矿用跑车防护装置安全技术要求.pdf new file mode 100644 index 0000000..dd1a362 Binary files /dev/null and b/GB_Doc/GB 43068-2023 煤矿用跑车防护装置安全技术要求.pdf differ diff --git 
a/GB_Doc/GB 43854-2024 电动自行车用锂离子蓄电池安全技术规范.pdf b/GB_Doc/GB 43854-2024 电动自行车用锂离子蓄电池安全技术规范.pdf new file mode 100644 index 0000000..9ce898e Binary files /dev/null and b/GB_Doc/GB 43854-2024 电动自行车用锂离子蓄电池安全技术规范.pdf differ diff --git a/GB_Doc/GB 44263-2024 电动汽车传导充电系统安全要求.pdf b/GB_Doc/GB 44263-2024 电动汽车传导充电系统安全要求.pdf new file mode 100644 index 0000000..4ffdf9b Binary files /dev/null and b/GB_Doc/GB 44263-2024 电动汽车传导充电系统安全要求.pdf differ diff --git a/GB_Doc/GB 44495-2024 汽车整车信息安全技术要求.pdf b/GB_Doc/GB 44495-2024 汽车整车信息安全技术要求.pdf new file mode 100644 index 0000000..f34842e Binary files /dev/null and b/GB_Doc/GB 44495-2024 汽车整车信息安全技术要求.pdf differ diff --git a/GB_Doc/GB 44496-2024 汽车软件升级通用技术要求.pdf b/GB_Doc/GB 44496-2024 汽车软件升级通用技术要求.pdf new file mode 100644 index 0000000..c0e0d72 Binary files /dev/null and b/GB_Doc/GB 44496-2024 汽车软件升级通用技术要求.pdf differ diff --git a/GB_Doc/GB 44497-2024 智能网联汽车 自动驾驶数据记录系统.pdf b/GB_Doc/GB 44497-2024 智能网联汽车 自动驾驶数据记录系统.pdf new file mode 100644 index 0000000..65741ed Binary files /dev/null and b/GB_Doc/GB 44497-2024 智能网联汽车 自动驾驶数据记录系统.pdf differ diff --git a/GB_Doc/GB 44503-2024 警车车徽.pdf b/GB_Doc/GB 44503-2024 警车车徽.pdf new file mode 100644 index 0000000..b25bdc1 Binary files /dev/null and b/GB_Doc/GB 44503-2024 警车车徽.pdf differ diff --git a/GB_Doc/GB 4599-2024 汽车道路照明装置及系统.pdf b/GB_Doc/GB 4599-2024 汽车道路照明装置及系统.pdf new file mode 100644 index 0000000..7303d91 Binary files /dev/null and b/GB_Doc/GB 4599-2024 汽车道路照明装置及系统.pdf differ diff --git a/GB_Doc/GB 4785-2019 汽车及挂车外部照明和光信号装置的安装规定.pdf b/GB_Doc/GB 4785-2019 汽车及挂车外部照明和光信号装置的安装规定.pdf new file mode 100644 index 0000000..727633e Binary files /dev/null and b/GB_Doc/GB 4785-2019 汽车及挂车外部照明和光信号装置的安装规定.pdf differ diff --git a/GB_Doc/GB 48001-2026 汽车车门把手安全技术要求.pdf b/GB_Doc/GB 48001-2026 汽车车门把手安全技术要求.pdf new file mode 100644 index 0000000..803f47d Binary files /dev/null and b/GB_Doc/GB 48001-2026 汽车车门把手安全技术要求.pdf differ diff --git a/GB_Doc/GB 48005-2026 轨道车辆用玻璃安全技术要求.pdf b/GB_Doc/GB 48005-2026 轨道车辆用玻璃安全技术要求.pdf new 
file mode 100644 index 0000000..8198e05 Binary files /dev/null and b/GB_Doc/GB 48005-2026 轨道车辆用玻璃安全技术要求.pdf differ diff --git a/GB_Doc/GB 518-2020 摩托车轮胎.pdf b/GB_Doc/GB 518-2020 摩托车轮胎.pdf new file mode 100644 index 0000000..282f8a1 Binary files /dev/null and b/GB_Doc/GB 518-2020 摩托车轮胎.pdf differ diff --git a/GB_Doc/GB 5763-2018 汽车用制动器衬片.pdf b/GB_Doc/GB 5763-2018 汽车用制动器衬片.pdf new file mode 100644 index 0000000..bf06b26 Binary files /dev/null and b/GB_Doc/GB 5763-2018 汽车用制动器衬片.pdf differ diff --git a/GB_Doc/GB 5768.7-2018 道路交通标志和标线 第7部分:非机动车和行人.pdf b/GB_Doc/GB 5768.7-2018 道路交通标志和标线 第7部分:非机动车和行人.pdf new file mode 100644 index 0000000..2d4263a Binary files /dev/null and b/GB_Doc/GB 5768.7-2018 道路交通标志和标线 第7部分:非机动车和行人.pdf differ diff --git a/GB_Doc/GB 5920-2024 汽车和挂车光信号装置及系统.pdf b/GB_Doc/GB 5920-2024 汽车和挂车光信号装置及系统.pdf new file mode 100644 index 0000000..39fc1ff Binary files /dev/null and b/GB_Doc/GB 5920-2024 汽车和挂车光信号装置及系统.pdf differ diff --git a/GB_Doc/GB 6675.12-2014 玩具安全 第12部分:玩具滑板车.pdf b/GB_Doc/GB 6675.12-2014 玩具安全 第12部分:玩具滑板车.pdf new file mode 100644 index 0000000..ee749ef Binary files /dev/null and b/GB_Doc/GB 6675.12-2014 玩具安全 第12部分:玩具滑板车.pdf differ diff --git a/GB_Doc/GB 7258-2017 机动车运行安全技术条件.pdf b/GB_Doc/GB 7258-2017 机动车运行安全技术条件.pdf new file mode 100644 index 0000000..a75972f Binary files /dev/null and b/GB_Doc/GB 7258-2017 机动车运行安全技术条件.pdf differ diff --git a/GB_Doc/GB 7956.1-2014 消防车 第1部分:通用技术条件.pdf b/GB_Doc/GB 7956.1-2014 消防车 第1部分:通用技术条件.pdf new file mode 100644 index 0000000..364b7a4 Binary files /dev/null and b/GB_Doc/GB 7956.1-2014 消防车 第1部分:通用技术条件.pdf differ diff --git a/GB_Doc/GB 7956.12-2015 消防车 第12部分:举高消防车.pdf b/GB_Doc/GB 7956.12-2015 消防车 第12部分:举高消防车.pdf new file mode 100644 index 0000000..68a5809 Binary files /dev/null and b/GB_Doc/GB 7956.12-2015 消防车 第12部分:举高消防车.pdf differ diff --git a/GB_Doc/GB 7956.14-2015 消防车 第14部分:抢险救援消防车.pdf b/GB_Doc/GB 7956.14-2015 消防车 第14部分:抢险救援消防车.pdf new file mode 100644 index 0000000..1dcd318 Binary files /dev/null and 
b/GB_Doc/GB 7956.14-2015 消防车 第14部分:抢险救援消防车.pdf differ diff --git a/GB_Doc/GB 7956.16-2019 消防车 第16部分:照明消防车.pdf b/GB_Doc/GB 7956.16-2019 消防车 第16部分:照明消防车.pdf new file mode 100644 index 0000000..bd2ab66 Binary files /dev/null and b/GB_Doc/GB 7956.16-2019 消防车 第16部分:照明消防车.pdf differ diff --git a/GB_Doc/GB 7956.17-2019 消防车 第17部分:排烟消防车.pdf b/GB_Doc/GB 7956.17-2019 消防车 第17部分:排烟消防车.pdf new file mode 100644 index 0000000..308af5b Binary files /dev/null and b/GB_Doc/GB 7956.17-2019 消防车 第17部分:排烟消防车.pdf differ diff --git a/GB_Doc/GB 7956.19-2026 消防车 第19部分:侦检消防车.pdf b/GB_Doc/GB 7956.19-2026 消防车 第19部分:侦检消防车.pdf new file mode 100644 index 0000000..ba62b6c Binary files /dev/null and b/GB_Doc/GB 7956.19-2026 消防车 第19部分:侦检消防车.pdf differ diff --git a/GB_Doc/GB 7956.2-2014 消防车 第2部分:水罐消防车.pdf b/GB_Doc/GB 7956.2-2014 消防车 第2部分:水罐消防车.pdf new file mode 100644 index 0000000..06aadf7 Binary files /dev/null and b/GB_Doc/GB 7956.2-2014 消防车 第2部分:水罐消防车.pdf differ diff --git a/GB_Doc/GB 7956.20-2026 消防车 第20部分:特种底盘消防车.pdf b/GB_Doc/GB 7956.20-2026 消防车 第20部分:特种底盘消防车.pdf new file mode 100644 index 0000000..695765c Binary files /dev/null and b/GB_Doc/GB 7956.20-2026 消防车 第20部分:特种底盘消防车.pdf differ diff --git a/GB_Doc/GB 7956.23-2019 消防车 第23部分:供气消防车.pdf b/GB_Doc/GB 7956.23-2019 消防车 第23部分:供气消防车.pdf new file mode 100644 index 0000000..da20e97 Binary files /dev/null and b/GB_Doc/GB 7956.23-2019 消防车 第23部分:供气消防车.pdf differ diff --git a/GB_Doc/GB 7956.3-2014 消防车 第3部分:泡沫消防车.pdf b/GB_Doc/GB 7956.3-2014 消防车 第3部分:泡沫消防车.pdf new file mode 100644 index 0000000..adc46bf Binary files /dev/null and b/GB_Doc/GB 7956.3-2014 消防车 第3部分:泡沫消防车.pdf differ diff --git a/GB_Doc/GB 7956.4-2019 消防车 第4部分:干粉消防车.pdf b/GB_Doc/GB 7956.4-2019 消防车 第4部分:干粉消防车.pdf new file mode 100644 index 0000000..f237aab Binary files /dev/null and b/GB_Doc/GB 7956.4-2019 消防车 第4部分:干粉消防车.pdf differ diff --git a/GB_Doc/GB 7956.5-2019 消防车 第5部分:气体消防车.pdf b/GB_Doc/GB 7956.5-2019 消防车 第5部分:气体消防车.pdf new file mode 100644 index 0000000..959dc7e Binary files /dev/null 
and b/GB_Doc/GB 7956.5-2019 消防车 第5部分:气体消防车.pdf differ diff --git a/GB_Doc/GB 7956.6-2015 消防车 第6部分:压缩空气泡沫消防车.pdf b/GB_Doc/GB 7956.6-2015 消防车 第6部分:压缩空气泡沫消防车.pdf new file mode 100644 index 0000000..b2047ed Binary files /dev/null and b/GB_Doc/GB 7956.6-2015 消防车 第6部分:压缩空气泡沫消防车.pdf differ diff --git a/GB_Doc/GB 7956.7-2019 消防车 第7部分:泵浦消防车.pdf b/GB_Doc/GB 7956.7-2019 消防车 第7部分:泵浦消防车.pdf new file mode 100644 index 0000000..d696681 Binary files /dev/null and b/GB_Doc/GB 7956.7-2019 消防车 第7部分:泵浦消防车.pdf differ diff --git a/GB_Doc/GB 8108-2014 车用电子警报器.pdf b/GB_Doc/GB 8108-2014 车用电子警报器.pdf new file mode 100644 index 0000000..65e1dc8 Binary files /dev/null and b/GB_Doc/GB 8108-2014 车用电子警报器.pdf differ diff --git a/GB_Doc/GB 8109-2023 推车式灭火器.pdf b/GB_Doc/GB 8109-2023 推车式灭火器.pdf new file mode 100644 index 0000000..73bfc34 Binary files /dev/null and b/GB_Doc/GB 8109-2023 推车式灭火器.pdf differ diff --git a/GB_Doc/GB 811-2022 摩托车、电动自行车乘员头盔.pdf b/GB_Doc/GB 811-2022 摩托车、电动自行车乘员头盔.pdf new file mode 100644 index 0000000..d9ab3f2 Binary files /dev/null and b/GB_Doc/GB 811-2022 摩托车、电动自行车乘员头盔.pdf differ diff --git a/GB_Doc/GB 8176-2012 冲压车间安全生产通则.pdf b/GB_Doc/GB 8176-2012 冲压车间安全生产通则.pdf new file mode 100644 index 0000000..1b3df1a Binary files /dev/null and b/GB_Doc/GB 8176-2012 冲压车间安全生产通则.pdf differ diff --git a/GB_Doc/GB 9656-2021 机动车玻璃安全技术规范.pdf b/GB_Doc/GB 9656-2021 机动车玻璃安全技术规范.pdf new file mode 100644 index 0000000..5b8298b Binary files /dev/null and b/GB_Doc/GB 9656-2021 机动车玻璃安全技术规范.pdf differ diff --git a/GB_Doc/GB 9743-2024 轿车轮胎.pdf b/GB_Doc/GB 9743-2024 轿车轮胎.pdf new file mode 100644 index 0000000..5631caa Binary files /dev/null and b/GB_Doc/GB 9743-2024 轿车轮胎.pdf differ diff --git a/GB_Doc/GB 9744-2024 载重汽车轮胎.pdf b/GB_Doc/GB 9744-2024 载重汽车轮胎.pdf new file mode 100644 index 0000000..0995378 Binary files /dev/null and b/GB_Doc/GB 9744-2024 载重汽车轮胎.pdf differ diff --git a/GB_Doc/GB+34660-2026.pdf b/GB_Doc/GB+34660-2026.pdf new file mode 100644 index 0000000..25cf2d8 Binary files /dev/null and 
b/GB_Doc/GB+34660-2026.pdf differ diff --git a/GB_Doc/GB+48001-2026.pdf b/GB_Doc/GB+48001-2026.pdf new file mode 100644 index 0000000..803f47d Binary files /dev/null and b/GB_Doc/GB+48001-2026.pdf differ diff --git a/GB_Doc/国家标准清单_车_20260416_160424.xlsx b/GB_Doc/国家标准清单_车_20260416_160424.xlsx new file mode 100644 index 0000000..cf8ec77 Binary files /dev/null and b/GB_Doc/国家标准清单_车_20260416_160424.xlsx differ diff --git a/catarc_iso_scraper.py b/catarc_iso_scraper.py new file mode 100644 index 0000000..85dedb5 --- /dev/null +++ b/catarc_iso_scraper.py @@ -0,0 +1,557 @@ +""" +全国汽车标准化技术委员会 - ISO发布标准数据采集脚本 +数据来源: https://www.catarc.org.cn/gjbzh/isoiec/iso/gzdt/fbbz/index.html +方式: 解析服务端渲染的 HTML 页面 (非 API) + +功能: + 1. 自动发现并采集所有子页面中的 ISO 标准表格数据 + 2. 支持不同时期的表格列结构差异 (新旧格式自适应) + 3. 采集: 序号、所属机构、文件号、英文名称、中文名称、代替标准、所属分技术委员会 + 4. 自动补充: 发布批次、发布日期、来源页面 + 5. 支持断点续采 + 6. 导出为格式化的 Excel 文件 (含统计 Sheet) + +用法: + python catarc_iso_scraper.py # 全量采集 + python catarc_iso_scraper.py --resume # 断点续采 + python catarc_iso_scraper.py --output result # 自定义输出文件名 + python catarc_iso_scraper.py --delay 1.0 # 自定义请求间隔(秒) +""" + +import sys +import io +import os +import re +import json +import time +import argparse +from datetime import datetime +from html.parser import HTMLParser +from urllib.parse import urljoin + +import requests +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter + +# ─── Windows 控制台中文输出修复 ───────────────────────── +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +# ─── 配置 ─────────────────────────────────────────────── +BASE_URL = "https://www.catarc.org.cn" +LIST_PAGE_PATTERN = "/gjbzh/isoiec/iso/gzdt/fbbz/index{page}.html" +CACHE_FILE = ".catarc_iso_cache.json" +MAX_RETRIES = 3 +DEFAULT_DELAY = 0.5 # 秒 +REQUEST_TIMEOUT = 30 + 
+HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", +} + +# 已知的表头关键词 → 标准字段名映射 +HEADER_KEYWORDS = { + "序号": "序号", + "所属机构": "所属机构", + "文件号": "文件号", + "标准号": "文件号", + "英文名称": "英文名称", + "中文名称": "中文名称", + "所属分技术委员会": "所属分技术委员会", + "分技术委员会": "所属分技术委员会", + "代替": "代替标准", + "代替标准": "代替标准", +} + +# Excel 列定义 (输出顺序) +OUTPUT_COLUMNS = [ + ("序号", 8), + ("所属机构", 28), + ("文件号", 25), + ("英文名称", 60), + ("中文名称", 50), + ("所属分技术委员会", 18), + ("代替标准", 22), + ("发布批次", 22), + ("发布日期", 14), + ("来源页面", 55), +] + + +# ─── HTML 解析器 ───────────────────────────────────────── +class TableParser(HTMLParser): + """从 HTML 中提取 数据, 支持跨 /', html, re.S) + if len(rows) < 2: + return [] + + # 解析表头 + header_cells = re.findall(r']*>(.*?)', rows[0], re.S) + headers = normalize_headers([re.sub(r'<[^>]+>', ' ', c).strip() for c in header_cells]) + + for row in rows[1:]: + cells = re.findall(r']*>(.*?)', row, re.S) + if not cells or len(cells) < 2: + continue + cells_clean = [re.sub(r'<[^>]+>', ' ', c).strip() for c in cells] + + record = {} + for i, val in enumerate(cells_clean): + if i < len(headers): + record[headers[i]] = val + record["发布批次"] = title + record["发布日期"] = date + records.append(record) + + return records + + +# ─── 缓存管理 ─────────────────────────────────────────── +def load_cache(): + if os.path.exists(CACHE_FILE): + try: + with open(CACHE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + pass + return {"records": [], "done_urls": [], "total_pages": 0} + + +def save_cache(cache): + with open(CACHE_FILE, "w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False) + + +# ─── Excel 导出 ───────────────────────────────────────── +def export_to_excel(records, output_path): + wb = Workbook() + ws = wb.active + ws.title 
= "ISO发布标准" + + # ── 标题行 ── + header_font = Font(name="微软雅黑", bold=True, color="FFFFFF", size=11) + header_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid") + header_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + thin_border = Border( + left=Side(style="thin", color="B4C6E7"), + right=Side(style="thin", color="B4C6E7"), + top=Side(style="thin", color="B4C6E7"), + bottom=Side(style="thin", color="B4C6E7"), + ) + + col_names = [col[0] for col in OUTPUT_COLUMNS] + for col_idx, name in enumerate(col_names, 1): + cell = ws.cell(row=1, column=col_idx, value=name) + cell.font = header_font + cell.fill = header_fill + cell.alignment = header_align + cell.border = thin_border + + # ── 数据行 ── + data_font = Font(name="微软雅黑", size=10) + data_align = Alignment(vertical="center", wrap_text=True) + even_fill = PatternFill(start_color="D6E4F0", end_color="D6E4F0", fill_type="solid") + + for row_idx, record in enumerate(records, 2): + for col_idx, (col_name, _) in enumerate(OUTPUT_COLUMNS, 1): + val = record.get(col_name, "") + if col_name == "序号": + try: + val = int(val) + except (ValueError, TypeError): + pass + cell = ws.cell(row=row_idx, column=col_idx, value=val) + cell.font = data_font + cell.alignment = data_align + cell.border = thin_border + if row_idx % 2 == 0: + cell.fill = even_fill + + # ── 列宽 ── + for col_idx, (name, width) in enumerate(OUTPUT_COLUMNS, 1): + ws.column_dimensions[get_column_letter(col_idx)].width = width + + # ── 冻结 & 筛选 ── + ws.freeze_panes = "A2" + ws.auto_filter.ref = f"A1:{get_column_letter(len(col_names))}{len(records) + 1}" + + # ── 统计信息 Sheet ── + ws_stat = wb.create_sheet("统计信息") + + # 所属机构分布 + org_count = {} + batch_count = {} + year_count = {} + tc_count = {} + + for r in records: + org = r.get("所属机构", "未知") or "未知" + org_count[org] = org_count.get(org, 0) + 1 + + batch = r.get("发布批次", "未知") or "未知" + batch_count[batch] = batch_count.get(batch, 0) + 1 + + pub_date = r.get("发布日期", "") 
or "" + year = pub_date[:4] if pub_date else "未知" + year_count[year] = year_count.get(year, 0) + 1 + + tc = r.get("所属分技术委员会", "") or "" + if tc: + tc_count[tc] = tc_count.get(tc, 0) + 1 + + stat_rows = [ + ("采集时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + ("数据来源", "全国汽车标准化技术委员会 — ISO发布标准"), + ("来源网址", "https://www.catarc.org.cn/gjbzh/isoiec/iso/gzdt/fbbz/index.html"), + ("标准总数", len(records)), + ("子页面数", len(batch_count)), + ("", ""), + ("── 所属机构分布 (Top 30) ──", ""), + ("所属机构", "标准数量"), + ] + for org, cnt in sorted(org_count.items(), key=lambda x: -x[1])[:30]: + stat_rows.append((org, cnt)) + + stat_rows.append(("", "")) + stat_rows.append(("── 按发布年份分布 ──", "")) + stat_rows.append(("年份", "标准数量")) + for y, cnt in sorted(year_count.items(), reverse=True): + stat_rows.append((y, cnt)) + + stat_rows.append(("", "")) + stat_rows.append(("── 按发布批次分布 ──", "")) + stat_rows.append(("批次", "标准数量")) + for b, cnt in sorted(batch_count.items(), key=lambda x: -x[1]): + stat_rows.append((b, cnt)) + + if tc_count: + stat_rows.append(("", "")) + stat_rows.append(("── 分技术委员会分布 (Top 20) ──", "")) + stat_rows.append(("分技术委员会", "标准数量")) + for tc, cnt in sorted(tc_count.items(), key=lambda x: -x[1])[:20]: + stat_rows.append((tc, cnt)) + + for row_idx, (a, b) in enumerate(stat_rows, 1): + cell_a = ws_stat.cell(row=row_idx, column=1, value=a) + cell_b = ws_stat.cell(row=row_idx, column=2, value=b) + if a.startswith("──"): + cell_a.font = Font(name="微软雅黑", bold=True, size=11) + else: + cell_a.font = Font(name="微软雅黑", size=10) + cell_b.font = Font(name="微软雅黑", size=10) + + ws_stat.column_dimensions["A"].width = 50 + ws_stat.column_dimensions["B"].width = 20 + + wb.save(output_path) + + +# ─── 主流程 ───────────────────────────────────────────── +def main(): + parser = argparse.ArgumentParser( + description="全国汽车标准化技术委员会 — ISO发布标准数据采集工具", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python catarc_iso_scraper.py # 全量采集 + python catarc_iso_scraper.py --resume 
# 断点续采 + python catarc_iso_scraper.py --delay 1.0 # 每次请求间隔1秒 + python catarc_iso_scraper.py --output ISO标准 # 自定义输出文件名 + """, + ) + parser.add_argument("--resume", "-r", action="store_true", help="从上次中断处继续采集") + parser.add_argument("--delay", "-d", type=float, default=DEFAULT_DELAY, help="请求间隔秒数 (默认0.5)") + parser.add_argument("--output", "-o", default=None, help="输出文件名 (不含扩展名)") + + args = parser.parse_args() + + if args.output: + output_name = args.output + else: + output_name = f"ISO发布标准_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{output_name}.xlsx") + + print("=" * 60) + print(" 全国汽车标准化技术委员会 — ISO发布标准数据采集工具") + print("=" * 60) + print(f" 数据来源: catarc.org.cn/gjbzh/isoiec/iso/gzdt/fbbz/") + print(f" 请求间隔: {args.delay}s") + print(f" 输出文件: {output_path}") + print("-" * 60) + + session = requests.Session() + all_records = [] + done_urls = set() + + # 断点续采 + if args.resume: + cache = load_cache() + if cache["records"]: + all_records = cache["records"] + done_urls = set(cache["done_urls"]) + print(f" [*] 从缓存恢复: 已有 {len(all_records)} 条, {len(done_urls)} 个子页面已完成") + + # Step 1: 发现子页面 + print(" [1/3] 发现子页面...") + subpages = discover_subpages(session) + print(f" 共发现 {len(subpages)} 个子页面") + if not subpages: + print(" [!] 未发现任何子页面, 请检查网络") + sys.exit(1) + + # Step 2: 逐个采集 + print(f" [2/3] 采集标准数据...") + start_time = time.time() + + for idx, (url, title, date) in enumerate(subpages, 1): + if url in done_urls: + continue + + records = scrape_detail_page(session, url, title, date) + all_records.extend(records) + done_urls.add(url) + + print(f" [{idx}/{len(subpages)}] {title} — 获取 {len(records)} 条标准", end="\r") + + # 保存进度 + cache = {"records": all_records, "done_urls": list(done_urls), "total_pages": len(subpages)} + save_cache(cache) + + time.sleep(args.delay) + + elapsed = time.time() - start_time + print() + print(f" 采集完成! 
用时 {elapsed:.1f} 秒, 共获取 {len(all_records)} 条标准") + + if not all_records: + print(" [!] 未获取到任何数据") + sys.exit(1) + + # Step 3: 导出 Excel + print(f" [3/3] 生成 Excel 文件...") + export_to_excel(all_records, output_path) + file_size = os.path.getsize(output_path) / 1024 + + print() + print(f" {'=' * 50}") + print(f" 导出完成: {output_path}") + print(f" 文件大小: {file_size:.1f} KB") + print(f" 标准总数: {len(all_records)}") + print(f" 子页面数: {len(subpages)}") + print(f" {'=' * 50}") + + # 清理缓存 + if not args.resume and os.path.exists(CACHE_FILE): + os.remove(CACHE_FILE) + + +if __name__ == "__main__": + main() diff --git a/catarc_scraper.py b/catarc_scraper.py new file mode 100644 index 0000000..904bf72 --- /dev/null +++ b/catarc_scraper.py @@ -0,0 +1,389 @@ +""" +全国汽车标准化技术委员会 - 汽车标准数据采集脚本 +数据来源: https://www.catarc.org.cn/bzzxd/qcbz/index.html +API: POST https://www.catarc.org.cn/prod-api/api/customform/getPageList + +功能: + 1. 全量采集汽车标准数据 (标准编号、标准名称、英文名称、代替标准、发布日期、实施日期、标准状态) + 2. 支持关键词搜索 + 3. 支持断点续采 + 4. 导出为格式化的 Excel 文件 + 5. 
支持增量更新 (已有数据自动跳过) + +用法: + python catarc_scraper.py # 全量采集 + python catarc_scraper.py --search "制动" # 搜索包含"制动"的标准 + python catarc_scraper.py --resume # 断点续采 + python catarc_scraper.py --page-size 100 # 自定义每页大小 + python catarc_scraper.py --output result # 自定义输出文件名 +""" + +import sys +import io +import os + +# 修复 Windows 控制台中文输出问题 +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +import requests +import json +import time +import argparse +import sys +from datetime import datetime +from pathlib import Path + +# ─── 配置 ─────────────────────────────────────────────── +API_URL = "https://www.catarc.org.cn/prod-api/api/customform/getPageList" +FORM_ID = "615560029638725" +DEFAULT_PAGE_SIZE = 50 +MAX_RETRIES = 5 +RETRY_DELAY = 3 # 秒 +REQUEST_TIMEOUT = 30 # 秒 +CACHE_FILE = ".catarc_cache.json" +HEADERS = { + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Accept": "application/json, text/plain, */*", + "Origin": "https://www.catarc.org.cn", + "Referer": "https://www.catarc.org.cn/bzzxd/qcbz/index.html", +} + +# 字段映射: API字段 → 中文显示名 +FIELD_MAP = { + "STD_CODE": "标准编号", + "C_NAME": "标准名称", + "E_NAME": "英文名称", + "REVISE_STD_CODES": "代替标准", + "ISSUE_TIME": "发布日期", + "ACT_TIME": "实施日期", + "STD_STATUS": "标准状态", +} + +# Excel 列宽 +COLUMN_WIDTHS = { + "标准编号": 20, + "标准名称": 55, + "英文名称": 70, + "代替标准": 20, + "发布日期": 14, + "实施日期": 14, + "标准状态": 12, +} + + +def timestamp_to_date(ts_ms): + """将毫秒级时间戳转换为 YYYY-MM-DD 字符串""" + if not ts_ms: + return "" + try: + return datetime.fromtimestamp(int(ts_ms) / 1000).strftime("%Y-%m-%d") + except (ValueError, OSError): + return str(ts_ms) + + +def fetch_page(session, page_num, page_size, keyword=""): + """请求单页数据""" + payload = { + "formId": FORM_ID, + "pageSize": 
page_size, + "pageNum": page_num, + "key": keyword, + } + for attempt in range(1, MAX_RETRIES + 1): + try: + resp = session.post(API_URL, json=payload, headers=HEADERS, timeout=REQUEST_TIMEOUT) + resp.raise_for_status() + data = resp.json() + if data.get("code") == 200: + return data["data"] + print(f" [!] API 返回错误: {data.get('msg', '未知错误')}") + return None + except requests.exceptions.RequestException as e: + print(f" [!] 第 {page_num} 页请求失败 (第 {attempt}/{MAX_RETRIES} 次): {e}") + if attempt < MAX_RETRIES: + time.sleep(RETRY_DELAY * attempt) + return None + + +def normalize_record(record): + """标准化单条记录: 转换时间戳, 清理空白""" + return { + "标准编号": (record.get("STD_CODE") or "").strip(), + "标准名称": (record.get("C_NAME") or "").strip(), + "英文名称": (record.get("E_NAME") or "").strip(), + "代替标准": (record.get("REVISE_STD_CODES") or "").strip(), + "发布日期": timestamp_to_date(record.get("ISSUE_TIME")), + "实施日期": timestamp_to_date(record.get("ACT_TIME")), + "标准状态": (record.get("STD_STATUS") or "").strip(), + } + + +def load_cache(): + """加载缓存 (用于断点续采和去重)""" + if os.path.exists(CACHE_FILE): + try: + with open(CACHE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + pass + return {"records": [], "last_page": 0, "keyword": "", "total": 0} + + +def save_cache(cache): + """保存缓存""" + with open(CACHE_FILE, "w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False) + + +def scrape_all(page_size, keyword="", resume=False): + """采集所有数据""" + session = requests.Session() + all_records = [] + start_page = 1 + total = 0 + + # 断点续采 + if resume: + cache = load_cache() + if cache["records"] and cache["keyword"] == keyword: + all_records = cache["records"] + start_page = cache["last_page"] + 1 + total = cache["total"] + print(f" [*] 从缓存恢复: 已有 {len(all_records)} 条, 从第 {start_page} 页继续") + else: + print(" [*] 缓存不匹配, 从头开始采集") + + # 第一次请求, 获取总页数 + first_data = fetch_page(session, start_page, page_size, keyword) + if not first_data: + print(" [✗] 
无法获取数据, 请检查网络连接") + return all_records + + total = first_data.get("total", 0) + total_pages = first_data.get("pages", 0) + + records = [normalize_record(r) for r in (first_data.get("list") or [])] + all_records.extend(records) + + print(f" 总计: {total} 条标准, 共 {total_pages} 页, 每页 {page_size} 条") + print(f" 已采集: {len(all_records)}/{total}", end="\r") + + # 更新缓存 + cache = { + "records": all_records, + "last_page": start_page, + "keyword": keyword, + "total": total, + } + save_cache(cache) + + # 逐页采集 + for page_num in range(start_page + 1, total_pages + 1): + data = fetch_page(session, page_num, page_size, keyword) + if data is None: + print(f"\n [!] 第 {page_num} 页采集失败, 保存进度并退出") + save_cache(cache) + break + + records = [normalize_record(r) for r in (data.get("list") or [])] + all_records.extend(records) + + # 更新缓存 + cache["records"] = all_records + cache["last_page"] = page_num + save_cache(cache) + + print(f" 已采集: {len(all_records)}/{total} (第 {page_num}/{total_pages} 页)", end="\r") + + # 礼貌延迟, 避免给服务器造成压力 + time.sleep(0.3) + + print() + return all_records + + +def export_to_excel(records, output_path): + """将记录导出为格式化的 Excel 文件""" + from openpyxl import Workbook + from openpyxl.styles import Font, Alignment, PatternFill, Border, Side + from openpyxl.utils import get_column_letter + + wb = Workbook() + ws = wb.active + ws.title = "汽车标准" + + # ── 标题行 ── + headers = list(FIELD_MAP.values()) + header_font = Font(name="微软雅黑", bold=True, color="FFFFFF", size=11) + header_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid") + header_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + thin_border = Border( + left=Side(style="thin", color="B4C6E7"), + right=Side(style="thin", color="B4C6E7"), + top=Side(style="thin", color="B4C6E7"), + bottom=Side(style="thin", color="B4C6E7"), + ) + + for col_idx, header in enumerate(headers, 1): + cell = ws.cell(row=1, column=col_idx, value=header) + cell.font = header_font + cell.fill = 
header_fill + cell.alignment = header_align + cell.border = thin_border + + # ── 数据行 ── + data_font = Font(name="微软雅黑", size=10) + data_align = Alignment(vertical="center", wrap_text=True) + even_fill = PatternFill(start_color="D6E4F0", end_color="D6E4F0", fill_type="solid") + + for row_idx, record in enumerate(records, 2): + for col_idx, key in enumerate(FIELD_MAP.keys()): + cn_key = FIELD_MAP[key] + cell = ws.cell(row=row_idx, column=col_idx + 1, value=record.get(cn_key, "")) + cell.font = data_font + cell.alignment = data_align + cell.border = thin_border + if row_idx % 2 == 0: + cell.fill = even_fill + + # ── 列宽 ── + for col_idx, header in enumerate(headers, 1): + ws.column_dimensions[get_column_letter(col_idx)].width = COLUMN_WIDTHS.get(header, 15) + + # ── 冻结首行 & 自动筛选 ── + ws.freeze_panes = "A2" + ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{len(records) + 1}" + + # ── 添加统计信息 Sheet ── + ws_stat = wb.create_sheet("统计信息") + status_count = {} + year_count = {} + for r in records: + s = r.get("标准状态", "未知") + status_count[s] = status_count.get(s, 0) + 1 + issue = r.get("发布日期", "") + year = issue[:4] if issue else "未知" + year_count[year] = year_count.get(year, 0) + 1 + + stat_data = [ + ["采集时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")], + ["数据来源", "全国汽车标准化技术委员会 (catarc.org.cn)"], + ["标准总数", len(records)], + [], # 空行 + ["标准状态分布"], + ["状态", "数量"], + ] + for s, c in sorted(status_count.items(), key=lambda x: -x[1]): + stat_data.append([s, c]) + + stat_data.append([]) + stat_data.append(["按发布年份分布"]) + stat_data.append(["年份", "数量"]) + for y, c in sorted(year_count.items(), reverse=True): + stat_data.append([y, c]) + + for row_idx, row in enumerate(stat_data, 1): + for col_idx, val in enumerate(row, 1): + cell = ws_stat.cell(row=row_idx, column=col_idx, value=val) + if row_idx in (5, stat_data.index([]) + 5 + len(status_count) + 2 + 1): + cell.font = Font(name="微软雅黑", bold=True, size=11) + else: + cell.font = Font(name="微软雅黑", size=10) + + 
ws_stat.column_dimensions["A"].width = 20 + ws_stat.column_dimensions["B"].width = 50 + + # ── 保存 ── + wb.save(output_path) + return output_path + + +def main(): + parser = argparse.ArgumentParser( + description="全国汽车标准化技术委员会 - 汽车标准数据采集工具", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python catarc_scraper.py # 全量采集 + python catarc_scraper.py --search "制动" # 搜索"制动"相关标准 + python catarc_scraper.py --search "GB 7258" # 按标准编号搜索 + python catarc_scraper.py --resume # 断点续采 + python catarc_scraper.py --page-size 100 # 每页100条, 减少请求次数 + python catarc_scraper.py --output 我的标准 # 自定义输出文件名 + """, + ) + parser.add_argument("--search", "-s", default="", help="搜索关键词 (标准编号或标准名称)") + parser.add_argument("--resume", "-r", action="store_true", help="从上次中断处继续采集") + parser.add_argument("--page-size", "-p", type=int, default=DEFAULT_PAGE_SIZE, help="每页条数 (默认50, 最大建议100)") + parser.add_argument("--output", "-o", default=None, help="输出文件名 (不含扩展名, 默认自动生成)") + + args = parser.parse_args() + + # 输出文件名 + if args.output: + output_name = args.output + elif args.search: + output_name = f"汽车标准_搜索_{args.search}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + else: + output_name = f"汽车标准全量_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{output_name}.xlsx") + + print("=" * 60) + print(" 全国汽车标准化技术委员会 - 汽车标准数据采集工具") + print("=" * 60) + print(f" 数据来源: catarc.org.cn") + if args.search: + print(f" 搜索关键词: {args.search}") + print(f" 每页条数: {args.page_size}") + print(f" 输出文件: {output_path}") + print("-" * 60) + + # 开始采集 + start_time = time.time() + records = scrape_all(args.page_size, args.search, args.resume) + elapsed = time.time() - start_time + + if not records: + print("\n 未获取到任何数据") + sys.exit(1) + + # 去重 (按标准编号) + seen = set() + unique_records = [] + for r in records: + code = r["标准编号"] + if code not in seen: + seen.add(code) + unique_records.append(r) + dup_count = len(records) - len(unique_records) + + 
print("-" * 60) + print(f" 采集完成! 用时 {elapsed:.1f} 秒") + print(f" 获取 {len(records)} 条, 去重后 {len(unique_records)} 条", end="") + if dup_count: + print(f" (移除 {dup_count} 条重复)") + else: + print() + + # 导出 Excel + print(f" 正在生成 Excel 文件...") + export_to_excel(unique_records, output_path) + file_size = os.path.getsize(output_path) / 1024 + + print(f"\n {'=' * 50}") + print(f" 导出完成: {output_path}") + print(f" 文件大小: {file_size:.1f} KB") + print(f" 标准总数: {len(unique_records)}") + print(f" {'=' * 50}") + + # 清理缓存 + if not args.resume and os.path.exists(CACHE_FILE): + os.remove(CACHE_FILE) + + +if __name__ == "__main__": + main() diff --git a/openstd_downloader.py b/openstd_downloader.py new file mode 100644 index 0000000..dfbc02a --- /dev/null +++ b/openstd_downloader.py @@ -0,0 +1,438 @@ +""" +国家标准全文公开系统 — PDF 批量下载工具 +数据来源: https://openstd.samr.gov.cn/bzgk/std/std_list_type +下载地址: http://c.gb688.cn/bzgk/gb/viewGb + +功能: + 1. 按关键词搜索国家标准 (如 "车" 可匹配所有车辆相关标准) + 2. 自动识别验证码 (ddddocr) 并下载 PDF 全文 + 3. 支持筛选: 强制性/推荐性/指导性/全部, 现行/即将实施/废止 + 4. 文件命名: "标准号 标准名称.pdf" (如 "GB 34660-2026 道路车辆 电磁兼容性要求和试验方法.pdf") + 5. 断点续传: 已下载的文件自动跳过 + 6. 
导出标准元数据 Excel + +用法: + python openstd_downloader.py # 下载"车"相关强制性国家标准 + python openstd_downloader.py --keyword "制动" # 搜索关键词 + python openstd_downloader.py --type 2 # 推荐性国家标准 + python openstd_downloader.py --page-size 50 # 每页50条 + python openstd_downloader.py --output-dir ./GB_Doc # 自定义下载目录 + python openstd_downloader.py --status "现行" # 只下载现行标准 + python openstd_downloader.py --no-download # 仅采集元数据, 不下载PDF + +依赖: + pip install requests ddddocr openpyxl +""" + +import sys +import io +import os +import re +import json +import time +import argparse +from datetime import datetime + +import requests +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter + +# ─── Windows 控制台中文输出修复 ───────────────────────── +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +# ─── 配置 ─────────────────────────────────────────────── +LIST_URL = "https://openstd.samr.gov.cn/bzgk/std/std_list_type" +DOWNLOAD_INIT_URL = "http://c.gb688.cn/bzgk/gb/showGb?type=download&hcno={hcno}" +CAPTCHA_URL = "http://c.gb688.cn/bzgk/gb/gc?_{ts}" +VERIFY_URL = "http://c.gb688.cn/bzgk/gb/verifyCode" +PDF_URL = "http://c.gb688.cn/bzgk/gb/viewGb?hcno={hcno}" + +MAX_CAPTCHA_RETRIES = 8 +REQUEST_TIMEOUT = 30 +CACHE_FILE = ".openstd_cache.json" + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", +} + +# 标准类型参数 p.p1 +STD_TYPES = { + "强制性": "1", + "推荐性": "2", + "指导性": "3", + "全部": "", +} + +# 输出列定义 +OUTPUT_COLUMNS = [ + ("标准号", 20), + ("标准名称", 50), + ("标准状态", 10), + ("发布日期", 14), + ("实施日期", 14), + ("是否采标", 10), + ("hcno", 35), + ("文件名", 60), + ("下载状态", 10), +] + +# ─── 列表页解析 ───────────────────────────────────────── +def fetch_list_page(session, keyword, std_type, page_num, 
page_size): + """请求列表页, 返回 HTML""" + params = { + "p.p1": std_type, + "p.p2": keyword, + "p.p90": "circulation_date", + "p.p91": "desc", + } + if page_num > 1: + params["page"] = page_num + params["pageSize"] = page_size + + for attempt in range(3): + try: + resp = session.get(LIST_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT) + resp.raise_for_status() + return resp.content.decode("utf-8") + except Exception as e: + print(f" [!] 列表页请求失败 (第 {attempt+1} 次): {e}") + time.sleep(2) + return None + + +def parse_list_page(html): + """解析列表页 HTML, 返回标准列表和总数""" + # 提取每个标准的 hcno (去重, 每个标准有3个 onclick) + hcnos = list(dict.fromkeys(re.findall(r"showInfo\('([A-F0-9]{32})'\)", html))) + + # 提取总数: 现行(216) 即将实施(36) 废止(408) + status_counts = re.findall(r'现行\((\d+)\).*?即将实施\((\d+)\).*?废止\((\d+)\)', html, re.S) + total = 0 + if status_counts: + total = sum(int(x) for x in status_counts[0]) + + # 提取表格数据 + rows = re.findall(r']*>(.*?)', html, re.S) + standards = [] + for row in rows: + cells = re.findall(r']*>(.*?)', row, re.S) + if len(cells) < 6: + continue + first_cell = re.sub(r'<[^>]+>', '', cells[0]).strip() + if not first_cell.isdigit(): + continue + + std_code = re.sub(r'<[^>]+>', '', cells[1]).strip() + std_name_raw = cells[3] + std_name = re.sub(r'<[^>]+>', '', std_name_raw).strip() + std_status = re.sub(r'<[^>]+>', '', cells[4]).strip() + issue_date = re.sub(r'<[^>]+>', '', cells[5]).strip() + act_date = re.sub(r'<[^>]+>', '', cells[6]).strip() if len(cells) > 6 else "" + + # 从 onclick 提取 hcno + hcno_m = re.search(r"showInfo\('([A-F0-9]{32})'\)", cells[1]) + hcno = hcno_m.group(1) if hcno_m else "" + + # 采标信息 + adopted = re.sub(r'<[^>]+>', '', cells[2]).strip() if len(cells) > 2 else "" + + standards.append({ + "标准号": std_code, + "标准名称": std_name, + "标准状态": std_status, + "发布日期": issue_date[:10] if issue_date else "", + "实施日期": act_date[:10] if act_date else "", + "是否采标": adopted, + "hcno": hcno, + }) + + return standards, total + + +def 
collect_all_standards(keyword, std_type, page_size, status_filter=""): + """采集所有标准列表""" + session = requests.Session() + all_standards = [] + + html = fetch_list_page(session, keyword, std_type, 1, page_size) + if not html: + return all_standards + + standards, total = parse_list_page(html) + all_standards.extend(standards) + + total_pages = (total + page_size - 1) // page_size if total > 0 else 1 + print(f" 总计: {total} 条标准, {total_pages} 页") + + for page_num in range(2, total_pages + 1): + html = fetch_list_page(session, keyword, std_type, page_num, page_size) + if not html: + break + standards, _ = parse_list_page(html) + if not standards: + break + all_standards.extend(standards) + print(f" 已采集: {len(all_standards)}/{total} (第 {page_num}/{total_pages} 页)", end="\r") + time.sleep(0.3) + + print() + + # 状态筛选 + if status_filter: + all_standards = [s for s in all_standards if status_filter in s.get("标准状态", "")] + print(f" 筛选 [{status_filter}]: {len(all_standards)} 条") + + return all_standards + + +# ─── PDF 下载 ─────────────────────────────────────────── +def download_pdf(hcno, save_path, max_retries=3): + """下载单个标准 PDF, 自动识别验证码""" + import ddddocr + ocr = ddddocr.DdddOcr(show_ad=False) + + for retry in range(max_retries): + s = requests.Session() + s.headers.update(HEADERS) + + try: + # Step 1: 访问下载页获取 session + s.get(DOWNLOAD_INIT_URL.format(hcno=hcno), timeout=REQUEST_TIMEOUT) + + # Step 2: 获取验证码 + OCR + verified = False + for captcha_attempt in range(MAX_CAPTCHA_RETRIES): + r = s.get(CAPTCHA_URL.format(ts=int(time.time() * 1000)), timeout=REQUEST_TIMEOUT) + if len(r.content) < 100: + time.sleep(1) + continue + + code = ocr.classification(r.content) + + # Step 3: 验证验证码 + vr = s.post(VERIFY_URL, data={"verifyCode": code}, timeout=REQUEST_TIMEOUT) + if vr.text.strip() == "success": + verified = True + break + + if not verified: + if retry < max_retries - 1: + print(f"验证码失败,重试({retry+1})") + continue + + # Step 4: 下载 PDF + dr = s.get(PDF_URL.format(hcno=hcno), 
timeout=60) + if len(dr.content) > 1000: + with open(save_path, "wb") as f: + f.write(dr.content) + return True, len(dr.content) + else: + # PDF 为空可能意味着该标准暂未提供全文, 不再重试 + return False, -1 + + except Exception as e: + print(f" [!] 下载异常: {e}, 重试 ({retry+1}/{max_retries})") + + time.sleep(2) + + return False, 0 + + +def sanitize_filename(name): + """清理文件名中的非法字符""" + return re.sub(r'[\\/:*?"<>|]', ' ', name).strip() + + +# ─── Excel 导出 ───────────────────────────────────────── +def export_to_excel(records, output_path): + wb = Workbook() + ws = wb.active + ws.title = "国家标准清单" + + hdr_font = Font(name="微软雅黑", bold=True, color="FFFFFF", size=11) + hdr_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid") + hdr_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + dat_font = Font(name="微软雅黑", size=10) + dat_align = Alignment(vertical="center", wrap_text=True) + even_fill = PatternFill(start_color="D6E4F0", end_color="D6E4F0", fill_type="solid") + border = Border( + left=Side(style="thin", color="B4C6E7"), + right=Side(style="thin", color="B4C6E7"), + top=Side(style="thin", color="B4C6E7"), + bottom=Side(style="thin", color="B4C6E7"), + ) + + col_names = [c[0] for c in OUTPUT_COLUMNS] + for ci, name in enumerate(col_names, 1): + cell = ws.cell(row=1, column=ci, value=name) + cell.font = hdr_font + cell.fill = hdr_fill + cell.alignment = hdr_align + cell.border = border + + for ri, rec in enumerate(records, 2): + for ci, (col_name, _) in enumerate(OUTPUT_COLUMNS, 1): + val = rec.get(col_name, "") + cell = ws.cell(row=ri, column=ci, value=val) + cell.font = dat_font + cell.alignment = dat_align + cell.border = border + if ri % 2 == 0: + cell.fill = even_fill + + for ci, (_, w) in enumerate(OUTPUT_COLUMNS, 1): + ws.column_dimensions[get_column_letter(ci)].width = w + + ws.freeze_panes = "A2" + ws.auto_filter.ref = f"A1:{get_column_letter(len(col_names))}{len(records) + 1}" + + wb.save(output_path) + + +# ─── 缓存 
─────────────────────────────────────────────── +def load_cache(): + if os.path.exists(CACHE_FILE): + try: + with open(CACHE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + pass + return {"downloaded_hcnos": [], "records": []} + + +def save_cache(cache): + with open(CACHE_FILE, "w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False) + + +# ─── 主流程 ───────────────────────────────────────────── +def main(): + parser = argparse.ArgumentParser( + description="国家标准全文公开系统 — PDF 批量下载工具", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python openstd_downloader.py # 下载"车"相关强制性国家标准 + python openstd_downloader.py --keyword "制动" # 搜索关键词 + python openstd_downloader.py --type 2 # 推荐性国家标准 + python openstd_downloader.py --status "现行" # 只下载现行标准 + python openstd_downloader.py --no-download # 仅采集元数据, 不下载PDF + """, + ) + parser.add_argument("--keyword", "-k", default="车", help="搜索关键词 (默认: 车)") + parser.add_argument("--type", "-t", default="强制性", + choices=["强制性", "推荐性", "指导性", "全部"], help="标准类型 (默认: 强制性)") + parser.add_argument("--status", "-s", default="", help="状态筛选: 现行/即将实施/废止 (默认: 全部)") + parser.add_argument("--page-size", "-p", type=int, default=50, help="每页条数 (默认50)") + parser.add_argument("--output-dir", "-o", default="GB_Doc", help="PDF下载目录 (默认: GB_Doc)") + parser.add_argument("--no-download", action="store_true", help="仅采集元数据, 不下载PDF") + parser.add_argument("--max-count", "-n", type=int, default=0, help="最大下载数量 (0=全部)") + + args = parser.parse_args() + std_type = STD_TYPES.get(args.type, "1") + + os.makedirs(args.output_dir, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + excel_path = os.path.join(args.output_dir, f"国家标准清单_{args.keyword}_{timestamp}.xlsx") + + print("=" * 60) + print(" 国家标准全文公开系统 — PDF 批量下载工具") + print("=" * 60) + print(f" 关键词: {args.keyword}") + print(f" 类型: {args.type}") + print(f" 状态: {args.status or '全部'}") + print(f" 下载目录: 
{args.output_dir}/") + print(f" 下载PDF: {'否' if args.no_download else '是'}") + print("-" * 60) + + # Step 1: 采集标准列表 + print(" [1/2] 采集标准列表...") + standards = collect_all_standards(args.keyword, std_type, args.page_size, args.status) + + if not standards: + print(" 未找到任何标准") + sys.exit(1) + + print(f" 共 {len(standards)} 条标准") + + # 限制数量 + if args.max_count > 0: + standards = standards[:args.max_count] + print(f" 限制下载前 {args.max_count} 条") + + # Step 2: 下载 PDF + cache = load_cache() + downloaded_hcnos = set(cache.get("downloaded_hcnos", [])) + + if not args.no_download: + print(f"\n [2/2] 下载 PDF 文件...") + success_count = 0 + skip_count = 0 + fail_count = 0 + + for idx, std in enumerate(standards, 1): + hcno = std.get("hcno", "") + code = std.get("标准号", "") + name = std.get("标准名称", "") + filename = sanitize_filename(f"{code} {name}.pdf") + filepath = os.path.join(args.output_dir, filename) + + # 跳过已下载 + if hcno in downloaded_hcnos or os.path.exists(filepath): + std["下载状态"] = "已存在" + std["文件名"] = filename + skip_count += 1 + continue + + print(f" [{idx}/{len(standards)}] {code} {name[:30]}...", end=" ") + + ok, size = download_pdf(hcno, filepath) + + if ok: + std["下载状态"] = "成功" + std["文件名"] = filename + downloaded_hcnos.add(hcno) + success_count += 1 + print(f"OK ({size/1024:.0f} KB)") + else: + std["下载状态"] = "失败" + std["文件名"] = "" + fail_count += 1 + print("FAILED") + + # 保存进度 + cache["downloaded_hcnos"] = list(downloaded_hcnos) + save_cache(cache) + + # 礼貌延迟 + time.sleep(1) + + print(f"\n 下载完成: 成功 {success_count}, 跳过 {skip_count}, 失败 {fail_count}") + else: + for std in standards: + std["下载状态"] = "跳过" + std["文件名"] = "" + print("\n [2/2] 跳过下载 (--no-download)") + + # 导出 Excel + export_to_excel(standards, excel_path) + print(f"\n 元数据已导出: {excel_path}") + + # 统计 + print(f"\n {'=' * 50}") + print(f" 总计: {len(standards)} 条标准") + print(f" Excel: {excel_path}") + if not args.no_download: + print(f" PDF目录: {args.output_dir}/") + # 列出已下载文件 + pdfs = [f for f in 
os.listdir(args.output_dir) if f.endswith('.pdf')] + print(f" PDF文件数: {len(pdfs)}") + print(f" {'=' * 50}") + + +if __name__ == "__main__": + main() diff --git a/openstd_gb_t_downloader.py b/openstd_gb_t_downloader.py new file mode 100644 index 0000000..698cdcb --- /dev/null +++ b/openstd_gb_t_downloader.py @@ -0,0 +1,416 @@ +""" +国家标准全文公开系统 — 推荐性国家标准 PDF 批量下载工具 +数据来源: https://openstd.samr.gov.cn/bzgk/std/std_list_type (p.p1=2 推荐性国家标准) +下载地址: http://c.gb688.cn/bzgk/gb/viewGb + +功能: + 1. 按关键词搜索推荐性国家标准 (如 "车" 可匹配所有车辆相关标准) + 2. 自动识别验证码 (ddddocr) 并下载 PDF 全文 + 3. 支持筛选: 现行/即将实施/废止 + 4. 文件命名: "标准号 标准名称.pdf" (如 "GB/T 1234-2024 xxx技术要求.pdf") + 5. 断点续传: 已下载的文件自动跳过 + 6. 导出标准元数据 Excel + +用法: + python openstd_gb_t_downloader.py # 下载"车"相关推荐性国家标准 + python openstd_gb_t_downloader.py --keyword "制动" # 搜索关键词 + python openstd_gb_t_downloader.py --status "现行" # 只下载现行标准 + python openstd_gb_t_downloader.py --page-size 50 # 每页50条 + python openstd_gb_t_downloader.py --output-dir ./GB_T_Doc # 自定义下载目录 + python openstd_gb_t_downloader.py --no-download # 仅采集元数据, 不下载PDF + +依赖: + pip install requests ddddocr openpyxl +""" + +import sys +import io +import os +import re +import json +import time +import argparse +from datetime import datetime + +import requests +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter + +# ─── Windows 控制台中文输出修复 ───────────────────────── +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +# ─── 配置 ─────────────────────────────────────────────── +LIST_URL = "https://openstd.samr.gov.cn/bzgk/std/std_list_type" +DOWNLOAD_INIT_URL = "http://c.gb688.cn/bzgk/gb/showGb?type=download&hcno={hcno}" +CAPTCHA_URL = "http://c.gb688.cn/bzgk/gb/gc?_{ts}" +VERIFY_URL = "http://c.gb688.cn/bzgk/gb/verifyCode" +PDF_URL = 
"http://c.gb688.cn/bzgk/gb/viewGb?hcno={hcno}" + +# 推荐性国家标准 p.p1=2 +STD_TYPE_P1 = "2" +DEFAULT_KEYWORD = "车" + +MAX_CAPTCHA_RETRIES = 8 +REQUEST_TIMEOUT = 30 +CACHE_FILE = ".openstd_gb_t_cache.json" + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", +} + +# 输出列定义 +OUTPUT_COLUMNS = [ + ("标准号", 22), + ("标准名称", 50), + ("标准状态", 10), + ("发布日期", 14), + ("实施日期", 14), + ("是否采标", 10), + ("hcno", 35), + ("文件名", 60), + ("下载状态", 10), +] + +# ─── 列表页解析 ───────────────────────────────────────── +def fetch_list_page(session, keyword, page_num, page_size): + """请求列表页, 返回 HTML""" + params = { + "p.p1": STD_TYPE_P1, # 推荐性国家标准 + "p.p2": keyword, + "p.p90": "circulation_date", + "p.p91": "desc", + } + if page_num > 1: + params["page"] = page_num + params["pageSize"] = page_size + + for attempt in range(3): + try: + resp = session.get(LIST_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT) + resp.raise_for_status() + return resp.content.decode("utf-8") + except Exception as e: + print(f" [!] 
def parse_list_page(html):
    """Parse one list-page HTML document into standard records.

    Args:
        html: HTML text of a list page.

    Returns:
        tuple: ``(standards, total)``.

        ``standards`` is a list of dicts with the keys ``标准号``, ``标准名称``,
        ``标准状态``, ``发布日期``, ``实施日期``, ``是否采标`` and ``hcno``. Only
        table rows with at least six ``<td>`` cells whose first cell is a
        number (the row index) are returned.

        ``total`` is the sum of the three status counters shown on the page
        (现行 / 即将实施 / 废止), or 0 when they are not present.
    """
    def _text(fragment):
        # Drop any markup inside a cell and trim surrounding whitespace.
        return re.sub(r'<[^>]+>', '', fragment).strip()

    counters = re.search(
        r'现行\((\d+)\).*?即将实施\((\d+)\).*?废止\((\d+)\)', html, re.S
    )
    total = sum(int(n) for n in counters.groups()) if counters else 0

    # NOTE(review): the row/cell patterns in the source text were damaged
    # (they read r']*>(.*?)', which can only match a bare '>' with an empty
    # group). They are restored here as tag-anchored patterns; `\b` keeps
    # `<tr` from matching longer tag names such as `<track>`.
    row_re = re.compile(r'<tr\b[^>]*>(.*?)</tr>', re.S | re.I)
    cell_re = re.compile(r'<td\b[^>]*>(.*?)</td>', re.S | re.I)
    hcno_re = re.compile(r"showInfo\('([A-F0-9]{32})'\)")

    standards = []
    for row in row_re.findall(html):
        cells = cell_re.findall(row)
        if len(cells) < 6:
            continue
        # Data rows start with a numeric index; header rows do not.
        if not _text(cells[0]).isdigit():
            continue

        issue_date = _text(cells[5])
        act_date = _text(cells[6]) if len(cells) > 6 else ""

        # The download id normally sits on the link in the code cell; fall
        # back to anywhere in the same row so it is not lost if the markup
        # places it on a different cell.
        hcno_match = hcno_re.search(cells[1]) or hcno_re.search(row)

        standards.append({
            "标准号": _text(cells[1]),
            "标准名称": _text(cells[3]),
            "标准状态": _text(cells[4]),
            # Keep only the YYYY-MM-DD part of a timestamp-like value.
            "发布日期": issue_date[:10],
            "实施日期": act_date[:10],
            "是否采标": _text(cells[2]),
            "hcno": hcno_match.group(1) if hcno_match else "",
        })

    return standards, total
parse_list_page(html) + if not standards: + break + all_standards.extend(standards) + print(f" 已采集: {len(all_standards)}/{total} (第 {page_num}/{total_pages} 页)", end="\r") + time.sleep(0.3) + + print() + + if status_filter: + all_standards = [s for s in all_standards if status_filter in s.get("标准状态", "")] + print(f" 筛选 [{status_filter}]: {len(all_standards)} 条") + + return all_standards + + +# ─── PDF 下载 ─────────────────────────────────────────── +def download_pdf(hcno, save_path, max_retries=3): + """下载单个标准 PDF, 自动识别验证码""" + import ddddocr + ocr = ddddocr.DdddOcr(show_ad=False) + + for retry in range(max_retries): + s = requests.Session() + s.headers.update(HEADERS) + + try: + s.get(DOWNLOAD_INIT_URL.format(hcno=hcno), timeout=REQUEST_TIMEOUT) + + verified = False + for captcha_attempt in range(MAX_CAPTCHA_RETRIES): + r = s.get(CAPTCHA_URL.format(ts=int(time.time() * 1000)), timeout=REQUEST_TIMEOUT) + if len(r.content) < 100: + time.sleep(1) + continue + + code = ocr.classification(r.content) + + vr = s.post(VERIFY_URL, data={"verifyCode": code}, timeout=REQUEST_TIMEOUT) + if vr.text.strip() == "success": + verified = True + break + + if not verified: + if retry < max_retries - 1: + print(f"验证码失败,重试({retry+1})") + continue + + dr = s.get(PDF_URL.format(hcno=hcno), timeout=60) + if len(dr.content) > 1000: + with open(save_path, "wb") as f: + f.write(dr.content) + return True, len(dr.content) + else: + return False, -1 + + except Exception as e: + print(f" [!] 
def sanitize_filename(name):
    """Make *name* safe to use as a single file name.

    Each character that is illegal in Windows file names
    (``\\ / : * ? " < > |``) and each ASCII control character
    (``\\x00``-``\\x1f``, e.g. a newline or tab carried over from scraped
    HTML) is replaced by one space; leading and trailing whitespace is then
    removed.

    Runs of spaces are deliberately NOT collapsed: resume detection compares
    against file names produced by earlier runs, so the mapping for names
    that were already valid must stay unchanged.

    Args:
        name: Proposed file name (not a path; separators are replaced).

    Returns:
        str: The cleaned file name; may be empty if nothing usable remains.
    """
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', ' ', name).strip()
def main():
    """Command-line entry point for the recommended-standard (GB/T) downloader.

    Steps:
        1. Parse the command line.
        2. Collect the list of matching standards.
        3. Unless ``--no-download`` is given, download each PDF, skipping
           entries already recorded in the cache or already on disk, and
           persist progress after every attempt.
        4. Export the collected metadata (with per-row download status) to
           an Excel workbook inside the output directory.

    Exits with status 1 when no standard matches, and with an argparse usage
    error when ``--page-size`` is not positive.
    """
    parser = argparse.ArgumentParser(
        description="国家标准全文公开系统 — 推荐性国家标准 PDF 批量下载工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
 python openstd_gb_t_downloader.py # 下载"车"相关推荐性国家标准
 python openstd_gb_t_downloader.py --keyword "制动" # 搜索关键词
 python openstd_gb_t_downloader.py --status "现行" # 只下载现行标准
 python openstd_gb_t_downloader.py --no-download # 仅采集元数据, 不下载PDF
 """,
    )
    parser.add_argument("--keyword", "-k", default=DEFAULT_KEYWORD, help="搜索关键词 (默认: 车)")
    parser.add_argument("--status", "-s", default="", help="状态筛选: 现行/即将实施/废止 (默认: 全部)")
    parser.add_argument("--page-size", "-p", type=int, default=50, help="每页条数 (默认50)")
    parser.add_argument("--output-dir", "-o", default="GB_T_Doc", help="PDF下载目录 (默认: GB_T_Doc)")
    parser.add_argument("--no-download", action="store_true", help="仅采集元数据, 不下载PDF")
    parser.add_argument("--max-count", "-n", type=int, default=0, help="最大下载数量 (0=全部)")

    args = parser.parse_args()

    # The page count is computed by dividing by page_size, so a zero or
    # negative value would crash (or loop nonsensically) during collection.
    if args.page_size <= 0:
        parser.error("--page-size 必须为正整数")

    os.makedirs(args.output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    excel_path = os.path.join(args.output_dir, f"推荐性国家标准清单_{args.keyword}_{timestamp}.xlsx")

    print("=" * 60)
    print(" 国家标准全文公开系统 — 推荐性国家标准 PDF 批量下载工具")
    print("=" * 60)
    print(f" 关键词: {args.keyword}")
    print(" 类型: 推荐性国家标准 (GB/T)")
    print(f" 状态: {args.status or '全部'}")
    print(f" 下载目录: {args.output_dir}/")
    print(f" 下载PDF: {'否' if args.no_download else '是'}")
    print("-" * 60)

    # Step 1: collect the list of standards.
    print(" [1/2] 采集标准列表...")
    standards = collect_all_standards(args.keyword, args.page_size, args.status)

    if not standards:
        print(" 未找到任何标准")
        sys.exit(1)

    print(f" 共 {len(standards)} 条标准")

    if args.max_count > 0:
        standards = standards[:args.max_count]
        print(f" 限制下载前 {args.max_count} 条")

    # Step 2: download the PDFs.
    cache = load_cache()
    downloaded_hcnos = set(cache.get("downloaded_hcnos", []))

    if not args.no_download:
        print("\n [2/2] 下载 PDF 文件...")
        success_count = 0
        skip_count = 0
        fail_count = 0

        for idx, std in enumerate(standards, 1):
            hcno = std.get("hcno", "")
            code = std.get("标准号", "")
            name = std.get("标准名称", "")
            filename = sanitize_filename(f"{code} {name}.pdf")
            filepath = os.path.join(args.output_dir, filename)

            # Resume support: skip anything recorded in the cache or
            # already present on disk.
            if hcno in downloaded_hcnos or os.path.exists(filepath):
                std["下载状态"] = "已存在"
                std["文件名"] = filename
                skip_count += 1
                continue

            print(f" [{idx}/{len(standards)}] {code} {name[:30]}...", end=" ")

            if not hcno:
                # Every download URL is keyed by hcno; without one the
                # request cannot succeed, so do not burn captcha attempts
                # and retries on it.
                std["下载状态"] = "失败"
                std["文件名"] = ""
                fail_count += 1
                print("FAILED")
                continue

            ok, size = download_pdf(hcno, filepath)

            if ok:
                std["下载状态"] = "成功"
                std["文件名"] = filename
                downloaded_hcnos.add(hcno)
                success_count += 1
                print(f"OK ({size/1024:.0f} KB)")
            elif size == -1:
                # download_pdf signals "verified but no usable PDF" with -1.
                std["下载状态"] = "无PDF"
                std["文件名"] = ""
                fail_count += 1
                print("NO PDF")
            else:
                std["下载状态"] = "失败"
                std["文件名"] = ""
                fail_count += 1
                print("FAILED")

            # Persist progress after every attempt; sorted() keeps the
            # cache file stable between runs instead of set order.
            cache["downloaded_hcnos"] = sorted(downloaded_hcnos)
            save_cache(cache)

            # Polite delay between downloads.
            time.sleep(1)

        print(f"\n 下载完成: 成功 {success_count}, 跳过 {skip_count}, 无PDF/失败 {fail_count}")
    else:
        for std in standards:
            std["下载状态"] = "跳过"
            std["文件名"] = ""
        print("\n [2/2] 跳过下载 (--no-download)")

    export_to_excel(standards, excel_path)
    print(f"\n 元数据已导出: {excel_path}")

    print(f"\n {'=' * 50}")
    print(f" 总计: {len(standards)} 条推荐性国家标准")
    print(f" Excel: {excel_path}")
    if not args.no_download:
        print(f" PDF目录: {args.output_dir}/")
        pdfs = [f for f in os.listdir(args.output_dir) if f.endswith('.pdf')]
        print(f" PDF文件数: {len(pdfs)}")
    print(f" {'=' * 50}")
https://std.samr.gov.cn/hb/hbQuery?initnode=QC%20%E6%B1%BD%E8%BD%A6 +API: GET https://std.samr.gov.cn/hb/search/hbPage + +功能: + 1. 全量采集 QC 汽车行业标准数据 (共约 990 条) + 2. 采集字段: 标准号、标准名称、发布日期、实施日期、所属行业、标准状态、 + 标准性质、标准类别、归口单位、发布文号、CCS分类、ICS分类、制修定、备案号 等 + 3. 支持关键词搜索 + 4. 支持断点续采 + 5. 导出为格式化的 Excel 文件 (含统计 Sheet) + +用法: + python samr_qc_scraper.py # 全量采集 QC 汽车行业标准 + python samr_qc_scraper.py --search "制动" # 搜索关键词 + python samr_qc_scraper.py --industry "QC 汽车" # 指定行业 (默认 QC 汽车) + python samr_qc_scraper.py --resume # 断点续采 + python samr_qc_scraper.py --page-size 50 # 每页50条 + python samr_qc_scraper.py --output 自定义名称 # 自定义输出文件名 +""" + +import sys +import io +import os +import json +import time +import argparse +from datetime import datetime + +import requests +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter + +# ─── Windows 控制台中文输出修复 ───────────────────────── +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +# ─── 配置 ─────────────────────────────────────────────── +API_URL = "https://std.samr.gov.cn/hb/search/hbPage" +DETAIL_URL = "https://std.samr.gov.cn/hb/search/stdHBDetailed?id={id}" +SOURCE_PAGE = "https://std.samr.gov.cn/hb/hbQuery?initnode=QC%20%E6%B1%BD%E8%BD%A6" +DEFAULT_PAGE_SIZE = 50 +MAX_RETRIES = 5 +RETRY_DELAY = 3 +REQUEST_TIMEOUT = 30 +CACHE_FILE = ".samr_qc_cache.json" + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Accept": "application/json, text/plain, */*", + "Referer": "https://std.samr.gov.cn/hb/hbQuery?initnode=QC%20%E6%B1%BD%E8%BD%A6", + "X-Requested-With": "XMLHttpRequest", +} + +# 字段映射: API 字段 → 中文显示名 +FIELD_MAP = { + "C_STD_CODE": "标准号", + "C_NAME": "标准名称", + "ISSUE_DATE": "发布日期", + "ACT_DATE": 
"实施日期", + "TRADE_DEPT": "所属行业", + "STATE": "标准状态", + "STD_NATURE": "标准性质", + "STD_CATEGORY": "标准类别", + "CHARGE_DEPT": "归口单位", + "NOTICE_NO": "发布文号", + "CCS": "CCS分类", + "ICS": "ICS分类", + "ICS_NAME1_1": "ICS分类名称", + "STD_ZXD": "制修定", + "RECORD_NO": "备案号", + "STD_LEVEL": "标准层级", + "STD_DOMAIN": "标准领域", + "id": "标准ID", +} + +# Excel 输出列定义 (列名, 列宽) +OUTPUT_COLUMNS = [ + ("标准号", 20), + ("标准名称", 52), + ("发布日期", 13), + ("实施日期", 13), + ("所属行业", 12), + ("标准状态", 12), + ("标准性质", 10), + ("标准类别", 12), + ("归口单位", 20), + ("发布文号", 18), + ("CCS分类", 10), + ("ICS分类", 10), + ("ICS分类名称", 20), + ("制修定", 8), + ("备案号", 16), + ("标准层级", 10), + ("标准领域", 10), + ("详情链接", 55), +] + + +# ─── 网络请求 ─────────────────────────────────────────── +def fetch_page(session, page_num, page_size, industry="QC 汽车"): + """请求单页数据""" + params = { + "op": industry, + "ISSUE_DATE": "", + "pageNumber": page_num, + "pageSize": page_size, + } + for attempt in range(1, MAX_RETRIES + 1): + try: + resp = session.get(API_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT) + resp.raise_for_status() + data = json.loads(resp.content.decode("utf-8")) + return data + except Exception as e: + print(f" [!] 
def normalize_record(record):
    """Convert one raw API row into a dict keyed by the export column names.

    Every entry of ``FIELD_MAP`` is copied across under its display name;
    string values are stripped and any falsy value becomes ``""``. A
    ``详情链接`` entry is added from ``DETAIL_URL`` when the row has an
    ``id``, and an ``ICS分类名称`` value of the form ``"43_道路车辆工程"`` is
    reduced to the part after the last underscore.

    Args:
        record: Mapping of API field names to values for one standard.

    Returns:
        dict: The normalized record.
    """
    def _clean(value):
        if isinstance(value, str):
            value = value.strip()
        return value or ""

    normalized = {
        label: _clean(record.get(source_key, ""))
        for source_key, label in FIELD_MAP.items()
    }

    # Detail-page link, only when the row carries an id.
    raw_id = record.get("id", "")
    if raw_id:
        normalized["详情链接"] = DETAIL_URL.format(id=raw_id)
    else:
        normalized["详情链接"] = ""

    # Strip the numeric prefix from the ICS name; keep the original text if
    # nothing follows the last underscore.
    ics_label = normalized.get("ICS分类名称", "")
    if "_" in ics_label:
        suffix = ics_label.rpartition("_")[2]
        normalized["ICS分类名称"] = suffix if suffix else ics_label

    return normalized
无法获取数据, 请检查网络") + return all_records + + total = first_data.get("total", 0) + rows = first_data.get("rows") or [] + total_pages = (total + page_size - 1) // page_size + + records = [normalize_record(r) for r in rows] + all_records.extend(records) + + print(f" 总计: {total} 条标准, 共 {total_pages} 页, 每页 {page_size} 条") + print(f" 已采集: {len(all_records)}/{total}", end="\r") + + cache = {"records": all_records, "last_page": start_page, "industry": industry, "total": total} + save_cache(cache) + + for page_num in range(start_page + 1, total_pages + 1): + data = fetch_page(session, page_num, page_size, industry) + if data is None: + print(f"\n [!] 第 {page_num} 页失败, 保存进度退出") + save_cache(cache) + break + + rows = data.get("rows") or [] + records = [normalize_record(r) for r in rows] + all_records.extend(records) + + cache["records"] = all_records + cache["last_page"] = page_num + save_cache(cache) + + print(f" 已采集: {len(all_records)}/{total} (第 {page_num}/{total_pages} 页)", end="\r") + time.sleep(0.3) + + print() + return all_records + + +# ─── Excel 导出 ───────────────────────────────────────── +def export_to_excel(records, output_path): + wb = Workbook() + ws = wb.active + ws.title = "汽车行业标准" + + # ── 样式定义 ── + hdr_font = Font(name="微软雅黑", bold=True, color="FFFFFF", size=11) + hdr_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid") + hdr_align = Alignment(horizontal="center", vertical="center", wrap_text=True) + dat_font = Font(name="微软雅黑", size=10) + dat_align = Alignment(vertical="center", wrap_text=True) + even_fill = PatternFill(start_color="D6E4F0", end_color="D6E4F0", fill_type="solid") + border = Border( + left=Side(style="thin", color="B4C6E7"), + right=Side(style="thin", color="B4C6E7"), + top=Side(style="thin", color="B4C6E7"), + bottom=Side(style="thin", color="B4C6E7"), + ) + + # ── 标题行 ── + col_names = [c[0] for c in OUTPUT_COLUMNS] + for ci, name in enumerate(col_names, 1): + cell = ws.cell(row=1, column=ci, value=name) + cell.font = 
hdr_font + cell.fill = hdr_fill + cell.alignment = hdr_align + cell.border = border + + # ── 数据行 ── + for ri, rec in enumerate(records, 2): + for ci, (col_name, _) in enumerate(OUTPUT_COLUMNS, 1): + val = rec.get(col_name, "") + cell = ws.cell(row=ri, column=ci, value=val) + cell.font = dat_font + cell.alignment = dat_align + cell.border = border + if ri % 2 == 0: + cell.fill = even_fill + + # ── 列宽 ── + for ci, (_, w) in enumerate(OUTPUT_COLUMNS, 1): + ws.column_dimensions[get_column_letter(ci)].width = w + + # ── 冻结 & 筛选 ── + ws.freeze_panes = "A2" + ws.auto_filter.ref = f"A1:{get_column_letter(len(col_names))}{len(records) + 1}" + + # ── 统计信息 Sheet ── + ws_stat = wb.create_sheet("统计信息") + + status_count = {} + nature_count = {} + category_count = {} + zxd_count = {} + year_count = {} + + for r in records: + for field, target in [ + ("标准状态", status_count), + ("标准性质", nature_count), + ("标准类别", category_count), + ("制修定", zxd_count), + ]: + v = r.get(field, "未知") or "未知" + target[v] = target.get(v, 0) + 1 + + issue = r.get("发布日期", "") or "" + year = issue[:4] if len(issue) >= 4 else "未知" + year_count[year] = year_count.get(year, 0) + 1 + + stat_rows = [ + ("采集时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + ("数据来源", "全国标准信息公共服务平台 (std.samr.gov.cn)"), + ("所属行业", "QC 汽车"), + ("标准总数", len(records)), + ("", ""), + ("── 标准状态分布 ──", ""), + ("状态", "数量"), + ] + for k, v in sorted(status_count.items(), key=lambda x: -x[1]): + stat_rows.append((k, v)) + + stat_rows += [("", ""), ("── 标准性质分布 ──", ""), ("性质", "数量")] + for k, v in sorted(nature_count.items(), key=lambda x: -x[1]): + stat_rows.append((k, v)) + + stat_rows += [("", ""), ("── 标准类别分布 ──", ""), ("类别", "数量")] + for k, v in sorted(category_count.items(), key=lambda x: -x[1]): + stat_rows.append((k, v)) + + stat_rows += [("", ""), ("── 制修定分布 ──", ""), ("类型", "数量")] + for k, v in sorted(zxd_count.items(), key=lambda x: -x[1]): + stat_rows.append((k, v)) + + stat_rows += [("", ""), ("── 按发布年份分布 ──", ""), ("年份", 
def main():
    """Command-line entry point for the industry-standard scraper.

    Parses the command line, scrapes all pages via ``scrape_all``, removes
    duplicate standards, writes the Excel workbook next to this script and
    finally cleans up the progress cache when it is no longer needed.

    Exits with status 1 when nothing was scraped, and with an argparse usage
    error when ``--page-size`` is not positive.
    """
    parser = argparse.ArgumentParser(
        description="全国标准信息公共服务平台 — 行业标准(汽车)数据采集工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
 python samr_qc_scraper.py # 全量采集
 python samr_qc_scraper.py --resume # 断点续采
 python samr_qc_scraper.py --page-size 50 # 每页50条
 python samr_qc_scraper.py --output QC汽车标准 # 自定义文件名
 """,
    )
    parser.add_argument("--industry", "-i", default="QC 汽车", help="行业筛选 (默认: QC 汽车)")
    parser.add_argument("--resume", "-r", action="store_true", help="断点续采")
    parser.add_argument("--page-size", "-p", type=int, default=DEFAULT_PAGE_SIZE, help="每页条数 (默认50)")
    parser.add_argument("--output", "-o", default=None, help="输出文件名 (不含扩展名)")

    args = parser.parse_args()

    # scrape_all derives the page count by dividing by page_size.
    if args.page_size <= 0:
        parser.error("--page-size 必须为正整数")

    output_name = args.output or f"行业标准_QC汽车_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{output_name}.xlsx")

    print("=" * 60)
    print(" 全国标准信息公共服务平台 — 行业标准数据采集工具")
    print("=" * 60)
    print(" 数据来源: std.samr.gov.cn")
    print(f" 所属行业: {args.industry}")
    print(f" 每页条数: {args.page_size}")
    print(f" 输出文件: {output_path}")
    print("-" * 60)

    start_time = time.time()
    records = scrape_all(args.page_size, args.industry, resume=args.resume)
    elapsed = time.time() - start_time

    if not records:
        print(" 未获取到任何数据")
        sys.exit(1)

    # De-duplicate by standard number. Records WITHOUT a number cannot be
    # compared with each other, so they are all kept; treating "" as a key
    # would silently collapse every such record into the first one.
    seen = set()
    unique = []
    for r in records:
        code = r.get("标准号", "")
        if code:
            if code in seen:
                continue
            seen.add(code)
        unique.append(r)
    dup = len(records) - len(unique)

    print("-" * 60)
    print(f" 采集完成! 用时 {elapsed:.1f} 秒")
    print(f" 获取 {len(records)} 条, 去重后 {len(unique)} 条", end="")
    print(f" (移除 {dup} 条重复)" if dup else "")

    print(" 生成 Excel 文件...")
    export_to_excel(unique, output_path)
    size = os.path.getsize(output_path) / 1024

    print(f"\n {'=' * 50}")
    print(f" 导出完成: {output_path}")
    print(f" 文件大小: {size:.1f} KB")
    print(f" 标准总数: {len(unique)}")
    print(f" {'=' * 50}")

    # Clean up the progress cache after a non-resume run, but only if the
    # scrape actually finished. scrape_all stops early when a page fails
    # and leaves its progress in the cache so that --resume can continue;
    # deleting the cache in that case would throw that progress away.
    if not args.resume and os.path.exists(CACHE_FILE):
        cached = load_cache()
        finished = len(cached.get("records", [])) >= cached.get("total", 0)
        if finished:
            os.remove(CACHE_FILE)
        else:
            print(" [*] 采集未完成, 已保留缓存, 可使用 --resume 继续")
def handle_endtag(self, tag):
    """Handle a closing tag while collecting top-level table data.

    * ``</table>``: store the finished table (if it has rows) when leaving
      the outermost table, then decrease the nesting depth (never below 0).
    * ``</tr>``: store the current row (if it has cells) in the current
      table. A cell that is still open is completed first so its text is
      not lost.
    * ``</td>`` / ``</th>``: complete the open cell and add its
      whitespace-normalized text to the current row. A closing tag with no
      open cell is ignored.
    * ``</script>`` / ``</style>``: resume collecting text.

    Row and cell tags are only honoured at table depth 1, i.e. nested
    tables do not create rows or cells of their own.
    """
    tag = tag.lower()
    if tag == "table":
        if self._in_table == 1 and self._current_table:
            self.tables.append(self._current_table)
        self._in_table = max(0, self._in_table - 1)
    elif tag == "tr" and self._in_table == 1:
        if self._in_td:
            # The cell's end tag was omitted; flush it with the row.
            self._in_td = False
            self._current_row.append(" ".join("".join(self._current_cell).split()))
            self._current_cell = []
        if self._current_row:
            self._current_table.append(self._current_row)
    elif tag in ("td", "th") and self._in_table == 1:
        # Only close a cell that was actually opened. Without this guard
        # an unmatched end tag re-appends the previous cell's text,
        # shifting every following column.
        if self._in_td:
            self._in_td = False
            # Join the text chunks and collapse all whitespace runs.
            self._current_row.append(" ".join("".join(self._current_cell).split()))
            self._current_cell = []
    elif tag in ("script", "style"):
        self._skip = False
def fetch_page(session, url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    The request is attempted up to ``MAX_RETRIES`` times; after a failed
    attempt the function waits ``2 * attempt`` seconds before retrying.

    Args:
        session: ``requests`` session used to issue the GET request.
        url: Absolute URL of the page.

    Returns:
        str | None: The page text, or ``None`` if every attempt failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            resp = session.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            resp.raise_for_status()
            # Decode explicitly as UTF-8 rather than trusting the encoding
            # guessed by requests. errors="replace" keeps a single invalid
            # byte from raising UnicodeDecodeError, which is not a
            # RequestException and would otherwise abort the whole run.
            return resp.content.decode("utf-8", errors="replace")
        except requests.exceptions.RequestException as e:
            print(f" [!] 请求失败 (第 {attempt}/{MAX_RETRIES} 次): {e}")
            if attempt < MAX_RETRIES:
                time.sleep(2 * attempt)
    return None
  • datetitle
  • + pattern = r'href="(/gjbzh/isoiec/iso/gzdt/fbbz/[^"]+\.html)"' + links = re.findall(pattern, html) + + if not links: + break + + for link in links: + full_url = BASE_URL + link + # 从链接上下文提取日期和标题 + escaped = re.escape(link) + ctx_pattern = rf'\s*(\d{{4}}-\d{{2}}-\d{{2}})([^<]+)' + ctx_match = re.search(ctx_pattern, html) + date = ctx_match.group(1) if ctx_match else "" + title = ctx_match.group(2).strip() if ctx_match else os.path.basename(link) + + subpages.append((full_url, title, date)) + + # 检查是否有下一页 + if f'index_{page_num + 1}.html' in html or f'index_{page_num + 1}"' in html: + page_num += 1 + else: + # 也检查下一页链接 + if f'>下一页<' in html and len(links) > 0: + page_num += 1 + else: + break + + return subpages + + +# ─── 详情页解析 ───────────────────────────────────────── +def scrape_detail_page(session, url, title, date): + """采集单个详情页, 返回记录列表""" + html = fetch_page(session, url) + if not html: + return [] + + tables = parse_html_tables(html) + if not tables: + # 可能表格在 iframe 或特殊容器中, 尝试用正则直接提取 + return _fallback_parse(html, title, date) + + # 找最大的表格 (通常是标准数据表) + best_table = max(tables, key=len) + records, headers = extract_records_from_table(best_table) + + # 补充元数据 + for r in records: + r["发布批次"] = title + r["发布日期"] = date + r["来源页面"] = url + # 确保 "所属机构" 字段存在 (旧格式可能只有 "所属分技术委员会") + if "所属机构" not in r or not r.get("所属机构"): + r["所属机构"] = r.get("所属分技术委员会", "") + + return records + + +def _fallback_parse(html, title, date): + """备用: 正则解析表格行""" + records = [] + rows = re.findall(r']*>(.*?)