Files
appium_ui_test/xml_analyzer.py
2025-10-31 17:53:12 +08:00

658 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
XML布局分析器模块
用于解析和分析Android UI XML布局文件以及SVG设计图的布局结构
"""
from lxml import etree
import re
from pathlib import Path
from config import Config
class XMLAnalyzer:
    """Analyze Android UI XML dumps and compare them with SVG design files.

    The analyzer keeps the result of the most recent operations on the
    instance:

    * ``elements_data`` - flat, document-order list of parsed XML nodes.
    * ``issues`` - problems detected while parsing the XML nodes.
    * ``svg_elements`` - flat, document-order list of parsed SVG nodes.
    * ``layout_comparison_result`` - result dict of the last
      ``compare_layouts`` call, or ``None``.
    * ``config`` - the ``Config.XML_ANALYSIS`` mapping; this class reads the
      keys ``check_accessibility``, ``check_duplicates``,
      ``min_clickable_size`` and ``max_text_length``.
    """

    # A plain SVG number, optionally suffixed with "px" (user units).
    # Other units and percentages cannot be resolved without a viewport and
    # are therefore treated as unparsable.
    _SVG_NUMBER_RE = re.compile(
        r'^\s*([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)\s*(?:px)?\s*$'
    )

    # Presentation attributes copied into an SVG element's ``styles`` dict.
    _SVG_STYLE_ATTRS = (
        'fill', 'stroke', 'stroke-width', 'opacity',
        'font-family', 'font-size', 'color',
    )

    def __init__(self):
        self.elements_data = []
        self.issues = []
        self.svg_elements = []
        self.layout_comparison_result = None
        self.config = Config.XML_ANALYSIS

    # ------------------------------------------------------------------
    # XML parsing
    # ------------------------------------------------------------------
    def parse_xml(self, xml_content):
        """Parse an XML document and populate ``elements_data``/``issues``.

        Args:
            xml_content: The XML document as ``str`` or ``bytes``.

        Returns:
            ``True`` on success, ``False`` if parsing failed (the error is
            printed). Previously parsed data is only replaced once the
            document has been parsed successfully.
        """
        try:
            if isinstance(xml_content, str):
                # etree.fromstring() rejects unicode strings that carry an
                # encoding declaration, so drop the XML declaration first.
                stripped = xml_content.strip()
                if stripped.startswith('<?xml'):
                    declaration_end = stripped.find('?>')
                    # Only cut when the declaration is actually terminated;
                    # otherwise leave the input alone and let the parser
                    # report the malformed document.
                    if declaration_end != -1:
                        xml_content = stripped[declaration_end + 2:].strip()
            root = etree.fromstring(xml_content)
            self.elements_data = []
            self.issues = []
            self._parse_element(root, 0)
            return True
        except Exception as e:
            print(f"❌ XML解析失败: {e}")
            return False

    @staticmethod
    def _parse_bounds(bounds):
        """Parse a ``[x1,y1][x2,y2]`` bounds string.

        Returns:
            A dict with the corner coordinates, width, height and integer
            centre point, or ``None`` when ``bounds`` is empty or malformed.
        """
        if not bounds:
            return None
        coords = bounds.replace('[', '').replace(']', ',').split(',')
        if len(coords) < 4:
            return None
        try:
            x1, y1, x2, y2 = map(int, coords[:4])
        except ValueError:
            # Malformed coordinates: the element is kept without bounds.
            return None
        return {
            'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
            'width': x2 - x1, 'height': y2 - y1,
            'center_x': (x1 + x2) // 2, 'center_y': (y1 + y2) // 2,
        }

    def _parse_element(self, element, depth):
        """Recursively record ``element`` and its descendants.

        Comments and processing instructions (nodes whose ``tag`` is not a
        string) are skipped so that every recorded ``tag`` is a string.
        """
        if not isinstance(element.tag, str):
            return

        attributes = dict(element.attrib)
        # UI dumps carry the visible text in a ``text`` attribute rather than
        # as XML character data, so fall back to it when the node has no text.
        text = (element.text or '').strip() or attributes.get('text', '').strip()
        element_info = {
            'tag': element.tag,
            'depth': depth,
            'attributes': attributes,
            'text': text,
            'children_count': len(element),
        }

        bounds = self._parse_bounds(element.get('bounds', ''))
        if bounds is not None:
            element_info['bounds'] = bounds

        if self.config['check_accessibility'] or self.config['check_duplicates']:
            self._check_element_issues(element_info)
        self.elements_data.append(element_info)

        for child in element:
            self._parse_element(child, depth + 1)

    def _check_element_issues(self, element_info):
        """Append one entry to ``self.issues`` per problem found on a node."""
        attrs = element_info.get('attributes', {})
        bounds = element_info.get('bounds')
        clickable = attrs.get('clickable') == 'true'
        found = []

        # Accessibility: clickable nodes need a content description.
        if self.config['check_accessibility'] and clickable:
            if not attrs.get('content-desc'):
                found.append("可点击元素缺少content-desc")

        # Touch-target size of clickable nodes.
        if bounds and clickable:
            min_size = self.config['min_clickable_size']
            if bounds['width'] < min_size or bounds['height'] < min_size:
                found.append(
                    f"可点击元素尺寸过小: {bounds['width']}x{bounds['height']}"
                )

        # Text content: too long, or a generic placeholder label.
        text = element_info.get('text', '')
        if text:
            if len(text) > self.config['max_text_length']:
                found.append(f"文本过长: {len(text)}字符")
            if text.lower() in ('click here', 'button', 'text'):
                found.append(f"文本描述不明确: '{text}'")

        # Degenerate bounds (zero or negative extent).
        if bounds and (bounds['width'] <= 0 or bounds['height'] <= 0):
            found.append("元素尺寸无效")

        self.issues.extend(
            {
                'element': element_info['tag'],
                'issue': issue,
                'bounds': element_info.get('bounds', {}),
                'attributes': attrs,
            }
            for issue in found
        )

    # ------------------------------------------------------------------
    # XML reporting
    # ------------------------------------------------------------------
    def get_statistics(self):
        """Return element counts, maximum depth and issue count as a dict."""
        stats = {
            'total_elements': len(self.elements_data),
            'clickable_elements': 0,
            'text_elements': 0,
            'image_elements': 0,
            'max_depth': 0,
            'issues_count': len(self.issues),
        }
        for element in self.elements_data:
            if element.get('attributes', {}).get('clickable') == 'true':
                stats['clickable_elements'] += 1
            if element.get('text'):
                stats['text_elements'] += 1
            if 'Image' in element.get('tag', ''):
                stats['image_elements'] += 1
            stats['max_depth'] = max(stats['max_depth'], element.get('depth', 0))
        return stats

    def find_duplicate_ids(self):
        """Return ``resource-id`` values that occur more than once.

        Returns:
            A list of ``{'resource_id': ..., 'count': ...}`` dicts in order of
            first occurrence; empty when ``check_duplicates`` is disabled.
        """
        if not self.config['check_duplicates']:
            return []
        id_counts = {}
        for element in self.elements_data:
            resource_id = element.get('attributes', {}).get('resource-id')
            if resource_id:
                id_counts[resource_id] = id_counts.get(resource_id, 0) + 1
        return [
            {'resource_id': resource_id, 'count': count}
            for resource_id, count in id_counts.items()
            if count > 1
        ]

    def get_accessibility_issues(self):
        """Return the recorded issues that concern accessibility."""
        return [
            issue for issue in self.issues
            if 'content-desc' in issue['issue'] or '可访问性' in issue['issue']
        ]

    def export_elements_data(self, format='json'):
        """Serialize ``elements_data``.

        Args:
            format: ``'json'`` for pretty-printed JSON, ``'csv'`` for a CSV
                table of tag/depth/text/clickable/resource-id. Any other
                value returns ``str(self.elements_data)``.

        Returns:
            The serialized data as a string. The CSV output is an empty
            string when there are no elements.
        """
        if format == 'json':
            import json
            return json.dumps(self.elements_data, ensure_ascii=False, indent=2)
        if format == 'csv':
            import csv
            import io
            output = io.StringIO()
            if self.elements_data:
                fieldnames = ['tag', 'depth', 'text', 'clickable', 'resource-id']
                writer = csv.DictWriter(output, fieldnames=fieldnames)
                writer.writeheader()
                for element in self.elements_data:
                    attrs = element.get('attributes', {})
                    writer.writerow({
                        'tag': element.get('tag', ''),
                        'depth': element.get('depth', 0),
                        'text': element.get('text', ''),
                        'clickable': attrs.get('clickable', 'false'),
                        'resource-id': attrs.get('resource-id', ''),
                    })
            return output.getvalue()
        return str(self.elements_data)

    # ------------------------------------------------------------------
    # SVG parsing
    # ------------------------------------------------------------------
    def parse_svg(self, svg_path):
        """Parse an SVG file and populate ``svg_elements``.

        Args:
            svg_path: Path of the SVG file (``str`` or ``Path``).

        Returns:
            ``True`` on success, ``False`` if the file is missing or cannot
            be parsed (the error is printed).
        """
        try:
            svg_path = Path(svg_path)
            if not svg_path.exists():
                print(f"❌ SVG文件不存在: {svg_path}")
                return False
            # Hand the raw bytes to the parser so it can honour the encoding
            # declared in the document instead of assuming UTF-8.
            root = etree.fromstring(svg_path.read_bytes())
            namespaces = {'svg': 'http://www.w3.org/2000/svg'}
            if root.nsmap:
                namespaces.update(root.nsmap)
            self.svg_elements = []
            self._parse_svg_element(root, 0, namespaces)
            print(f"✅ SVG解析完成找到 {len(self.svg_elements)} 个元素")
            return True
        except Exception as e:
            print(f"❌ SVG解析失败: {e}")
            return False

    def _parse_svg_element(self, element, depth, namespaces):
        """Recursively record ``element`` and its descendants.

        Comments and processing instructions (nodes whose ``tag`` is not a
        string) are skipped. ``namespaces`` is passed down unchanged and is
        not consulted here.
        """
        if not isinstance(element.tag, str):
            return

        # Drop the "{namespace-uri}" prefix from the qualified tag name.
        tag = element.tag.rsplit('}', 1)[-1]
        element_info = {
            'tag': tag,
            'depth': depth,
            'attributes': dict(element.attrib),
            'text': (element.text or '').strip(),
            'children_count': len(element),
        }
        self._extract_svg_geometry(element_info)
        self._extract_svg_styles(element_info)
        self._classify_svg_element(element_info)
        self.svg_elements.append(element_info)

        for child in element:
            self._parse_svg_element(child, depth + 1, namespaces)

    @classmethod
    def _svg_number(cls, value, default=0.0):
        """Convert an SVG numeric attribute to ``float``.

        Accepts plain numbers with an optional ``px`` suffix. Missing or
        unparsable values (percentages, other units, lists) yield
        ``default``.
        """
        if value is None:
            return default
        match = cls._SVG_NUMBER_RE.match(str(value))
        return float(match.group(1)) if match else default

    def _extract_svg_geometry(self, element_info):
        """Compute a bounding box for the element and store it as
        ``element_info['geometry']``.

        Supported tags are ``rect``, ``circle``, ``ellipse``, ``line``,
        ``text`` (size estimated from the font size) and ``g`` (position
        taken from a ``translate(...)`` transform). Every other tag gets an
        all-zero box. Each attribute is parsed independently, so one
        unparsable value does not discard the others.
        """
        attrs = element_info['attributes']
        tag = element_info['tag']
        num = self._svg_number
        x = y = width = height = 0.0

        if tag == 'rect':
            x, y = num(attrs.get('x')), num(attrs.get('y'))
            width, height = num(attrs.get('width')), num(attrs.get('height'))
        elif tag == 'circle':
            radius = num(attrs.get('r'))
            x = num(attrs.get('cx')) - radius
            y = num(attrs.get('cy')) - radius
            width = height = radius * 2
        elif tag == 'ellipse':
            rx, ry = num(attrs.get('rx')), num(attrs.get('ry'))
            x = num(attrs.get('cx')) - rx
            y = num(attrs.get('cy')) - ry
            width, height = rx * 2, ry * 2
        elif tag == 'line':
            x1, y1 = num(attrs.get('x1')), num(attrs.get('y1'))
            x2, y2 = num(attrs.get('x2')), num(attrs.get('y2'))
            x, y = min(x1, x2), min(y1, y2)
            width, height = abs(x2 - x1), abs(y2 - y1)
        elif tag == 'text':
            x, y = num(attrs.get('x')), num(attrs.get('y'))
            # Rough estimate: average glyph width of 0.6 em, one line high.
            font_size = self._extract_font_size(attrs)
            width = len(element_info.get('text', '')) * font_size * 0.6
            height = font_size
        elif tag == 'g':
            translate = re.search(
                r'translate\(([^)]+)\)', attrs.get('transform', '')
            )
            if translate:
                # Arguments may be separated by commas and/or whitespace;
                # a missing second argument means ty = 0.
                parts = [
                    part
                    for part in re.split(r'[\s,]+', translate.group(1).strip())
                    if part
                ]
                if parts:
                    x = num(parts[0])
                    y = num(parts[1]) if len(parts) > 1 else 0.0

        element_info['geometry'] = {
            'x': x, 'y': y, 'width': width, 'height': height,
            'center_x': x + width / 2,
            'center_y': y + height / 2,
        }

    def _extract_font_size(self, attrs):
        """Return the font size from ``style`` or ``font-size``.

        The inline ``style`` declaration is checked first; the ``font-size``
        attribute is used otherwise. Falls back to ``12.0`` when no number
        can be extracted.
        """
        style_match = re.search(
            r'font-size:\s*(\d+(?:\.\d+)?)', attrs.get('style', '')
        )
        if style_match:
            return float(style_match.group(1))
        font_size = attrs.get('font-size', '12')
        try:
            # Strip unit suffixes such as "px" before converting.
            return float(re.sub(r'[^\d.]', '', font_size))
        except (ValueError, TypeError):
            return 12.0

    def _extract_svg_styles(self, element_info):
        """Collect style properties into ``element_info['styles']``.

        Declarations from the inline ``style`` attribute take precedence
        over presentation attributes of the same name, matching the lookup
        order used by ``_extract_font_size``.
        """
        attrs = element_info['attributes']
        styles = {}
        for rule in attrs.get('style', '').split(';'):
            key, separator, value = rule.partition(':')
            if separator:
                styles[key.strip()] = value.strip()
        for name in self._SVG_STYLE_ATTRS:
            if name in attrs:
                # Presentation attributes only fill gaps left by ``style``.
                styles.setdefault(name, attrs[name])
        element_info['styles'] = styles

    def _classify_svg_element(self, element_info):
        """Guess the UI role of an SVG element and store it as ``ui_type``.

        The result is one of ``text``, ``button``, ``container``, ``image``,
        ``group``, ``icon`` or ``unknown``.
        """
        tag = element_info['tag']
        styles = element_info.get('styles', {})
        if tag == 'text' or element_info.get('text', ''):
            ui_type = 'text'
        elif tag == 'rect':
            # A rectangle with both fill and stroke is treated as a button.
            has_fill_and_stroke = styles.get('fill') and styles.get('stroke')
            ui_type = 'button' if has_fill_and_stroke else 'container'
        elif tag in ('circle', 'ellipse'):
            ui_type = 'button'
        elif tag == 'image':
            ui_type = 'image'
        elif tag == 'g':
            ui_type = 'group'
        elif tag == 'path':
            ui_type = 'icon'
        else:
            ui_type = 'unknown'
        element_info['ui_type'] = ui_type

    # ------------------------------------------------------------------
    # Layout comparison
    # ------------------------------------------------------------------
    def compare_layouts(self, svg_path):
        """Compare the parsed XML layout with an SVG design file.

        ``parse_xml`` must have been called successfully beforehand.

        Args:
            svg_path: Path of the SVG design file.

        Returns:
            The comparison result dict (also stored in
            ``layout_comparison_result``), or ``None`` when no XML has been
            parsed, the SVG cannot be parsed, or the comparison fails.
        """
        if not self.elements_data:
            print("❌ 请先解析XML布局")
            return None
        if not self.parse_svg(svg_path):
            return None
        try:
            xml_ui_elements = self._extract_xml_ui_elements()
            svg_ui_elements = self._extract_svg_ui_elements()
            comparison_result = {
                'xml_elements_count': len(xml_ui_elements),
                'svg_elements_count': len(svg_ui_elements),
                'matched_elements': [],
                'unmatched_xml': [],
                'unmatched_svg': [],
                'layout_similarity': 0.0,
                'position_differences': [],
                'size_differences': [],
            }
            self._match_ui_elements(
                xml_ui_elements, svg_ui_elements, comparison_result
            )
            self._calculate_layout_similarity(comparison_result)
            self.layout_comparison_result = comparison_result
            print("📊 布局比对完成:")
            print(f" XML元素: {comparison_result['xml_elements_count']}")
            print(f" SVG元素: {comparison_result['svg_elements_count']}")
            print(f" 匹配元素: {len(comparison_result['matched_elements'])}")
            print(f" 布局相似度: {comparison_result['layout_similarity']:.1%}")
            return comparison_result
        except Exception as e:
            print(f"❌ 布局比对失败: {e}")
            return None

    def _extract_xml_ui_elements(self):
        """Return the XML nodes that are UI components, in comparison form."""
        return [
            {
                'type': self._classify_xml_element(element),
                'text': element.get('text', ''),
                'bounds': element.get('bounds', {}),
                'attributes': element.get('attributes', {}),
                'source': 'xml',
            }
            for element in self.elements_data
            if self._is_ui_component(element)
        ]

    def _extract_svg_ui_elements(self):
        """Return the classified SVG nodes, in comparison form."""
        return [
            {
                'type': element['ui_type'],
                'text': element.get('text', ''),
                'geometry': element.get('geometry', {}),
                'styles': element.get('styles', {}),
                'source': 'svg',
            }
            for element in self.svg_elements
            if element['ui_type'] != 'unknown'
        ]

    def _is_ui_component(self, element):
        """Return ``True`` if the XML node is a UI component, not a layout."""
        tag = element.get('tag', '')
        attrs = element.get('attributes', {})
        container_tags = (
            'LinearLayout', 'RelativeLayout', 'FrameLayout', 'ConstraintLayout',
        )
        if any(container in tag for container in container_tags):
            return False
        if element.get('text') or attrs.get('clickable') == 'true':
            return True
        ui_tags = (
            'Button', 'TextView', 'ImageView', 'EditText', 'CheckBox',
            'RadioButton',
        )
        return any(ui_tag in tag for ui_tag in ui_tags)

    def _classify_xml_element(self, element):
        """Map an XML node to ``button``/``text``/``image``/``input``/``container``.

        The checks are ordered; the first one that applies wins.
        """
        tag = element.get('tag', '')
        attrs = element.get('attributes', {})
        if 'Button' in tag:
            return 'button'
        if 'TextView' in tag or element.get('text'):
            return 'text'
        if 'ImageView' in tag:
            return 'image'
        if 'EditText' in tag:
            return 'input'
        if attrs.get('clickable') == 'true':
            return 'button'
        return 'container'

    def _match_ui_elements(self, xml_elements, svg_elements, result):
        """Greedily pair each XML element with its best unused SVG element.

        A pair is accepted only when its similarity exceeds 0.5. Fills
        ``matched_elements``, ``unmatched_xml`` and ``unmatched_svg`` of
        ``result`` in place.
        """
        matched_xml = set()
        matched_svg = set()
        for xml_index, xml_elem in enumerate(xml_elements):
            best_match = None
            best_score = 0
            for svg_index, svg_elem in enumerate(svg_elements):
                if svg_index in matched_svg:
                    continue
                score = self._calculate_element_similarity(xml_elem, svg_elem)
                if score > best_score and score > 0.5:
                    best_score = score
                    best_match = svg_index
            if best_match is None:
                continue
            matched_xml.add(xml_index)
            matched_svg.add(best_match)
            svg_elem = svg_elements[best_match]
            result['matched_elements'].append({
                'xml_element': xml_elem,
                'svg_element': svg_elem,
                'similarity': best_score,
                'position_diff': self._calculate_position_difference(
                    xml_elem, svg_elem
                ),
                'size_diff': self._calculate_size_difference(
                    xml_elem, svg_elem
                ),
            })
        result['unmatched_xml'] = [
            elem for index, elem in enumerate(xml_elements)
            if index not in matched_xml
        ]
        result['unmatched_svg'] = [
            elem for index, elem in enumerate(svg_elements)
            if index not in matched_svg
        ]

    def _calculate_element_similarity(self, xml_elem, svg_elem):
        """Return a similarity score in ``[0, 1]`` for an XML/SVG pair.

        Weights: equal type 0.4; text 0.4 when equal (case-insensitive),
        0.2 when one contains the other or both are empty; relative
        position contributes up to 0.2.
        """
        score = 0
        if xml_elem['type'] == svg_elem['type']:
            score += 0.4
        xml_text = xml_elem.get('text', '').strip().lower()
        svg_text = svg_elem.get('text', '').strip().lower()
        if xml_text and svg_text:
            if xml_text == svg_text:
                score += 0.4
            elif xml_text in svg_text or svg_text in xml_text:
                score += 0.2
        elif not xml_text and not svg_text:
            score += 0.2
        position_score = self._calculate_relative_position_similarity(
            xml_elem, svg_elem
        )
        score += position_score * 0.2
        return min(score, 1.0)

    def _calculate_relative_position_similarity(self, xml_elem, svg_elem):
        """Return the relative-position similarity of two elements.

        Placeholder: always returns a neutral 0.5. A real implementation
        would have to account for the scale between screen and design.
        """
        return 0.5

    def _calculate_position_difference(self, xml_elem, svg_elem):
        """Return the absolute centre-point offset as ``{'x': ..., 'y': ...}``.

        Both values are 0 when either element has no geometry.
        """
        xml_bounds = xml_elem.get('bounds', {})
        svg_geometry = svg_elem.get('geometry', {})
        if not xml_bounds or not svg_geometry:
            return {'x': 0, 'y': 0}
        return {
            axis: abs(
                xml_bounds.get(f'center_{axis}', 0)
                - svg_geometry.get(f'center_{axis}', 0)
            )
            for axis in ('x', 'y')
        }

    def _calculate_size_difference(self, xml_elem, svg_elem):
        """Return the absolute size difference as ``{'width': ..., 'height': ...}``.

        Both values are 0 when either element has no geometry.
        """
        xml_bounds = xml_elem.get('bounds', {})
        svg_geometry = svg_elem.get('geometry', {})
        if not xml_bounds or not svg_geometry:
            return {'width': 0, 'height': 0}
        return {
            key: abs(xml_bounds.get(key, 0) - svg_geometry.get(key, 0))
            for key in ('width', 'height')
        }

    def _calculate_layout_similarity(self, result):
        """Store the overall similarity in ``result['layout_similarity']``.

        The value is the matched fraction (relative to the larger element
        count) multiplied by the mean similarity of the matched pairs.
        """
        matches = result['matched_elements']
        total_elements = max(
            result['xml_elements_count'], result['svg_elements_count']
        )
        if total_elements == 0 or not matches:
            result['layout_similarity'] = 0.0
            return
        base_similarity = len(matches) / total_elements
        avg_match_quality = (
            sum(match['similarity'] for match in matches) / len(matches)
        )
        result['layout_similarity'] = base_similarity * avg_match_quality

    def get_layout_comparison_summary(self):
        """Return a human-readable summary of the last layout comparison."""
        if not self.layout_comparison_result:
            return "未进行布局比对"
        result = self.layout_comparison_result
        summary = "\n".join([
            "",
            "📊 布局比对摘要:",
            f"• XML元素数量: {result['xml_elements_count']}",
            f"• SVG元素数量: {result['svg_elements_count']}",
            f"• 匹配元素数量: {len(result['matched_elements'])}",
            f"• 布局相似度: {result['layout_similarity']:.1%}",
            f"• 未匹配XML元素: {len(result['unmatched_xml'])}",
            f"• 未匹配SVG元素: {len(result['unmatched_svg'])}",
            "",
        ])
        if result['layout_similarity'] >= 0.8:
            summary += "\n✅ 布局高度一致"
        elif result['layout_similarity'] >= 0.6:
            summary += "\n⚠️ 布局基本一致,有少量差异"
        else:
            summary += "\n❌ 布局差异较大,建议检查"
        return summary