79 lines
2.5 KiB
Python
79 lines
2.5 KiB
Python
"""
|
|
Third-level heading recommendation algorithm - recommends the number of `#` characters most frequently used by third-level headings
|
|
"""
|
|
|
|
from collections import Counter
|
|
from typing import Dict, Any, List
|
|
import re
|
|
|
|
def get_third_level_hash_counts_simple(content: str) -> List[int]:
    """Collect the ``#`` counts of headings whose numbering is third-level.

    Every line of *content* is stripped and inspected. Lines between
    triple-backtick fences are ignored. A heading is a line made of one to
    six ``#`` characters, whitespace, and a title. The title counts as
    third-level when it starts with either:

    * three dot-separated numbers, optionally with whitespace around the
      dots ("1.2.3", "1 . 2 . 3", "1. 2. 3"), or
    * an uppercase letter followed by three dot-separated numbers
      ("A.1.2.3").

    Titles that continue with a further numeric component ("1.2.3.4" or
    the spaced form "1 . 2 . 3 . 4") are deeper levels and are not counted.

    Args:
        content: Markdown text to scan.

    Returns:
        The number of ``#`` characters of each matching heading, in
        document order. Empty when nothing matches.
    """
    heading_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
    # The negative lookahead rejects a spaced fourth component such as
    # "1 . 2 . 3 . 4"; without it the whitespace after "3" would satisfy
    # the trailing group and the heading would be miscounted as third-level.
    numeric_pattern = re.compile(
        r'^\d+\s*\.\s*\d+\s*\.\s*\d+(?!\s*\.\s*\d)(?:\s|$|[^\d\.])'
    )
    lettered_pattern = re.compile(r'^[A-Z]\.\d+\.\d+\.\d+(?:\s|$|[^\d\.])')

    hash_counts: List[int] = []
    in_code_block = False

    for line in content.split('\n'):
        line = line.strip()
        if not line:
            continue

        # A fence line toggles code-block state and is never a heading.
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        match = heading_pattern.match(line)
        if not match:
            continue

        title_text = match.group(2).strip()
        if numeric_pattern.match(title_text) or lettered_pattern.match(title_text):
            hash_counts.append(len(match.group(1)))

    return hash_counts
|
|
|
|
def get_recommended_hash_count_simple(content: str) -> Dict[str, Any]:
    """Recommend a ``#`` count for third-level headings in *content*.

    The counts returned by ``get_third_level_hash_counts_simple`` are
    tallied and the most frequent one is recommended.

    Args:
        content: Markdown text to analyse.

    Returns:
        A dict with the keys ``recommendation`` (the chosen ``#`` count, or
        5 when no third-level heading was found), ``reason`` (a
        human-readable explanation), ``statistics`` (a mapping from ``#``
        count to occurrences) and ``total_count`` (the number of headings
        tallied).
    """
    counts = get_third_level_hash_counts_simple(content)
    total_count = len(counts)

    # Nothing to tally: fall back to the default recommendation.
    if total_count == 0:
        return dict(
            recommendation=5,
            reason='No third-level headings detected, using default value',
            statistics={},
            total_count=0,
        )

    tally = Counter(counts)
    # Take the single most frequent (hash count, occurrences) pair.
    top_hash_count, frequency = tally.most_common(1)[0]
    percentage = frequency / total_count * 100

    result: Dict[str, Any] = {}
    result['recommendation'] = top_hash_count
    result['reason'] = f'Most frequently used: {frequency}/{total_count} times ({percentage:.1f}%)'
    result['statistics'] = dict(tally)
    result['total_count'] = total_count
    return result
|