technical-patterns-lab/scripts/example_scoring_usage.py
褚宏光 bf6baa5483 Add scoring module and enhance HTML viewer with standardization
- Add scripts/scoring/ module with normalizer, sensitivity analysis, and config
- Enhance stock_viewer.html with standardized scoring display
- Add integration tests and normalization verification scripts
- Add documentation for standardization implementation and usage guides
- Add data distribution analysis reports for strength scoring dimensions
- Update discussion documents with algorithm optimization plans
2026-01-30 18:43:37 +08:00

205 lines
6.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
强度分标准化系统使用示例
展示如何使用 scoring 模块进行标准化、筛选和分析。
"""
import pandas as pd
from pathlib import Path
import sys
# Put the `scoring` directory that sits next to this script on the import path.
# NOTE(review): this inserts the `scoring` directory itself, not its parent;
# `from scoring import ...` below presumably resolves through the script's own
# directory when run as a script -- confirm whether this insert is needed.
sys.path.insert(0, str(Path(__file__).parent / 'scoring'))
from scoring import (
normalize_all,
CONFIG_EQUAL, CONFIG_AGGRESSIVE, CONFIG_CONSERVATIVE, CONFIG_VOLUME_FOCUS,
filter_signals, calculate_strength, filter_top_n
)
def example_1_basic_normalization():
    """Example 1: basic normalization.

    Reads ``all_results.csv``, keeps the rows whose ``is_valid`` column
    equals True, passes them to ``normalize_all`` and prints:

    * the row count and the first ten column names of the filtered input,
    * the columns present in the result but not in the input,
    * the median of three score columns next to the median of their
      ``*_norm`` counterparts.
    """
    rule = "=" * 80
    print(rule)
    print("示例1基础标准化")
    print(rule)

    # The results directory is resolved two levels above this file.
    results_dir = Path(__file__).parent.parent / 'outputs' / 'converging_triangles'
    raw = pd.read_csv(results_dir / 'all_results.csv')
    valid_mask = raw['is_valid'] == True  # element-wise comparison, not identity
    raw = raw[valid_mask]
    print(f"\n原始数据: {len(raw)} 条记录")
    print(f"原始字段: {raw.columns.tolist()[:10]}...")

    # Normalize, then list whatever columns the call added.
    normalized = normalize_all(raw)
    print("\n标准化后新增字段:")
    for added in normalized.columns.difference(raw.columns).tolist():
        print(f" - {added}")

    # Side-by-side medians: raw column vs. its `_norm` counterpart.
    print("\n标准化效果对比:")
    print(f"{'维度':<20s} | {'原始中位数':>10s} | {'标准化中位数':>12s}")
    print("-" * 50)
    for name in ('price_score_up', 'convergence_score', 'volume_score'):
        raw_median = raw[name].median()
        norm_median = normalized[name + '_norm'].median()
        print(f"{name:<20s} | {raw_median:>10.4f} | {norm_median:>12.4f}")
def example_2_preset_configs():
    """Example 2: filter signals with the preset configurations.

    Reads ``all_results_normalized.csv`` and, for each of the four preset
    configs, prints the config name, the number of rows ``filter_signals``
    returns, that count as a percentage of all rows, and the two largest of
    the config's price / convergence / volume weights.

    A file with a header but no rows reports 0.0% instead of raising
    ``ZeroDivisionError``.
    """
    print("\n" + "=" * 80)
    print("示例2使用预设配置筛选信号")
    print("=" * 80)

    # Load the already-normalized results.
    data_path = (
        Path(__file__).parent.parent
        / 'outputs' / 'converging_triangles' / 'all_results_normalized.csv'
    )
    df = pd.read_csv(data_path)
    total = len(df)

    configs = [
        CONFIG_EQUAL,
        CONFIG_AGGRESSIVE,
        CONFIG_CONSERVATIVE,
        CONFIG_VOLUME_FOCUS,
    ]

    print(f"\n总样本数: {total}")
    print("\n配置名称 | 信号数 | 占比 | 主要特点")
    print("-" * 80)

    for config in configs:
        filtered = filter_signals(df, config)
        # Guard the share computation: an empty input has no meaningful ratio.
        pct = len(filtered) / total * 100 if total else 0.0

        # Rank the three displayed dimensions by weight, heaviest first.
        weights = sorted(
            [
                ('突破', config.w_price),
                ('收敛', config.w_convergence),
                ('成交量', config.w_volume),
            ],
            key=lambda item: item[1],
            reverse=True,
        )
        top_weights = ', '.join(f"{k}{v:.0%}" for k, v in weights[:2])

        print(f"{config.name:<20s} | {len(filtered):>6d} | {pct:>4.1f}% | {top_weights}")
def example_3_custom_config():
    """Example 3: build and apply a custom configuration.

    Constructs a ``StrengthConfig`` with hand-picked weights and thresholds,
    prints its ``summary()``, filters ``all_results_normalized.csv`` with it
    (requesting the strength column via ``return_strength=True``) and prints
    the first five rows of the result.

    A file with a header but no rows reports 0.0% instead of raising
    ``ZeroDivisionError``.
    """
    print("\n" + "=" * 80)
    print("示例3自定义配置")
    print("=" * 80)

    from scoring.config import StrengthConfig

    # Build the custom configuration.
    my_config = StrengthConfig(
        name="我的配置",
        w_price=0.40,  # emphasize breakout (40%)
        w_volume=0.30,  # emphasize volume expansion (30%)
        w_convergence=0.15,
        w_geometry=0.05,
        w_activity=0.05,
        w_tilt=0.05,
        threshold_price=0.65,  # moderate breakout threshold
        threshold_volume=0.70,  # high volume requirement
        direction='up',
    )

    # Print the configuration summary.
    print("\n" + my_config.summary())

    # Load the normalized data and filter it.
    data_path = (
        Path(__file__).parent.parent
        / 'outputs' / 'converging_triangles' / 'all_results_normalized.csv'
    )
    df = pd.read_csv(data_path)
    filtered = filter_signals(df, my_config, return_strength=True)

    # Guard the share computation: an empty input has no meaningful ratio.
    share = len(filtered) / len(df) * 100 if len(df) else 0.0
    print(f"\n筛选结果: {len(filtered)} 个信号 ({share:.1f}%)")

    # Show the first five rows of the filtered result.
    print("\nTop 5 信号:")
    print("股票代码 | 日期 | 强度 | 突破 | 成交量")
    print("-" * 60)
    for _, row in filtered.head(5).iterrows():
        print(f"{row['stock_code']:10s} | {int(row['date'])} | {row['strength']:.4f} | "
              f"{row['price_score_up_norm']:.4f} | {row['volume_score_norm']:.4f}")
def example_4_top_n_signals():
    """Example 4: fetch the top-N signals.

    Reads ``all_results_normalized.csv``, calls ``filter_top_n`` with
    ``CONFIG_EQUAL`` and ``n=20``, and prints one line per returned row:
    its 1-based position, stock code, date, strength and the normalized
    price / convergence / volume scores.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("示例4获取Top N信号")
    print(rule)

    csv_file = (
        Path(__file__).parent.parent
        / 'outputs' / 'converging_triangles' / 'all_results_normalized.csv'
    )
    table = pd.read_csv(csv_file)

    # Top 20 rows under the equal-weight preset.
    top20 = filter_top_n(table, CONFIG_EQUAL, n=20)

    print("\n等权模式 - Top 20 信号:")
    print("\n排名 | 股票代码 | 日期 | 强度 | 突破 | 收敛 | 放量")
    print("-" * 80)

    rank = 0
    for _, row in top20.iterrows():
        rank += 1
        code = row['stock_code']
        day = int(row['date'])
        print(f"{rank:>4d} | {code:10s} | {day} | "
              f"{row['strength']:.4f} | {row['price_score_up_norm']:.4f} | "
              f"{row['convergence_score_norm']:.4f} | {row['volume_score_norm']:.4f}")
def example_5_compare_configs():
    """Example 5: compare the results of different configurations.

    Reads ``all_results_normalized.csv`` and, for each of three preset
    configs, prints the rows returned by ``filter_top_n`` with ``n=3``
    (position, stock code, date and strength).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("示例5对比不同配置的Top信号")
    print(rule)

    csv_file = (
        Path(__file__).parent.parent
        / 'outputs' / 'converging_triangles' / 'all_results_normalized.csv'
    )
    table = pd.read_csv(csv_file)

    for preset in (CONFIG_EQUAL, CONFIG_AGGRESSIVE, CONFIG_CONSERVATIVE):
        print(f"\n{preset.name} - Top 3:")
        print("-" * 60)

        best = filter_top_n(table, preset, n=3)
        position = 0
        for _, row in best.iterrows():
            position += 1
            code = row['stock_code']
            day = int(row['date'])
            print(f" {position}. {code} ({day}) - 强度: {row['strength']:.4f}")
def main():
    """Run all examples in order, then print pointers to further material.

    Any ``Exception`` raised along the way is reported with its message and
    a traceback instead of propagating; the remaining examples are skipped.
    """
    try:
        for run_example in (
            example_1_basic_normalization,
            example_2_preset_configs,
            example_3_custom_config,
            example_4_top_n_signals,
            example_5_compare_configs,
        ):
            run_example()

        closing_lines = (
            "\n" + "=" * 80,
            "所有示例运行完成!",
            "=" * 80,
            "\n更多功能:",
            " 1. 查看敏感性分析: python scripts/scoring/sensitivity.py",
            " 2. 完整报告: outputs/converging_triangles/sensitivity_analysis_report.md",
            " 3. 对比图表: outputs/converging_triangles/normalization_comparison.png",
        )
        for line in closing_lines:
            print(line)
    except Exception as e:
        print(f"\n错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()