褚宏光 bf6baa5483 Add scoring module and enhance HTML viewer with standardization
- Add scripts/scoring/ module with normalizer, sensitivity analysis, and config
- Enhance stock_viewer.html with standardized scoring display
- Add integration tests and normalization verification scripts
- Add documentation for standardization implementation and usage guides
- Add data distribution analysis reports for strength scoring dimensions
- Update discussion documents with algorithm optimization plans
2026-01-30 18:43:37 +08:00

471 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
强度分配置管理模块
提供可配置的权重、阈值和预设模式,支持:
1. 等权模式(默认)
2. 激进模式(重视突破和成交量)
3. 保守模式(重视形态质量)
4. 放量模式(重视成交量确认)
"""
from dataclasses import dataclass, field
from typing import Literal, Optional
import pandas as pd
@dataclass
class StrengthConfig:
    """Weights, thresholds and filter options for the composite strength score.

    Attributes:
        w_price: Weight of the breakout-magnitude score.
        w_convergence: Weight of the convergence score.
        w_volume: Weight of the volume score.
        w_geometry: Weight of the pattern-regularity (geometry) score.
        w_activity: Weight of the price-activity score.
        w_tilt: Weight of the tilt score.
        threshold_price: Minimum breakout-magnitude score (normalized).
        threshold_convergence: Minimum convergence score.
        threshold_volume: Minimum volume score.
        threshold_geometry: Minimum pattern-regularity score.
        threshold_activity: Minimum price-activity score.
        direction: Breakout direction, one of 'up', 'down', 'both'.
        filter_mode: How filter conditions are combined, 'and' or 'or'.
        name: Display name of the configuration.
    """

    # Weights (equal by default); validate() requires them to sum to 1.0.
    w_price: float = 1/6
    w_convergence: float = 1/6
    w_volume: float = 1/6
    w_geometry: float = 1/6
    w_activity: float = 1/6
    w_tilt: float = 1/6

    # Thresholds on normalized scores; validate() requires each in [0, 1].
    threshold_price: float = 0.60        # breakout magnitude
    threshold_convergence: float = 0.50  # convergence (neutral)
    threshold_volume: float = 0.50       # volume (neutral = no filtering)
    threshold_geometry: float = 0.50     # pattern regularity (neutral)
    threshold_activity: float = 0.30     # price activity

    # Direction and condition-combination mode.
    direction: Literal['up', 'down', 'both'] = 'up'
    filter_mode: Literal['and', 'or'] = 'and'

    # Display name.
    name: str = "自定义配置"

    def validate(self) -> bool:
        """Check that the configuration is internally consistent.

        Returns:
            True when every check passes.

        Raises:
            ValueError: If the weights do not sum to 1.0 (tolerance 0.001),
                if any weight or threshold lies outside [0, 1], or if
                ``direction`` / ``filter_mode`` is not one of the allowed
                literals.
        """
        weights = [
            ('price', self.w_price), ('convergence', self.w_convergence),
            ('volume', self.w_volume), ('geometry', self.w_geometry),
            ('activity', self.w_activity), ('tilt', self.w_tilt),
        ]
        thresholds = [
            ('price', self.threshold_price),
            ('convergence', self.threshold_convergence),
            ('volume', self.threshold_volume),
            ('geometry', self.threshold_geometry),
            ('activity', self.threshold_activity),
        ]

        # The weights must form a convex combination.
        total_weight = (self.w_price + self.w_convergence + self.w_volume +
                        self.w_geometry + self.w_activity + self.w_tilt)
        if abs(total_weight - 1.0) > 0.001:
            raise ValueError(f"权重和必须为1.0,当前为{total_weight:.6f}")

        for name, weight in weights:
            if not 0 <= weight <= 1:
                raise ValueError(f"{name}权重{weight}超出[0, 1]范围")

        for name, threshold in thresholds:
            if not 0 <= threshold <= 1:
                raise ValueError(f"{name}阈值{threshold}超出[0, 1]范围")

        # Reject misspelled options explicitly: consumers branch on these
        # values with a catch-all `else`, so a typo would otherwise be
        # silently interpreted as 'both' / 'or'.
        if self.direction not in ('up', 'down', 'both'):
            raise ValueError(
                f"direction必须为'up'、'down'或'both',当前为{self.direction!r}"
            )
        if self.filter_mode not in ('and', 'or'):
            raise ValueError(
                f"filter_mode必须为'and'或'or',当前为{self.filter_mode!r}"
            )
        return True

    def summary(self) -> str:
        """Return a multi-line, human-readable summary of the configuration.

        Lists every weight, every threshold (including the geometry
        threshold), the direction and the filter mode.
        """
        lines = [
            f"配置名称: {self.name}",
            "\n权重分配:",
            f" 突破幅度分: {self.w_price:.2%}",
            f" 收敛度分: {self.w_convergence:.2%}",
            f" 成交量分: {self.w_volume:.2%}",
            f" 形态规则度: {self.w_geometry:.2%}",
            f" 价格活跃度: {self.w_activity:.2%}",
            f" 倾斜度分: {self.w_tilt:.2%}",
            "\n筛选阈值:",
            f" 突破幅度分: ≥{self.threshold_price:.2f}",
            f" 收敛度分: ≥{self.threshold_convergence:.2f}",
            f" 成交量分: ≥{self.threshold_volume:.2f}",
            f" 形态规则度: ≥{self.threshold_geometry:.2f}",
            f" 价格活跃度: ≥{self.threshold_activity:.2f}",
            "\n其他:",
            f" 方向: {self.direction}",
            f" 筛选模式: {self.filter_mode}",
        ]
        return '\n'.join(lines)
# ============================================================================
# 预设配置
# ============================================================================
# Equal-weight mode (the default).
CONFIG_EQUAL = StrengthConfig(
    w_price=1/6,
    w_convergence=1/6,
    w_volume=1/6,
    w_geometry=1/6,
    w_activity=1/6,
    w_tilt=1/6,
    threshold_price=0.60,
    threshold_convergence=0.50,
    threshold_volume=0.50,
    name="等权模式",
)

# Aggressive mode: emphasises breakout and volume; meant for trending markets.
CONFIG_AGGRESSIVE = StrengthConfig(
    w_price=0.35,          # breakout matters most
    w_convergence=0.15,    # convergence
    w_volume=0.25,         # volume confirmation
    w_geometry=0.10,       # pattern geometry
    w_activity=0.10,       # price activity
    w_tilt=0.05,           # tilt
    threshold_price=0.55,  # lower bar, captures more signals
    threshold_volume=0.60, # requires some volume expansion
    direction='up',
    name="激进模式",
)

# Conservative mode: emphasises pattern quality; meant for range-bound markets.
CONFIG_CONSERVATIVE = StrengthConfig(
    w_price=0.15,                # breakout is not the main driver
    w_convergence=0.30,          # convergence matters most
    w_volume=0.10,               # volume
    w_geometry=0.15,             # pattern quality
    w_activity=0.25,             # price activity is important
    w_tilt=0.05,                 # tilt
    threshold_price=0.70,        # higher bar, keeps strong signals only
    threshold_convergence=0.65,  # requires high-quality convergence
    threshold_activity=0.50,     # requires normal activity
    name="保守模式",
)

# Volume-focus mode: emphasises volume confirmation.
CONFIG_VOLUME_FOCUS = StrengthConfig(
    w_price=0.25,          # breakout
    w_convergence=0.15,    # convergence
    w_volume=0.35,         # volume matters most
    w_geometry=0.10,       # pattern geometry
    w_activity=0.10,       # price activity
    w_tilt=0.05,           # tilt
    threshold_price=0.60,  # medium breakout requirement
    threshold_convergence=0.50,
    threshold_volume=0.70, # high volume requirement
    name="放量模式",
)
# ============================================================================
# 单维度测试模式每个维度50%其余各10%
# ============================================================================
def _single_dimension_config(name: str, dominant: str) -> StrengthConfig:
    """Build a test preset where ``dominant`` weighs 0.50 and the rest 0.10."""
    weights = {
        f"w_{dimension}": 0.10
        for dimension in (
            'price', 'convergence', 'volume', 'geometry', 'activity', 'tilt'
        )
    }
    weights[f"w_{dominant}"] = 0.50  # the dominant dimension
    return StrengthConfig(name=name, **weights)


# Breakout-magnitude dominated.
CONFIG_TEST_PRICE = _single_dimension_config("突破主导", 'price')

# Convergence dominated.
CONFIG_TEST_CONVERGENCE = _single_dimension_config("收敛主导", 'convergence')

# Volume dominated.
CONFIG_TEST_VOLUME = _single_dimension_config("成交量主导", 'volume')

# Pattern-regularity dominated.
CONFIG_TEST_GEOMETRY = _single_dimension_config("形态主导", 'geometry')

# Price-activity dominated.
CONFIG_TEST_ACTIVITY = _single_dimension_config("活跃主导", 'activity')

# Tilt dominated.
CONFIG_TEST_TILT = _single_dimension_config("倾斜主导", 'tilt')
# ============================================================================
# 筛选和计算函数
# ============================================================================
def calculate_strength(
    df_normalized: pd.DataFrame,
    config: StrengthConfig
) -> pd.Series:
    """Compute the weighted composite strength score for every row.

    Args:
        df_normalized: Normalized DataFrame. It must contain the columns
            ``price_score_up_norm`` and/or ``price_score_down_norm``
            (depending on ``config.direction``) plus
            ``convergence_score_norm``, ``volume_score_norm``,
            ``geometry_score_norm``, ``activity_score_norm`` and
            ``tilt_score_norm``.
        config: Configuration supplying the weights and the direction.

    Returns:
        A Series of composite strength scores, one per row of
        ``df_normalized``.

    Raises:
        ValueError: Propagated from ``config.validate()``.
    """
    config.validate()

    # Pick the breakout component once, so the weighted sum below exists in
    # a single place instead of being duplicated per direction.
    if config.direction == 'up':
        price_scores = df_normalized['price_score_up_norm']
    elif config.direction == 'down':
        price_scores = df_normalized['price_score_down_norm']
    else:  # 'both': use the stronger of the two directions per row
        price_scores = df_normalized[
            ['price_score_up_norm', 'price_score_down_norm']
        ].max(axis=1)

    return (
        config.w_price * price_scores +
        config.w_convergence * df_normalized['convergence_score_norm'] +
        config.w_volume * df_normalized['volume_score_norm'] +
        config.w_geometry * df_normalized['geometry_score_norm'] +
        config.w_activity * df_normalized['activity_score_norm'] +
        config.w_tilt * df_normalized['tilt_score_norm']
    )
def filter_signals(
    df_normalized: pd.DataFrame,
    config: StrengthConfig,
    return_strength: bool = False
) -> pd.DataFrame:
    """Filter rows of a normalized DataFrame according to ``config``.

    One condition is built per scoring dimension and the conditions are
    combined with AND or OR depending on ``config.filter_mode``. For
    ``direction='both'`` a breakout in either direction satisfies the price
    condition, matching ``calculate_strength``, which uses the larger of the
    up/down scores in that case.

    Args:
        df_normalized: Normalized DataFrame containing the ``*_norm`` columns.
        config: Configuration supplying thresholds, direction and mode.
        return_strength: When True, add a ``strength`` column to the result
            and sort by it in descending order.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: Propagated from ``config.validate()``.
    """
    config.validate()

    conditions = []

    # 1. Breakout magnitude. The per-direction checks are OR-ed into ONE
    #    condition; keeping them separate would make 'both' + 'and' demand a
    #    simultaneous upward and downward breakout.
    price_checks = []
    if config.direction in ('up', 'both'):
        price_checks.append(
            df_normalized['price_score_up_norm'] >= config.threshold_price
        )
    if config.direction in ('down', 'both'):
        price_checks.append(
            df_normalized['price_score_down_norm'] >= config.threshold_price
        )
    if price_checks:
        price_condition = price_checks[0]
        for check in price_checks[1:]:
            price_condition = price_condition | check
        conditions.append(price_condition)

    # 2-5. Remaining dimensions: (column, threshold, value the threshold must
    #      exceed for the condition to be enabled). Volume is only enabled
    #      above 0.5; at or below that it would merely relax the filter.
    dimension_rules = (
        ('convergence_score_norm', config.threshold_convergence, 0),
        ('volume_score_norm', config.threshold_volume, 0.5),
        ('geometry_score_norm', config.threshold_geometry, 0),
        ('activity_score_norm', config.threshold_activity, 0),
    )
    for column, threshold, enabled_above in dimension_rules:
        if threshold > enabled_above:
            conditions.append(df_normalized[column] >= threshold)

    # Combine the conditions.
    if not conditions:
        # Nothing to filter on: return everything.
        result = df_normalized
    else:
        require_all = config.filter_mode == 'and'
        mask = conditions[0]
        for condition in conditions[1:]:
            mask = (mask & condition) if require_all else (mask | condition)
        result = df_normalized[mask]

    # Optionally attach the composite strength score.
    if return_strength:
        result = result.copy()  # do not write into the caller's frame
        result['strength'] = calculate_strength(result, config)
        result = result.sort_values('strength', ascending=False)
    return result
def filter_top_n(
    df_normalized: pd.DataFrame,
    config: StrengthConfig,
    n: int = 100
) -> pd.DataFrame:
    """Return the ``n`` rows with the highest composite strength score.

    Args:
        df_normalized: Normalized DataFrame.
        config: Configuration used to compute the strength score.
        n: Number of top signals to return.

    Returns:
        A new DataFrame with the top ``n`` rows, including a ``strength``
        column; the input frame is not modified.
    """
    # Score every row, attach the score to a copy, then keep the best n.
    strength_scores = calculate_strength(df_normalized, config)
    ranked = df_normalized.assign(strength=strength_scores)
    return ranked.nlargest(n, 'strength')
# ============================================================================
# 使用示例
# ============================================================================
if __name__ == "__main__":
    # Self-test: load the normalized results CSV, compare how many signals
    # each preset keeps, and print the equal-weight top 10.
    # (The previous `normalize_all` import and its sys.path tweak were unused
    # here and could abort the self-test with an ImportError.)
    import os

    data_path = os.path.join(
        os.path.dirname(__file__),
        "..", "..", "outputs", "converging_triangles", "all_results_normalized.csv"
    )
    if os.path.exists(data_path):
        print("=" * 80)
        print("强度分配置模块测试")
        print("=" * 80)
        df = pd.read_csv(data_path)
        total_rows = len(df)
        print(f"\n加载数据: {total_rows} 条记录")

        # Presets to compare.
        configs = [
            CONFIG_EQUAL,
            CONFIG_AGGRESSIVE,
            CONFIG_CONSERVATIVE,
            CONFIG_VOLUME_FOCUS,
        ]
        print("\n" + "=" * 80)
        print("各配置筛选结果对比")
        print("=" * 80)
        for config in configs:
            filtered = filter_signals(df, config, return_strength=False)
            # Guard against an empty input file (avoids ZeroDivisionError).
            share = len(filtered) / total_rows * 100 if total_rows else 0.0
            print(f"\n{config.name}:")
            print(f" 信号数量: {len(filtered)} ({share:.1f}%)")
            print(f" 权重: P{config.w_price:.0%}/C{config.w_convergence:.0%}/V{config.w_volume:.0%}")
            print(f" 阈值: price≥{config.threshold_price:.2f}, vol≥{config.threshold_volume:.2f}")

        # Top-N check.
        print("\n" + "=" * 80)
        print("Top 10 信号(等权模式)")
        print("=" * 80)
        top10 = filter_top_n(df, CONFIG_EQUAL, n=10)
        print("\nstock_code | date | strength | price_up | convergence | volume")
        print("-" * 80)
        for _, row in top10.iterrows():
            # str(): read_csv may parse all-numeric codes as integers, and
            # the 's' format spec rejects non-string values.
            print(f"{str(row['stock_code']):10s} | {int(row['date'])} | "
                  f"{row['strength']:.4f} | "
                  f"{row['price_score_up_norm']:.4f} | "
                  f"{row['convergence_score_norm']:.4f} | "
                  f"{row['volume_score_norm']:.4f}")
        print("\n测试通过!")
    else:
        print(f"数据文件不存在: {data_path}")
        print("请先运行 verify_normalization.py 生成标准化数据")