Use Chinese for analyst reports

Request:
- Make analyst reports Chinese by default and record the rule in the analyst skill.

Changes:
- Add a Simplified Chinese default-language rule to the analyst skill.
- Update the single-IPO report generator to emit Chinese Markdown sections, labels, actions, risks, triggers, and exit plans.
- Preserve ticker symbols, stage codes, rule ids, score buckets, and source paths as machine-readable identifiers.
- Regenerate the 06106 T0 report in Chinese.
- Document the Chinese report default in README and the rule change log.

Verification:
- Ran py_compile for scripts/generate_ipo_report.py.
- Generated a 06106 dry-run report and checked Chinese section headings.
- Regenerated reports/2026-06-15_06106_T0_prospectus_analysis.md.
- Ran git diff --check.

Next useful context:
- Future analyst prediction and review reports should be written in Simplified Chinese unless the user explicitly requests another language.
2026-06-15 14:37:46 +00:00
parent 07d7a0064a
commit 907e30d9da
5 changed files with 214 additions and 188 deletions
@@ -104,49 +104,49 @@ def as_bool(value: Any) -> bool:

 def fmt_value(value: Any) -> str:
    if value in {None, ""}:
-        return "n/a"
+        return "未记录"
    return str(value)


 def fmt_num(value: float | None, suffix: str = "", decimals: int = 1) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"{value:,.{decimals}f}{suffix}"


 def fmt_pct_rate(value: float | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"{value * 100:.1f}%"


 def fmt_pct_points(value: float | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"{value:.1f}%"


 def fmt_money_m(value: float | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"HK${value:,.1f}m"


 def fmt_hkd(value: float | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"HK${value:,.2f}"


 def fmt_times(value: float | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"{value:,.2f}x"


 def fmt_int(value: int | None) -> str:
    if value is None:
-        return "n/a"
+        return "未记录"
    return f"{value:,}"


@@ -214,37 +214,37 @@ def t0_decision_band(score: int) -> str:

 def action_for_decision(decision: str) -> str:
    actions = {
-        "weak_or_avoid": "Avoid at T0 unless later T1 demand changes the setup.",
-        "neutral": "Wait for T1 allotment demand before subscribing.",
-        "positive_watch": "Watch positively, but wait for T1 confirmation before sizing for a T2/D1 exit.",
-        "strong_watch": "Strong watch at T0, still pending T1 demand confirmation for a T2/D1 exit.",
-        "avoid": "Avoid subscription.",
-        "avoid_or_wait": "Avoid or wait; do not size without a stronger catalyst.",
-        "watch_or_small": "Small subscription only if execution constraints support a T2/D1 exit.",
-        "selective_subscribe": "Selective subscription with disciplined T2/D1 sell sizing.",
-        "high_conviction_subscribe": "Subscribe, subject to allocation, liquidity, and T2/D1 sell discipline.",
+        "weak_or_avoid": "T0 阶段回避，除非后续 T1 认购热度明显改变格局。",
+        "neutral": "暂等 T1 分配结果，不在 T0 阶段主动下重注。",
+        "positive_watch": "正面观察，但需要等 T1 确认后再决定 T2/D1 退出仓位。",
+        "strong_watch": "T0 强关注，仍需等待 T1 认购热度确认后执行 T2/D1 退出纪律。",
+        "avoid": "回避申购。",
+        "avoid_or_wait": "回避或等待；没有更强催化前不放大仓位。",
+        "watch_or_small": "仅在执行条件支持 T2/D1 退出时小额参与。",
+        "selective_subscribe": "选择性申购，并严格按 T2/D1 卖出纪律控制仓位。",
+        "high_conviction_subscribe": "积极申购，但仍受分配、流动性和 T2/D1 卖出纪律约束。",
    }
    return actions[decision]


 def component_label(name: str) -> str:
    labels = {
-        "offer_size": "Offer size",
-        "public_pct": "Initial public offer percentage",
-        "min_subscription": "Minimum subscription",
-        "offer_price": "Offer price",
-        "over_allotment": "Over-allotment option",
-        "public_os": "Public oversubscription",
-        "international_os": "International oversubscription",
-        "valid_applications": "Valid applications",
-        "success_rate": "Application success rate",
-        "hk_reallocation": "HK public offer reallocation",
+        "offer_size": "发行规模",
+        "public_pct": "初始公开发售比例",
+        "min_subscription": "最低认购金额",
+        "offer_price": "发行价",
+        "over_allotment": "超额配股权",
+        "public_os": "公开认购倍数",
+        "international_os": "国际配售认购倍数",
+        "valid_applications": "有效申请数",
+        "success_rate": "申请成功率",
+        "hk_reallocation": "香港公开发售回拨",
    }
    return labels.get(name, name.replace("_", " ").title())


 def components_table(components: list[ScoreComponent]) -> str:
-    lines = ["| Component | Points | Reason |", "| --- | ---: | --- |"]
+    lines = ["| 评分项 | 分数 | 原因代码 |", "| --- | ---: | --- |"]
    for component in components:
        lines.append(f"| {component_label(component.name)} | {component.points} | `{component.reason}` |")
    return "\n".join(lines)
@@ -252,36 +252,36 @@ def components_table(components: list[ScoreComponent]) -> str:

 def facts_table(record: dict[str, str], stage: str) -> str:
    rows = [
-        ("Board", fmt_value(record["board"])),
-        ("Status", fmt_value(record["status"])),
-        ("Listing date", fmt_value(record["listing_date"])),
-        ("Application period", f"{fmt_value(record['application_start_date'])} to {fmt_value(record['application_end_date'])}"),
-        ("Allotment result date", fmt_value(record["allotment_results_expected_date"])),
-        ("Listing method", fmt_value(record["listing_method"])),
-        ("Industry", fmt_value(record["industry_label"])),
-        ("Sponsors", fmt_value(record["sponsors"])),
-        ("Offer price", fmt_hkd(as_float(record["offer_price_hkd"]))),
-        ("Offer size", fmt_money_m(as_float(record["offer_size_hkd_m"]))),
-        ("Market cap", fmt_money_m(as_float(record["market_cap_hkd_m"]))),
-        ("Board lot", fmt_int(as_int(record["board_lot"]))),
-        ("Minimum subscription", fmt_hkd(as_float(record["min_subscription_amount_hkd"]))),
-        ("Initial public offer percentage", fmt_pct_points(as_float(record["public_offer_pct_initial"]) * 100 if record["public_offer_pct_initial"] else None)),
-        ("Over-allotment shares", fmt_int(as_int(record["over_allotment_offer_shares"]))),
+        ("板块", fmt_value(record["board"])),
+        ("状态", fmt_value(record["status"])),
+        ("上市日期", fmt_value(record["listing_date"])),
+        ("申购期", f"{fmt_value(record['application_start_date'])} 至 {fmt_value(record['application_end_date'])}"),
+        ("分配结果日期", fmt_value(record["allotment_results_expected_date"])),
+        ("上市方式", fmt_value(record["listing_method"])),
+        ("行业", fmt_value(record["industry_label"])),
+        ("保荐人", fmt_value(record["sponsors"])),
+        ("发行价", fmt_hkd(as_float(record["offer_price_hkd"]))),
+        ("发行规模", fmt_money_m(as_float(record["offer_size_hkd_m"]))),
+        ("市值", fmt_money_m(as_float(record["market_cap_hkd_m"]))),
+        ("每手股数", fmt_int(as_int(record["board_lot"]))),
+        ("最低认购金额", fmt_hkd(as_float(record["min_subscription_amount_hkd"]))),
+        ("初始公开发售比例", fmt_pct_points(as_float(record["public_offer_pct_initial"]) * 100 if record["public_offer_pct_initial"] else None)),
+        ("超额配股权股数", fmt_int(as_int(record["over_allotment_offer_shares"]))),
    ]
    if stage == T1_STAGE:
        rows.extend(
            [
-                ("Public oversubscription", fmt_times(as_float(record["public_oversubscription_times"]))),
-                ("International oversubscription", fmt_times(as_float(record["international_oversubscription_times"]))),
-                ("Valid applications", fmt_int(as_int(record["valid_applications"]))),
-                ("Successful applications", fmt_int(as_int(record["successful_applications"]))),
-                ("Application success rate", fmt_pct_points(as_float(record["application_success_rate"]) * 100 if record["application_success_rate"] else None)),
-                ("International placees", fmt_int(as_int(record["international_placees"]))),
-                ("HK offer reallocation multiple", fmt_times(as_float(record["hk_offer_reallocation_multiple"]))),
+                ("公开认购倍数", fmt_times(as_float(record["public_oversubscription_times"]))),
+                ("国际配售认购倍数", fmt_times(as_float(record["international_oversubscription_times"]))),
+                ("有效申请数", fmt_int(as_int(record["valid_applications"]))),
+                ("成功申请数", fmt_int(as_int(record["successful_applications"]))),
+                ("申请成功率", fmt_pct_points(as_float(record["application_success_rate"]) * 100 if record["application_success_rate"] else None)),
+                ("国际配售承配人数", fmt_int(as_int(record["international_placees"]))),
+                ("香港公开发售回拨倍数", fmt_times(as_float(record["hk_offer_reallocation_multiple"]))),
            ]
        )

-    lines = ["| Field | Value |", "| --- | --- |"]
+    lines = ["| 字段 | 数值 |", "| --- | --- |"]
    for label, value in rows:
        lines.append(f"| {label} | {value} |")
    return "\n".join(lines)
@@ -292,36 +292,36 @@ def stage_calendar_table(record: dict[str, str]) -> str:
    application_end = fmt_value(record["application_end_date"])
    allotment_date = fmt_value(record["allotment_results_expected_date"])
    listing_date = fmt_value(record["listing_date"])
-    if allotment_date != "n/a":
-        t2_date = f"{allotment_date} after allotment results"
-    elif listing_date != "n/a":
-        t2_date = "trading day before D1; exact date not archived"
+    if allotment_date != "未记录":
+        t2_date = f"{allotment_date} 分配结果公布后"
+    elif listing_date != "未记录":
+        t2_date = "D1 前一个交易日；精确日期未归档"
    else:
-        t2_date = "n/a"
+        t2_date = "未记录"

    rows = [
        (
            "T0_prospectus",
-            f"{application_start} to {application_end}",
-            "Subscription window; use prospectus and offer terms only.",
+            f"{application_start} 至 {application_end}",
+            "申购前/申购中阶段；只使用招股书和发行条款。",
        ),
        (
            "T1_allotment",
            allotment_date,
-            "Allotment results day; use public demand, placing demand, and allocation facts.",
+            "分配结果日；使用公开认购热度、国际配售热度和分配事实。",
        ),
        (
            "T2_grey_market",
            t2_date,
-            "Pre-listing grey-market sell window if a reliable executable source exists.",
+            "上市前暗盘窗口；只有存在可靠且可执行的数据源时才作为卖出依据。",
        ),
        (
            "D1",
            listing_date,
-            "First official trading day; default sell window when T2 data is unavailable or unreliable.",
+            "正式上市首日；T2 数据不可用或不可靠时的默认卖出窗口。",
        ),
    ]
-    lines = ["| Stage | Concrete Date For This IPO | Meaning |", "| --- | --- | --- |"]
+    lines = ["| 阶段 | 本 IPO 对应日期 | 含义 |", "| --- | --- | --- |"]
    for stage, date_text, meaning in rows:
        lines.append(f"| `{stage}` | {date_text} | {meaning} |")
    return "\n".join(lines)
@@ -340,29 +340,29 @@ def reason_lines(components: list[ScoreComponent], positive: bool) -> list[str]:
    filtered = [component for component in components if (component.points > 0 if positive else component.points < 0)]
    filtered.sort(key=lambda component: component.points, reverse=positive)
    if not filtered:
-        return ["- No material positive scoring component." if positive else "- No material negative scoring component."]
-    return [f"- {component_label(component.name)}: {component.points:+d} (`{component.reason}`)." for component in filtered[:5]]
+        return ["- 没有明显正向评分项。" if positive else "- 没有明显负向评分项。"]
+    return [f"- {component_label(component.name)}：{component.points:+d} (`{component.reason}`)。" for component in filtered[:5]]


 def missing_field_lines(record: dict[str, str], stage: str) -> list[str]:
    required = [
-        ("industry_label", "industry label"),
-        ("market_cap_hkd_m", "market cap"),
-        ("min_subscription_amount_hkd", "minimum subscription"),
+        ("industry_label", "行业"),
+        ("market_cap_hkd_m", "市值"),
+        ("min_subscription_amount_hkd", "最低认购金额"),
    ]
    if stage == T1_STAGE:
        required.extend(
            [
-                ("public_oversubscription_times", "public oversubscription"),
-                ("international_oversubscription_times", "international oversubscription"),
-                ("valid_applications", "valid applications"),
-                ("successful_applications", "successful applications"),
+                ("public_oversubscription_times", "公开认购倍数"),
+                ("international_oversubscription_times", "国际配售认购倍数"),
+                ("valid_applications", "有效申请数"),
+                ("successful_applications", "成功申请数"),
            ]
        )
    missing = [label for key, label in required if not record.get(key)]
    if not missing:
-        return ["- No required report field is blank for this stage."]
-    return [f"- Missing or blank: {', '.join(missing)}."]
+        return ["- 本阶段必需字段没有明显空缺。"]
+    return [f"- 缺失或空白字段：{', '.join(missing)}。"]


 def build_report(record: dict[str, str], rows: list[dict[str, str]], stage: str, as_of: str) -> str:
@@ -376,85 +376,82 @@ def build_report(record: dict[str, str], rows: list[dict[str, str]], stage: str,
        decision = t0_decision_band(score)
        components = parse_components(record["t0_score_breakdown"])
        metric = bucket_metric(rows, "t0_score_bucket", bucket, require_t1=False)
-        score_label = "T0 score"
    else:
        score = as_int(record["total_score"]) or 0
        bucket = record["total_score_bucket"]
        decision = record["decision_band"]
        components = parse_components(record["t0_score_breakdown"]) + parse_components(record["t1_score_breakdown"])
        metric = bucket_metric(rows, "total_score_bucket", bucket, require_t1=True)
-        score_label = "Total score"

    paths = source_paths(record, stage)
-    source_lines = [f"- `{path}`" for path in paths] or ["- No source path recorded for this stage."]
+    source_lines = [f"- `{path}`" for path in paths] or ["- 本阶段没有记录来源路径。"]

    lines = [
-        f"# {ticker} IPO Analyst Report",
+        f"# {ticker} IPO 分析报告",
        "",
-        "## Summary",
+        "## 摘要",
        "",
-        f"- Ticker: `{ticker}`",
-        f"- Company: {fmt_value(record['company_name_en'])}",
-        f"- Stage: `{stage}`",
-        f"- Report as of: `{as_of}`",
-        f"- Model dataset as of: `{dataset_as_of}`",
-        f"- Rule version: `{model_version}`",
-        f"- Rule path: `{MODEL_RULE_PATH.as_posix()}`",
-        "- Strategy horizon: short IPO subscription trade; intended exit is T2 grey market if reliable, otherwise D1.",
-        f"- Decision: `{decision}`",
-        f"- PM action: {action_for_decision(decision)}",
-        f"- {score_label}: `{score}`",
-        f"- Score bucket: `{bucket}`",
-        f"- Calibrated D1 positive probability: {fmt_pct_rate(metric.d1_positive_rate)} from {metric.sample_size} historical D1 labels",
+        f"- 股票代码：`{ticker}`",
+        f"- 公司：{fmt_value(record['company_name_en'])}",
+        f"- 分析阶段：`{stage}`",
+        f"- 报告生成时间：`{as_of}`",
+        f"- 模型数据时间：`{dataset_as_of}`",
+        f"- 规则版本：`{model_version}`",
+        f"- 规则路径：`{MODEL_RULE_PATH.as_posix()}`",
+        "- 策略周期：短线 IPO 申购交易；优先在可靠 T2 暗盘卖出，否则默认 D1 卖出。",
+        f"- 结论代码：`{decision}`",
+        f"- 执行动作：{action_for_decision(decision)}",
+        f"- {'T0 分数' if stage == T0_STAGE else '总分'}：`{score}`",
+        f"- 分数分桶：`{bucket}`",
+        f"- 历史校准 D1 正收益概率：{fmt_pct_rate(metric.d1_positive_rate)}，样本数 {metric.sample_size}",
        "",
-        "## Stage Calendar",
+        "## 阶段日期表",
        "",
        stage_calendar_table(record),
        "",
-        "## Facts",
+        "## 基础事实",
        "",
        facts_table(record, stage),
        "",
-        "## Short-Exit Model Inference",
+        "## 短线退出模型推断",
        "",
-        f"- D1 positive probability: {fmt_pct_rate(metric.d1_positive_rate)}",
-        f"- D1 >= 10% probability: {fmt_pct_rate(metric.d1_strong_rate)}",
-        f"- Historical average D1 return for bucket: {fmt_num(metric.average_d1_return_pct, '%')}",
-        f"- Historical median D1 return for bucket: {fmt_num(metric.median_d1_return_pct, '%')}",
-        "- T2 sell return is not modeled until an approved grey-market data source exists.",
-        "- D5/D20/D60 outcomes are review labels only, not holding targets.",
+        f"- D1 正收益概率：{fmt_pct_rate(metric.d1_positive_rate)}",
+        f"- D1 涨幅不低于 10% 概率：{fmt_pct_rate(metric.d1_strong_rate)}",
+        f"- 同分桶历史 D1 平均收益：{fmt_num(metric.average_d1_return_pct, '%')}",
+        f"- 同分桶历史 D1 中位收益：{fmt_num(metric.median_d1_return_pct, '%')}",
+        "- T2 暗盘卖出收益暂未建模，直到项目确认可靠暗盘数据源。",
+        "- D5/D20/D60 只作为复盘标签，不是持仓目标。",
        "",
-        "## Score Breakdown",
+        "## 评分拆解",
        "",
        components_table(components),
        "",
-        "## Bull Points",
+        "## 正面因素",
        "",
        *reason_lines(components, positive=True),
        "",
-        "## Risks And Gaps",
+        "## 风险与缺口",
        "",
        *reason_lines(components, positive=False),
        *missing_field_lines(record, stage),
-        "- T2 grey-market signal is not used yet because the project has no approved reproducible source.",
-        "- Post-listing D5/D20/D60 outcomes are labels for later review only and are not holding-period targets.",
+        "- T2 暗盘信号暂未使用，因为项目还没有批准可复现的数据源。",
+        "- 上市后的 D5/D20/D60 表现只用于后续复盘，不是本模型的持仓周期目标。",
        "",
-        "## Triggers",
+        "## 触发条件",
        "",
-        "- Upgrade: stronger verified T1 demand, better allocation scarcity, or a new rule-backed positive catalyst.",
-        "- Downgrade: weak public or international demand, oversized supply, low-quality missing fields, or adverse market window.",
+        "- 上调：T1 认购热度显著更强、分配稀缺性更好，或出现有规则支持的新正面催化。",
+        "- 下调：公开或国际需求偏弱、供给过大、关键字段质量不足，或市场窗口明显转差。",
        "",
-        "## Exit Plan",
+        "## 退出计划",
        "",
-        "- If subscribed and allocated, plan to sell in T2 grey market when reliable executable data is available.",
-        "- If T2 is unavailable or unreliable, use D1 as the default exit window.",
-        "- Do not treat D5/D20/D60 as planned holding periods for this model.",
-        "- Record D1/D5/D20/D60 outcomes later as review labels, not as retroactive prediction inputs.",
+        "- 如果申购并获配，且 T2 暗盘数据可靠且可执行，优先按 T2 暗盘卖出计划处理。",
+        "- 如果 T2 不可用或不可靠，默认使用 D1 作为卖出窗口。",
+        "- 不把 D5/D20/D60 作为本模型的计划持仓周期。",
+        "- 后续记录 D1/D5/D20/D60 结果时，只作为复盘标签，不作为倒推预测输入。",
        "",
-        "## Source Paths",
+        "## 来源路径",
        "",
        *source_lines,
-        "",
    ]
    return "\n".join(lines)