Add T0.95 late-order heat stage

Request:
- Reflect that near-final market heat is a usable decision input as long as the user can still place an IPO order at the T0.95 stage.

Changes:
- Added T0_95_final_heat as a separate analyst decision stage with executability and no-leakage rules.
- Added an experimental T0.95 rule overlay for late-order heat scoring and calibration discipline.
- Updated archivist guidance and the market-heat archiver so snapshots can be explicitly stored as T0_95_final_heat.
- Added market_heat_stage to the analysis dataset and refreshed the model report to show T0.95 coverage separately.

Verification:
- Ran py_compile for the modified scripts.
- Checked that archive_t0_5_market_heat.py --help lists the new --stage option.
- Rebuilt data/snapshots/analysis_model_v0_dataset.csv and reports/2026-06-15_analysis_model_v0.md.
- Ran git diff --check.

Next useful context:
- Current archived heat rows remain T0_5_market_heat only; there are no true T0.95 rows yet.
- external_ipo_history.public_oversubscription_times remains calibration-only; it can serve as a decision input only if a comparable value is archived before the executable order cutoff.
This commit is contained in:
2026-06-15 16:28:26 +00:00
parent 5e35242a76
commit 19832ac5af
8 changed files with 479 additions and 312 deletions
+34 -7
View File
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Archive T0.5 HK IPO market-heat snapshots from public web pages."""
"""Archive T0.5/T0.95 HK IPO market-heat snapshots from public web pages."""
from __future__ import annotations
@@ -20,6 +20,8 @@ DEFAULT_DB_PATH = Path("data/hk_ipo.sqlite")
DEFAULT_SCHEMA_PATH = Path("schema/hk_ipo.schema.sql")
SNAPSHOT_DIR = Path("data/snapshots")
VBKR_URL = "https://www.vbkr.com/ipo/hk/v2/ipo-hk-index"
T0_5_STAGE = "T0_5_market_heat"
T0_95_STAGE = "T0_95_final_heat"
@dataclass(frozen=True)
@@ -45,6 +47,12 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--as-of", help="Archive timestamp. Defaults to current UTC time.")
parser.add_argument("--url", default=VBKR_URL, help="Market-heat page URL.")
parser.add_argument("--tickers", help="Comma-separated tickers to archive. Defaults to all parsed tickers in ipo_master.")
parser.add_argument(
"--stage",
choices=[T0_5_STAGE, T0_95_STAGE],
default=T0_5_STAGE,
help="Decision stage represented by this snapshot. Use T0_95_final_heat only when the snapshot is still actionable before the user's order cutoff.",
)
return parser.parse_args()
@@ -62,6 +70,10 @@ def source_date(value: str) -> str:
return datetime.fromisoformat(value.replace("Z", "+00:00")).date().isoformat()
def stage_slug(stage: str) -> str:
return stage.lower()
def fetch_bytes(url: str) -> bytes:
request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urlopen(request, timeout=60) as response:
@@ -191,12 +203,24 @@ def upsert_rows(
file_sha256: str,
url: str,
as_of: str,
stage: str,
) -> int:
written = 0
slug = stage_slug(stage)
title = (
"VBKR IPO near-deadline final heat snapshot"
if stage == T0_95_STAGE
else "VBKR IPO expected margin multiple snapshot"
)
source_notes = (
"Non-official near-deadline market heat snapshot archived before the user's order cutoff. Final HKEX subscription data must still come from allotment results."
if stage == T0_95_STAGE
else "Non-official subscription-period market heat snapshot. Final subscription data must come from HKEX allotment results."
)
for row in rows:
if row.ticker not in selected:
continue
source_id = f"{row.ticker}_t0_5_market_heat_vbkr_{compact_timestamp(as_of)}"
source_id = f"{row.ticker}_{slug}_vbkr_{compact_timestamp(as_of)}"
heat_id = source_id
conn.execute(
"""
@@ -216,14 +240,14 @@ def upsert_rows(
(
source_id,
row.ticker,
"t0_5_market_heat",
"VBKR IPO expected margin multiple snapshot",
slug,
title,
local_path,
url,
file_sha256,
source_date(as_of),
as_of,
"Non-official subscription-period market heat snapshot. Final subscription data must come from HKEX allotment results.",
source_notes,
),
)
conn.execute(
@@ -235,9 +259,10 @@ def upsert_rows(
min_subscription_amount_hkd, subscription_deadline,
result_announcement_date, listing_date, data_as_of, notes
)
VALUES (?, ?, ?, 'T0_5_market_heat', 'VBKR/Jieli', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, 'VBKR/Jieli', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(ticker, provider, observed_at) DO UPDATE SET
source_id = excluded.source_id,
stage = excluded.stage,
margin_subscription_multiple = excluded.margin_subscription_multiple,
margin_multiple_label = excluded.margin_multiple_label,
offer_price_low_hkd = excluded.offer_price_low_hkd,
@@ -254,6 +279,7 @@ def upsert_rows(
heat_id,
row.ticker,
source_id,
stage,
as_of,
row.margin_subscription_multiple,
row.margin_multiple_label,
@@ -285,7 +311,7 @@ def main() -> int:
with sqlite3.connect(db_path) as conn:
conn.executescript(schema_path.read_text(encoding="utf-8"))
selected = selected_tickers(args.tickers, parsed_rows, conn)
written = upsert_rows(conn, parsed_rows, selected, local_path, file_sha256, args.url, as_of)
written = upsert_rows(conn, parsed_rows, selected, local_path, file_sha256, args.url, as_of, args.stage)
export_snapshot(conn, "ipo_market_heat", "ticker, observed_at")
export_snapshot(conn, "source_refs", "source_id")
@@ -304,6 +330,7 @@ def main() -> int:
)
print("market heat archived")
print(f"as_of: {as_of}")
print(f"stage: {args.stage}")
print(f"raw_snapshot: {local_path}")
print(f"parsed_rows: {len(parsed_rows)}")
print(f"written_rows: {written}")
+18 -2
View File
@@ -415,6 +415,7 @@ def fetch_rows(conn: sqlite3.Connection) -> list[sqlite3.Row]:
p.d60_return_pct,
p.d1_turnover_hkd_m,
h.observed_at AS t0_5_observed_at,
h.stage AS market_heat_stage,
h.provider AS t0_5_provider,
h.margin_subscription_multiple AS t0_5_margin_subscription_multiple,
h.source_id AS t0_5_source_id,
@@ -487,6 +488,7 @@ def build_records(rows: list[sqlite3.Row], as_of: str) -> list[dict[str, Any]]:
"public_oversubscription_times": row["public_oversubscription_times"],
"international_oversubscription_times": row["international_oversubscription_times"],
"t0_5_observed_at": row["t0_5_observed_at"],
"market_heat_stage": row["market_heat_stage"],
"t0_5_provider": row["t0_5_provider"],
"t0_5_margin_subscription_multiple": row["t0_5_margin_subscription_multiple"],
"t0_5_source_id": row["t0_5_source_id"],
@@ -609,6 +611,7 @@ def write_dataset(records: list[dict[str, Any]], output_path: Path) -> None:
"public_oversubscription_times",
"international_oversubscription_times",
"t0_5_observed_at",
"market_heat_stage",
"t0_5_provider",
"t0_5_margin_subscription_multiple",
"t0_5_source_id",
@@ -705,8 +708,15 @@ def write_report(
total = len(records)
d1_records = [record for record in records if record["d1_return_pct"] is not None]
structured_t1 = [record for record in records if record["has_structured_t1"]]
structured_t0_5 = [record for record in records if record["t0_5_margin_subscription_multiple"] is not None]
structured_market_heat = [record for record in records if record["t0_5_margin_subscription_multiple"] is not None]
structured_t0_5 = [
record for record in structured_market_heat if record["market_heat_stage"] in {None, "", "T0_5_market_heat"}
]
structured_t0_95 = [
record for record in structured_market_heat if record["market_heat_stage"] == "T0_95_final_heat"
]
t0_5_with_d1 = [record for record in structured_t0_5 if record["d1_return_pct"] is not None]
t0_95_with_d1 = [record for record in structured_t0_95 if record["d1_return_pct"] is not None]
external_history_rows = [record for record in records if record["external_history_source_path"]]
external_oversub_rows = [record for record in records if record["external_public_oversubscription_times"] is not None]
external_oversub_with_d1 = [
@@ -746,8 +756,11 @@ def write_report(
f"- Rows with offer size: {count_present(records, 'offer_size_hkd_m')}",
f"- Rows with public oversubscription: {count_present(records, 'public_oversubscription_times')}",
f"- Rows with international oversubscription: {count_present(records, 'international_oversubscription_times')}",
f"- Rows with market heat snapshots: {len(structured_market_heat)}",
f"- Rows with T0.5 margin heat snapshots: {len(structured_t0_5)}",
f"- Rows with T0.95 late-order heat snapshots: {len(structured_t0_95)}",
f"- Rows with T0.5 margin heat and D1 labels: {len(t0_5_with_d1)}",
f"- Rows with T0.95 late-order heat and D1 labels: {len(t0_95_with_d1)}",
f"- Rows matched to external ipohk history: {len(external_history_rows)}",
f"- Rows with external final oversubscription: {len(external_oversub_rows)}",
f"- Rows with external final oversubscription and D1 labels: {len(external_oversub_with_d1)}",
@@ -769,10 +782,13 @@ def write_report(
"",
"## T0.5 Market Heat",
"",
"T0.5 uses archived subscription-period margin heat snapshots. These are non-official live signals and are kept separate from T1 allotment demand. The current archive is not yet a historical training set: it has too few rows and no D1 labels for calibration.",
"T0.5 uses archived subscription-period margin heat snapshots. T0.95 is the near-deadline subset that is still actionable before the user's order cutoff. These are non-official live signals and are kept separate from T1 allotment demand. The current archive is not yet a historical training set: it has too few rows and no D1 labels for calibration.",
"",
f"- Total market heat rows: {len(structured_market_heat)}",
f"- T0.5 margin rows: {len(structured_t0_5)}",
f"- T0.5 rows with D1 labels: {len(t0_5_with_d1)}",
f"- T0.95 late-order heat rows: {len(structured_t0_95)}",
f"- T0.95 rows with D1 labels: {len(t0_95_with_d1)}",
"",
"## External Final Heat Proxy",
"",