Add T0.95 late-order heat stage
Request:
- Reflect that near-final market heat can be used when the user can still place an IPO order at T0.95.

Changes:
- Added T0_95_final_heat as a separate analyst decision stage with executability and no-leakage rules.
- Added an experimental T0.95 rule overlay for late-order heat scoring and calibration discipline.
- Updated archivist guidance and the market-heat archiver so snapshots can be explicitly stored as T0_95_final_heat.
- Added market_heat_stage to the analysis dataset and refreshed the model report to show T0.95 coverage separately.

Verification:
- Ran py_compile for the modified scripts.
- Checked archive_t0_5_market_heat.py --help for the new --stage option.
- Rebuilt data/snapshots/analysis_model_v0_dataset.csv and reports/2026-06-15_analysis_model_v0.md.
- Ran git diff --check.

Next useful context:
- Current archived heat rows remain T0_5_market_heat only; there are no true T0.95 rows yet.
- external_ipo_history.public_oversubscription_times is still calibration-only unless a comparable value is archived before the executable order cutoff.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Archive T0.5 HK IPO market-heat snapshots from public web pages."""
|
||||
"""Archive T0.5/T0.95 HK IPO market-heat snapshots from public web pages."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -20,6 +20,8 @@ DEFAULT_DB_PATH = Path("data/hk_ipo.sqlite")
|
||||
DEFAULT_SCHEMA_PATH = Path("schema/hk_ipo.schema.sql")
|
||||
SNAPSHOT_DIR = Path("data/snapshots")
|
||||
VBKR_URL = "https://www.vbkr.com/ipo/hk/v2/ipo-hk-index"
|
||||
T0_5_STAGE = "T0_5_market_heat"
|
||||
T0_95_STAGE = "T0_95_final_heat"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -45,6 +47,12 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument("--as-of", help="Archive timestamp. Defaults to current UTC time.")
|
||||
parser.add_argument("--url", default=VBKR_URL, help="Market-heat page URL.")
|
||||
parser.add_argument("--tickers", help="Comma-separated tickers to archive. Defaults to all parsed tickers in ipo_master.")
|
||||
parser.add_argument(
|
||||
"--stage",
|
||||
choices=[T0_5_STAGE, T0_95_STAGE],
|
||||
default=T0_5_STAGE,
|
||||
help="Decision stage represented by this snapshot. Use T0_95_final_heat only when the snapshot is still actionable before the user's order cutoff.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -62,6 +70,10 @@ def source_date(value: str) -> str:
|
||||
return datetime.fromisoformat(value.replace("Z", "+00:00")).date().isoformat()
|
||||
|
||||
|
||||
def stage_slug(stage: str) -> str:
|
||||
return stage.lower()
|
||||
|
||||
|
||||
def fetch_bytes(url: str) -> bytes:
|
||||
request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
||||
with urlopen(request, timeout=60) as response:
|
||||
@@ -191,12 +203,24 @@ def upsert_rows(
|
||||
file_sha256: str,
|
||||
url: str,
|
||||
as_of: str,
|
||||
stage: str,
|
||||
) -> int:
|
||||
written = 0
|
||||
slug = stage_slug(stage)
|
||||
title = (
|
||||
"VBKR IPO near-deadline final heat snapshot"
|
||||
if stage == T0_95_STAGE
|
||||
else "VBKR IPO expected margin multiple snapshot"
|
||||
)
|
||||
source_notes = (
|
||||
"Non-official near-deadline market heat snapshot archived before the user's order cutoff. Final HKEX subscription data must still come from allotment results."
|
||||
if stage == T0_95_STAGE
|
||||
else "Non-official subscription-period market heat snapshot. Final subscription data must come from HKEX allotment results."
|
||||
)
|
||||
for row in rows:
|
||||
if row.ticker not in selected:
|
||||
continue
|
||||
source_id = f"{row.ticker}_t0_5_market_heat_vbkr_{compact_timestamp(as_of)}"
|
||||
source_id = f"{row.ticker}_{slug}_vbkr_{compact_timestamp(as_of)}"
|
||||
heat_id = source_id
|
||||
conn.execute(
|
||||
"""
|
||||
@@ -216,14 +240,14 @@ def upsert_rows(
|
||||
(
|
||||
source_id,
|
||||
row.ticker,
|
||||
"t0_5_market_heat",
|
||||
"VBKR IPO expected margin multiple snapshot",
|
||||
slug,
|
||||
title,
|
||||
local_path,
|
||||
url,
|
||||
file_sha256,
|
||||
source_date(as_of),
|
||||
as_of,
|
||||
"Non-official subscription-period market heat snapshot. Final subscription data must come from HKEX allotment results.",
|
||||
source_notes,
|
||||
),
|
||||
)
|
||||
conn.execute(
|
||||
@@ -235,9 +259,10 @@ def upsert_rows(
|
||||
min_subscription_amount_hkd, subscription_deadline,
|
||||
result_announcement_date, listing_date, data_as_of, notes
|
||||
)
|
||||
VALUES (?, ?, ?, 'T0_5_market_heat', 'VBKR/Jieli', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
VALUES (?, ?, ?, ?, 'VBKR/Jieli', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(ticker, provider, observed_at) DO UPDATE SET
|
||||
source_id = excluded.source_id,
|
||||
stage = excluded.stage,
|
||||
margin_subscription_multiple = excluded.margin_subscription_multiple,
|
||||
margin_multiple_label = excluded.margin_multiple_label,
|
||||
offer_price_low_hkd = excluded.offer_price_low_hkd,
|
||||
@@ -254,6 +279,7 @@ def upsert_rows(
|
||||
heat_id,
|
||||
row.ticker,
|
||||
source_id,
|
||||
stage,
|
||||
as_of,
|
||||
row.margin_subscription_multiple,
|
||||
row.margin_multiple_label,
|
||||
@@ -285,7 +311,7 @@ def main() -> int:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
conn.executescript(schema_path.read_text(encoding="utf-8"))
|
||||
selected = selected_tickers(args.tickers, parsed_rows, conn)
|
||||
written = upsert_rows(conn, parsed_rows, selected, local_path, file_sha256, args.url, as_of)
|
||||
written = upsert_rows(conn, parsed_rows, selected, local_path, file_sha256, args.url, as_of, args.stage)
|
||||
export_snapshot(conn, "ipo_market_heat", "ticker, observed_at")
|
||||
export_snapshot(conn, "source_refs", "source_id")
|
||||
|
||||
@@ -304,6 +330,7 @@ def main() -> int:
|
||||
)
|
||||
print("market heat archived")
|
||||
print(f"as_of: {as_of}")
|
||||
print(f"stage: {args.stage}")
|
||||
print(f"raw_snapshot: {local_path}")
|
||||
print(f"parsed_rows: {len(parsed_rows)}")
|
||||
print(f"written_rows: {written}")
|
||||
|
||||
@@ -415,6 +415,7 @@ def fetch_rows(conn: sqlite3.Connection) -> list[sqlite3.Row]:
|
||||
p.d60_return_pct,
|
||||
p.d1_turnover_hkd_m,
|
||||
h.observed_at AS t0_5_observed_at,
|
||||
h.stage AS market_heat_stage,
|
||||
h.provider AS t0_5_provider,
|
||||
h.margin_subscription_multiple AS t0_5_margin_subscription_multiple,
|
||||
h.source_id AS t0_5_source_id,
|
||||
@@ -487,6 +488,7 @@ def build_records(rows: list[sqlite3.Row], as_of: str) -> list[dict[str, Any]]:
|
||||
"public_oversubscription_times": row["public_oversubscription_times"],
|
||||
"international_oversubscription_times": row["international_oversubscription_times"],
|
||||
"t0_5_observed_at": row["t0_5_observed_at"],
|
||||
"market_heat_stage": row["market_heat_stage"],
|
||||
"t0_5_provider": row["t0_5_provider"],
|
||||
"t0_5_margin_subscription_multiple": row["t0_5_margin_subscription_multiple"],
|
||||
"t0_5_source_id": row["t0_5_source_id"],
|
||||
@@ -609,6 +611,7 @@ def write_dataset(records: list[dict[str, Any]], output_path: Path) -> None:
|
||||
"public_oversubscription_times",
|
||||
"international_oversubscription_times",
|
||||
"t0_5_observed_at",
|
||||
"market_heat_stage",
|
||||
"t0_5_provider",
|
||||
"t0_5_margin_subscription_multiple",
|
||||
"t0_5_source_id",
|
||||
@@ -705,8 +708,15 @@ def write_report(
|
||||
total = len(records)
|
||||
d1_records = [record for record in records if record["d1_return_pct"] is not None]
|
||||
structured_t1 = [record for record in records if record["has_structured_t1"]]
|
||||
structured_t0_5 = [record for record in records if record["t0_5_margin_subscription_multiple"] is not None]
|
||||
structured_market_heat = [record for record in records if record["t0_5_margin_subscription_multiple"] is not None]
|
||||
structured_t0_5 = [
|
||||
record for record in structured_market_heat if record["market_heat_stage"] in {None, "", "T0_5_market_heat"}
|
||||
]
|
||||
structured_t0_95 = [
|
||||
record for record in structured_market_heat if record["market_heat_stage"] == "T0_95_final_heat"
|
||||
]
|
||||
t0_5_with_d1 = [record for record in structured_t0_5 if record["d1_return_pct"] is not None]
|
||||
t0_95_with_d1 = [record for record in structured_t0_95 if record["d1_return_pct"] is not None]
|
||||
external_history_rows = [record for record in records if record["external_history_source_path"]]
|
||||
external_oversub_rows = [record for record in records if record["external_public_oversubscription_times"] is not None]
|
||||
external_oversub_with_d1 = [
|
||||
@@ -746,8 +756,11 @@ def write_report(
|
||||
f"- Rows with offer size: {count_present(records, 'offer_size_hkd_m')}",
|
||||
f"- Rows with public oversubscription: {count_present(records, 'public_oversubscription_times')}",
|
||||
f"- Rows with international oversubscription: {count_present(records, 'international_oversubscription_times')}",
|
||||
f"- Rows with market heat snapshots: {len(structured_market_heat)}",
|
||||
f"- Rows with T0.5 margin heat snapshots: {len(structured_t0_5)}",
|
||||
f"- Rows with T0.95 late-order heat snapshots: {len(structured_t0_95)}",
|
||||
f"- Rows with T0.5 margin heat and D1 labels: {len(t0_5_with_d1)}",
|
||||
f"- Rows with T0.95 late-order heat and D1 labels: {len(t0_95_with_d1)}",
|
||||
f"- Rows matched to external ipohk history: {len(external_history_rows)}",
|
||||
f"- Rows with external final oversubscription: {len(external_oversub_rows)}",
|
||||
f"- Rows with external final oversubscription and D1 labels: {len(external_oversub_with_d1)}",
|
||||
@@ -769,10 +782,13 @@ def write_report(
|
||||
"",
|
||||
"## T0.5 Market Heat",
|
||||
"",
|
||||
"T0.5 uses archived subscription-period margin heat snapshots. These are non-official live signals and are kept separate from T1 allotment demand. The current archive is not yet a historical training set: it has too few rows and no D1 labels for calibration.",
|
||||
"T0.5 uses archived subscription-period margin heat snapshots. T0.95 is the near-deadline subset that is still actionable before the user's order cutoff. These are non-official live signals and are kept separate from T1 allotment demand. The current archive is not yet a historical training set: it has too few rows and no D1 labels for calibration.",
|
||||
"",
|
||||
f"- Total market heat rows: {len(structured_market_heat)}",
|
||||
f"- T0.5 margin rows: {len(structured_t0_5)}",
|
||||
f"- T0.5 rows with D1 labels: {len(t0_5_with_d1)}",
|
||||
f"- T0.95 late-order heat rows: {len(structured_t0_95)}",
|
||||
f"- T0.95 rows with D1 labels: {len(t0_95_with_d1)}",
|
||||
"",
|
||||
"## External Final Heat Proxy",
|
||||
"",
|
||||
|
||||
Reference in New Issue
Block a user