Files
hk-ipo/schema/hk_ipo.schema.sql
geometrybase 7cbdd533b0 Add A/H share-class mapping workflow
Request:
- Add a repeatable mechanism so HK IPO reports detect issuers that already have Mainland A shares.
- Include a third internet/official-exchange cross-check layer beyond structured history and prospectus scans.

Changes:
- Add listed_share_classes schema support for same-issuer A-share mappings and evidence links.
- Add scripts/archive_a_share_mappings.py to scan prospectus extracted text, reject sponsor/portfolio/cornerstone false positives, archive optional official web evidence and A-share/FX quote evidence, and export snapshots on write.
- Surface a_share_* fields in the analysis dataset and single-ticker report output.
- Update hk-ipo analyst/archivist skill rules and scheduled refresh prompt to require the three-layer A/H mapping check.

Verification:
- python3 -m py_compile scripts/archive_a_share_mappings.py scripts/build_analysis_dataset.py scripts/generate_ipo_report.py
- .venv/bin/python scripts/archive_a_share_mappings.py --as-of 2026-06-24T00:00:00Z --tickers 00668,01688,03661,09630 --dry-run
- .venv/bin/python scripts/build_analysis_dataset.py --db /tmp/hk_ipo_ah_dataset_test.sqlite --dataset /tmp/hk_ipo_ah_dataset_test.csv --report /tmp/hk_ipo_ah_model_test.md --as-of 2026-06-24T00:00:00Z
- .venv/bin/python scripts/generate_ipo_report.py 09630 --dataset /tmp/hk_ipo_ah_dataset_test.csv --stdout --as-of 2026-06-24T00:00:00Z
- git diff --check

Next useful context:
- Dry-run detected 00668->300866.SZ, 01688->002600.SZ, 03661->300661.SZ, and 09630->688630.SH.
- A false positive 01688->300476.SZ from a cornerstone investor parent was rejected by the issuer-context filter.
2026-06-24 07:21:21 +00:00

242 lines
6.8 KiB
SQL

PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS ipo_master (
ticker TEXT PRIMARY KEY,
company_name_en TEXT NOT NULL,
company_name_zh TEXT,
stock_short_name TEXT,
exchange TEXT NOT NULL DEFAULT 'HKEX',
board TEXT NOT NULL DEFAULT 'Main Board',
status TEXT NOT NULL,
listing_date TEXT,
application_start_date TEXT,
application_end_date TEXT,
allotment_results_expected_date TEXT,
industry_label TEXT,
data_as_of TEXT NOT NULL,
notes TEXT
);
CREATE TABLE IF NOT EXISTS offering_terms (
ticker TEXT PRIMARY KEY REFERENCES ipo_master(ticker),
source_id TEXT NOT NULL,
prospectus_date TEXT,
offer_price_hkd REAL,
board_lot INTEGER,
min_subscription_amount_hkd REAL,
global_offer_shares INTEGER,
hk_offer_shares_initial INTEGER,
international_offer_shares_initial INTEGER,
public_offer_pct_initial REAL,
over_allotment_offer_shares INTEGER,
offer_size_adjustment_offer_shares INTEGER,
market_cap_hkd_m REAL,
gross_proceeds_hkd_m REAL,
net_proceeds_hkd_m REAL,
issued_shares_upon_listing INTEGER,
data_as_of TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS ipo_demand (
demand_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
source_id TEXT NOT NULL,
stage_date TEXT NOT NULL,
valid_applications INTEGER,
successful_applications INTEGER,
public_oversubscription_times REAL,
international_placees INTEGER,
international_oversubscription_times REAL,
final_hk_offer_shares INTEGER,
final_international_offer_shares INTEGER,
data_as_of TEXT NOT NULL,
notes TEXT
);
CREATE TABLE IF NOT EXISTS ipo_market_heat (
heat_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
source_id TEXT NOT NULL REFERENCES source_refs(source_id),
stage TEXT NOT NULL DEFAULT 'T0_5_market_heat',
provider TEXT NOT NULL,
observed_at TEXT NOT NULL,
margin_subscription_multiple REAL,
margin_multiple_label TEXT,
offer_price_low_hkd REAL,
offer_price_high_hkd REAL,
board_lot INTEGER,
min_subscription_amount_hkd REAL,
subscription_deadline TEXT,
result_announcement_date TEXT,
listing_date TEXT,
data_as_of TEXT NOT NULL,
notes TEXT,
UNIQUE (ticker, provider, observed_at)
);
CREATE TABLE IF NOT EXISTS external_ipo_history (
history_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL,
provider TEXT NOT NULL,
source_id TEXT NOT NULL,
stock_name TEXT,
listing_date TEXT,
price_range_low_hkd REAL,
price_range_high_hkd REAL,
issue_price_hkd REAL,
one_lot_capital_hkd REAL,
one_hand_win_rate_pct REAL,
public_oversubscription_times REAL,
total_fundraise_hkd_b REAL,
market_cap_at_listing_hkd_b REAL,
grey_market_return_pct REAL,
first_day_return_pct REAL,
sponsor TEXT,
source_date TEXT,
archived_at TEXT NOT NULL,
local_path TEXT NOT NULL,
url TEXT NOT NULL,
file_sha256 TEXT,
notes TEXT,
UNIQUE (ticker, provider, listing_date),
CHECK (local_path NOT LIKE '/%'),
CHECK (local_path NOT LIKE './%'),
CHECK (local_path NOT LIKE '%\%')
);
CREATE TABLE IF NOT EXISTS price_performance (
performance_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
stage TEXT NOT NULL,
source_id TEXT,
as_of_date TEXT NOT NULL,
open_price_hkd REAL,
high_price_hkd REAL,
low_price_hkd REAL,
close_price_hkd REAL,
return_pct REAL,
turnover_hkd_m REAL,
data_as_of TEXT NOT NULL,
notes TEXT,
UNIQUE (ticker, stage)
);
CREATE TABLE IF NOT EXISTS new_listing_report_entries (
report_entry_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
report_year INTEGER NOT NULL,
board TEXT NOT NULL,
source_id TEXT NOT NULL,
company_name_en TEXT NOT NULL,
prospectus_date TEXT,
listing_date TEXT NOT NULL,
offer_price_hkd REAL,
funds_raised_hkd REAL,
subscription_ratio_times REAL,
market_cap_hkd REAL,
outstanding_shares_at_listing INTEGER,
listing_method TEXT,
industry_label TEXT,
place_of_incorporation TEXT,
sponsors TEXT,
reporting_accountants TEXT,
valuers TEXT,
data_as_of TEXT NOT NULL,
notes TEXT,
UNIQUE (ticker, report_year, board)
);
CREATE TABLE IF NOT EXISTS source_refs (
source_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
source_type TEXT NOT NULL,
title TEXT NOT NULL,
path_base TEXT NOT NULL DEFAULT 'repo_root',
local_path TEXT NOT NULL,
url TEXT NOT NULL,
file_sha256 TEXT,
source_date TEXT,
archived_at TEXT NOT NULL,
notes TEXT,
CHECK (path_base = 'repo_root'),
CHECK (local_path NOT LIKE '/%'),
CHECK (local_path NOT LIKE './%'),
CHECK (local_path NOT LIKE '%\%')
);
CREATE TABLE IF NOT EXISTS listed_share_classes (
share_class_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
share_class_type TEXT NOT NULL,
related_ticker TEXT NOT NULL,
exchange TEXT NOT NULL,
board TEXT,
relationship TEXT NOT NULL,
company_name TEXT,
listed_date TEXT,
detection_method TEXT NOT NULL,
confidence TEXT NOT NULL,
prospectus_source_id TEXT REFERENCES source_refs(source_id),
web_source_id TEXT REFERENCES source_refs(source_id),
evidence_text TEXT,
data_as_of TEXT NOT NULL,
notes TEXT,
UNIQUE (ticker, share_class_type, related_ticker),
CHECK (share_class_type IN ('A_share', 'other_onshore_share')),
CHECK (relationship IN ('same_issuer', 'parent', 'subsidiary', 'affiliate', 'comparable')),
CHECK (confidence IN ('high', 'medium', 'low'))
);
CREATE TABLE IF NOT EXISTS data_gaps (
gap_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
stage TEXT NOT NULL,
field_name TEXT NOT NULL,
reason TEXT NOT NULL,
expected_resolution_date TEXT,
created_at TEXT NOT NULL,
notes TEXT
);
CREATE TABLE IF NOT EXISTS sync_runs (
sync_run_id TEXT PRIMARY KEY,
mode TEXT NOT NULL,
as_of TEXT NOT NULL,
started_at TEXT NOT NULL,
finished_at TEXT,
status TEXT NOT NULL,
notes TEXT,
CHECK (status IN ('running', 'complete', 'failed'))
);
CREATE TABLE IF NOT EXISTS ticker_sync_state (
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
stage TEXT NOT NULL,
status TEXT NOT NULL,
required INTEGER NOT NULL DEFAULT 1,
due_date TEXT,
completed_at TEXT,
last_source_id TEXT,
data_gap_id TEXT,
last_sync_run_id TEXT REFERENCES sync_runs(sync_run_id),
updated_at TEXT NOT NULL,
notes TEXT,
PRIMARY KEY (ticker, stage),
CHECK (status IN ('complete', 'pending_not_due', 'pending_due', 'blocked', 'not_applicable')),
CHECK (required IN (0, 1))
);
CREATE TABLE IF NOT EXISTS sync_tasks (
task_id TEXT PRIMARY KEY,
ticker TEXT NOT NULL REFERENCES ipo_master(ticker),
stage TEXT NOT NULL,
task_type TEXT NOT NULL,
task_status TEXT NOT NULL,
due_date TEXT,
data_gap_id TEXT,
last_sync_run_id TEXT REFERENCES sync_runs(sync_run_id),
updated_at TEXT NOT NULL,
notes TEXT,
CHECK (task_status IN ('open', 'waiting_until_due', 'blocked'))
);