Files
hk-ipo/schema/hk_ipo.schema.sql
T
geometrybase c65b20a1c4 Archive recent HKEX IPO targets
Request:
Use the project archivist workflow to update IPO target coverage for the most recent three-year window.

Changes:
- Add scripts/update_recent_ipo_list.py to discover HKEXnews annual new listing reports, archive XLSX sources, parse subscription-relevant IPO rows, and update SQLite plus snapshots.
- Add new_listing_report_entries to preserve annual report row-level evidence.
- Archive 2023-2026 Main Board new listing reports and 2024-2026 GEM new listing reports.
- Seed 290 report-backed IPO targets for 2023-06-15 through 2026-06-15, skipping 10 non-IPO rows without numeric offer prices.
- Refresh ipo_master, missing offering_terms fields, source_refs, ticker_sync_state, and sync_tasks.
- Add openpyxl as the XLSX parser dependency and document the archivist refresh flow.
- Limit sync summary output while keeping the full queue in SQLite and CSV snapshots.

Verification:
- Ran update_recent_ipo_list.py for 2023-06-15 to 2026-06-15 with as-of 2026-06-15T07:30:00Z.
- Parsed project Python scripts with ast.parse.
- Checked SQLite integrity and DB-to-snapshot row counts.
- Verified source_refs paths are repo-relative, files exist, and SHA-256 hashes match.
- Ran git diff --check and git diff --cached --check.
- Checked for Python cache and SQLite transient files.

Next useful context:
- ipo_master now has 293 tickers; new_listing_report_entries has 290 report-backed targets.
- Current sync queue has 2005 open tasks and 42 waiting_until_due tasks for deeper per-ticker archival stages.
2026-06-15 06:42:31 +00:00

168 lines
4.5 KiB
SQL

-- Turn on foreign-key enforcement for the connection running this script, so
-- the REFERENCES clauses in the tables below are actually checked (SQLite
-- leaves enforcement off unless it is enabled per connection).
PRAGMA foreign_keys = ON;
-- ipo_master: one row per ticker. Child tables in this schema point at
-- ipo_master(ticker) through foreign keys.
-- Date-like columns are declared TEXT; nothing here enforces a format.
CREATE TABLE IF NOT EXISTS ipo_master (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    ticker                          TEXT NOT NULL PRIMARY KEY,
    company_name_en                 TEXT NOT NULL,
    company_name_zh                 TEXT,
    stock_short_name                TEXT,
    exchange                        TEXT NOT NULL DEFAULT 'HKEX',
    board                           TEXT NOT NULL DEFAULT 'Main Board',
    status                          TEXT NOT NULL,  -- required; no CHECK limits the values
    listing_date                    TEXT,
    application_start_date          TEXT,
    application_end_date            TEXT,
    allotment_results_expected_date TEXT,
    industry_label                  TEXT,
    -- NOTE(review): presumably the as-of timestamp of the run that wrote the
    -- row; confirm with the loader.
    data_as_of                      TEXT NOT NULL,
    notes                           TEXT
);
-- offering_terms: at most one row per ticker, because ticker is both the
-- primary key and a foreign key to ipo_master.
-- NOTE(review): the _hkd / _hkd_m suffixes presumably mean HKD and HKD
-- millions; the schema itself does not state units. Confirm with the loader.
CREATE TABLE IF NOT EXISTS offering_terms (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless declared NOT NULL, and a NULL child key is never checked
    -- against the parent table.
    ticker                             TEXT NOT NULL PRIMARY KEY REFERENCES ipo_master(ticker),
    -- NOTE(review): looks like a source_refs.source_id, but no foreign key is
    -- declared; confirm whether that is deliberate.
    source_id                          TEXT NOT NULL,
    prospectus_date                    TEXT,
    offer_price_hkd                    REAL,
    board_lot                          INTEGER,
    min_subscription_amount_hkd        REAL,
    global_offer_shares                INTEGER,
    hk_offer_shares_initial            INTEGER,
    international_offer_shares_initial INTEGER,
    public_offer_pct_initial           REAL,
    over_allotment_offer_shares        INTEGER,
    offer_size_adjustment_offer_shares INTEGER,
    market_cap_hkd_m                   REAL,
    gross_proceeds_hkd_m               REAL,
    net_proceeds_hkd_m                 REAL,
    issued_shares_upon_listing         INTEGER,
    data_as_of                         TEXT NOT NULL
);
-- ipo_demand: subscription/allotment demand figures for a ticker. Rows are
-- keyed by demand_id and no UNIQUE constraint limits how many rows a ticker
-- may have.
CREATE TABLE IF NOT EXISTS ipo_demand (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    demand_id                            TEXT NOT NULL PRIMARY KEY,
    ticker                               TEXT NOT NULL REFERENCES ipo_master(ticker),
    source_id                            TEXT NOT NULL,  -- required; no foreign key declared
    stage_date                           TEXT NOT NULL,  -- TEXT; format not enforced here
    valid_applications                   INTEGER,
    successful_applications              INTEGER,
    public_oversubscription_times        REAL,
    international_placees                INTEGER,
    international_oversubscription_times REAL,
    final_hk_offer_shares                INTEGER,
    final_international_offer_shares     INTEGER,
    data_as_of                           TEXT NOT NULL,
    notes                                TEXT
);
-- price_performance: price/return figures for a ticker at a named stage.
-- UNIQUE (ticker, stage) allows at most one row per ticker and stage.
CREATE TABLE IF NOT EXISTS price_performance (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    performance_id  TEXT NOT NULL PRIMARY KEY,
    ticker          TEXT NOT NULL REFERENCES ipo_master(ticker),
    stage           TEXT NOT NULL,
    source_id       TEXT,           -- optional in this table
    as_of_date      TEXT NOT NULL,  -- TEXT; format not enforced here
    open_price_hkd  REAL,
    high_price_hkd  REAL,
    low_price_hkd   REAL,
    close_price_hkd REAL,
    return_pct      REAL,
    turnover_hkd_m  REAL,
    data_as_of      TEXT NOT NULL,
    notes           TEXT,
    UNIQUE (ticker, stage)
);
-- new_listing_report_entries: row-level evidence taken from the annual new
-- listing reports. UNIQUE (ticker, report_year, board) allows at most one
-- entry per ticker, report year and board.
CREATE TABLE IF NOT EXISTS new_listing_report_entries (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    report_entry_id               TEXT NOT NULL PRIMARY KEY,
    ticker                        TEXT NOT NULL REFERENCES ipo_master(ticker),
    report_year                   INTEGER NOT NULL,
    board                         TEXT NOT NULL,
    source_id                     TEXT NOT NULL,  -- required; no foreign key declared
    company_name_en               TEXT NOT NULL,
    prospectus_date               TEXT,
    listing_date                  TEXT NOT NULL,  -- required here, unlike ipo_master.listing_date
    offer_price_hkd               REAL,
    -- NOTE(review): no _m suffix on the two money columns below, so presumably
    -- plain HKD rather than millions; confirm against the report parser.
    funds_raised_hkd              REAL,
    subscription_ratio_times      REAL,
    market_cap_hkd                REAL,
    outstanding_shares_at_listing INTEGER,
    listing_method                TEXT,
    industry_label                TEXT,
    place_of_incorporation        TEXT,
    sponsors                      TEXT,
    reporting_accountants         TEXT,
    valuers                       TEXT,
    data_as_of                    TEXT NOT NULL,
    notes                         TEXT,
    UNIQUE (ticker, report_year, board)
);
-- source_refs: one row per archived source, tied to a ticker. The CHECK
-- constraints keep local_path relative to the repository root.
CREATE TABLE IF NOT EXISTS source_refs (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    source_id   TEXT NOT NULL PRIMARY KEY,
    ticker      TEXT NOT NULL REFERENCES ipo_master(ticker),
    source_type TEXT NOT NULL,
    title       TEXT NOT NULL,
    path_base   TEXT NOT NULL DEFAULT 'repo_root',
    local_path  TEXT NOT NULL,
    url         TEXT NOT NULL,
    file_sha256 TEXT,           -- optional; format/length not enforced here
    source_date TEXT,
    archived_at TEXT NOT NULL,
    notes       TEXT,
    -- 'repo_root' is the only base currently accepted.
    CHECK (path_base = 'repo_root'),
    -- No absolute paths and no leading './'.
    CHECK (local_path NOT LIKE '/%'),
    CHECK (local_path NOT LIKE './%'),
    -- No backslashes anywhere in the path. SQLite's LIKE has no default escape
    -- character, so the pattern below is "anything, a backslash, anything".
    -- Engines that treat backslash as the default LIKE escape read it
    -- differently, so do not port this line verbatim.
    CHECK (local_path NOT LIKE '%\%')
);
-- data_gaps: one row per recorded gap, naming the ticker, stage and field
-- concerned, with a required reason and an optional expected resolution date.
CREATE TABLE IF NOT EXISTS data_gaps (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    gap_id                   TEXT NOT NULL PRIMARY KEY,
    ticker                   TEXT NOT NULL REFERENCES ipo_master(ticker),
    stage                    TEXT NOT NULL,
    field_name               TEXT NOT NULL,
    reason                   TEXT NOT NULL,
    expected_resolution_date TEXT,           -- TEXT; format not enforced here
    created_at               TEXT NOT NULL,
    notes                    TEXT
);
-- sync_runs: one row per sync run. status is restricted to 'running',
-- 'complete' or 'failed'; finished_at may be NULL.
CREATE TABLE IF NOT EXISTS sync_runs (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    sync_run_id TEXT NOT NULL PRIMARY KEY,
    mode        TEXT NOT NULL,  -- required; no CHECK limits the values
    as_of       TEXT NOT NULL,
    started_at  TEXT NOT NULL,
    finished_at TEXT,
    status      TEXT NOT NULL,
    notes       TEXT,
    CHECK (status IN ('running', 'complete', 'failed'))
);
-- ticker_sync_state: sync status for a ticker at a given stage; the composite
-- primary key allows exactly one row per (ticker, stage).
CREATE TABLE IF NOT EXISTS ticker_sync_state (
    ticker           TEXT NOT NULL REFERENCES ipo_master(ticker),
    stage            TEXT NOT NULL,
    status           TEXT NOT NULL
                     CHECK (status IN ('complete', 'pending_not_due', 'pending_due', 'blocked', 'not_applicable')),
    -- Stored as an integer restricted to 0 or 1; defaults to 1.
    required         INTEGER NOT NULL DEFAULT 1
                     CHECK (required IN (0, 1)),
    due_date         TEXT,
    completed_at     TEXT,
    -- last_source_id and data_gap_id are plain TEXT: no foreign key is declared.
    last_source_id   TEXT,
    data_gap_id      TEXT,
    last_sync_run_id TEXT REFERENCES sync_runs(sync_run_id),
    updated_at       TEXT NOT NULL,
    notes            TEXT,
    PRIMARY KEY (ticker, stage)
);
-- sync_tasks: the sync queue, one row per task for a ticker and stage.
-- task_status is restricted to 'open', 'waiting_until_due' or 'blocked'.
CREATE TABLE IF NOT EXISTS sync_tasks (
    -- NOT NULL is explicit: SQLite lets a non-INTEGER PRIMARY KEY column hold
    -- NULL unless it is declared NOT NULL.
    task_id          TEXT NOT NULL PRIMARY KEY,
    ticker           TEXT NOT NULL REFERENCES ipo_master(ticker),
    stage            TEXT NOT NULL,
    task_type        TEXT NOT NULL,  -- required; no CHECK limits the values
    task_status      TEXT NOT NULL,
    due_date         TEXT,
    data_gap_id      TEXT,           -- plain TEXT: no foreign key is declared
    last_sync_run_id TEXT REFERENCES sync_runs(sync_run_id),
    updated_at       TEXT NOT NULL,
    notes            TEXT,
    CHECK (task_status IN ('open', 'waiting_until_due', 'blocked'))
);