08db218b6d
Request: Add archivist support for remembering which IPO archive stages have already been synced and which stages should be updated next. Changes: - Add sync_runs, ticker_sync_state, sync_tasks, and price_performance tables to the archive schema. - Add scripts/update_sync_state.py to derive per-ticker stage status and rebuild the next-sync task queue. - Export the new sync-state tables as Git-friendly CSV snapshots. - Document the incremental archive flow in the archivist skill and README. Verification: - Ran scripts/bootstrap_historical_data.py. - Ran scripts/update_sync_state.py with a deterministic as-of timestamp. - Checked SQLite integrity and DB-to-snapshot row counts with Python sqlite3. - Parsed Python scripts with ast.parse. - Ran git diff --check and checked for temporary SQLite/cache files. Next useful context: - Current derived queue has 2 open tasks for 06658 and 15 waiting_until_due tasks for future stages.
318 lines
12 KiB
Python
318 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""Bootstrap the project-local HK IPO historical archive."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import hashlib
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
|
|
# Timestamp stamped onto every seeded row in this script (the data_as_of,
# created_at and archived_at values all reuse it).
ARCHIVE_AS_OF = "2026-06-15T06:15:00Z"
# Relative locations of the SQLite database, the schema script executed by
# main(), and the directory that receives the per-table CSV snapshots.
DB_PATH = Path("data/hk_ipo.sqlite")
SCHEMA_PATH = Path("schema/hk_ipo.schema.sql")
SNAPSHOT_DIR = Path("data/snapshots")
# gap_id values that main() deletes from data_gaps before upserting DATA_GAPS.
STALE_GAP_IDS = [
    "06106_full_prospectus_classification_2026_06_15",
]
|
|
|
|
|
|
# Seed rows that main() upserts into the ipo_master table, one dict per ticker.
# All dicts share the same keys; upsert_rows() takes its column list from the
# first dict.
IPO_MASTER = [
    {
        "ticker": "06658",
        "company_name_en": "Liuliumei Co., Ltd.",
        "company_name_zh": "溜溜梅股份有限公司",
        "stock_short_name": "LIULIUMEI",
        "status": "listed",
        "listing_date": "2026-06-15",
        "application_start_date": "2026-06-05",
        "application_end_date": "2026-06-10",
        "allotment_results_expected_date": "2026-06-12",
        "industry_label": "Snack food / preserved fruit",
        "data_as_of": ARCHIVE_AS_OF,
        "notes": "Seeded from HKEXnews prospectus and allotment results.",
    },
    {
        "ticker": "06675",
        "company_name_en": "SENASIC Electronics Technology Co., Ltd.",
        "company_name_zh": "琻捷電子科技(江蘇)股份有限公司",
        "stock_short_name": None,
        "status": "pending_listing",
        "listing_date": "2026-06-17",
        "application_start_date": "2026-06-09",
        "application_end_date": "2026-06-12",
        "allotment_results_expected_date": "2026-06-16",
        "industry_label": "Automotive wireless sensing SoC / semiconductors",
        "data_as_of": ARCHIVE_AS_OF,
        "notes": "Seeded from HKEXnews prospectus and global offering announcement; allotment results not yet archived.",
    },
    {
        "ticker": "06106",
        "company_name_en": "Shanghai Seer Intelligent Technology Co., Ltd.",
        "company_name_zh": "上海仙工智能科技股份有限公司",
        "stock_short_name": None,
        "status": "open_for_subscription",
        "listing_date": "2026-06-24",
        "application_start_date": "2026-06-15",
        "application_end_date": "2026-06-18",
        "allotment_results_expected_date": "2026-06-23",
        "industry_label": "Industrial intelligent robots / robot controllers",
        "data_as_of": ARCHIVE_AS_OF,
        # NOTE(review): this note still says the prospectus classification needs
        # follow-up, while SOURCES records the 06106 prospectus as verified and
        # STALE_GAP_IDS removes the matching gap row; confirm whether it is stale.
        "notes": "Seeded from HKEXnews global offering announcement; full prospectus source classification needs follow-up.",
    },
]
|
|
|
|
|
|
# Seed rows that main() upserts into the offering_terms table, one dict per
# ticker. Each source_id matches a source_id listed in SOURCES.
# None marks a value that this seed does not provide.
# NOTE(review): the *_hkd_m fields look like HKD millions and
# public_offer_pct_initial looks like a fraction (0.10 rather than 10);
# confirm both against the schema.
OFFERING_TERMS = [
    {
        "ticker": "06658",
        "source_id": "06658_prospectus_2026_06_05",
        "prospectus_date": "2026-06-05",
        "offer_price_hkd": 43.58,
        "board_lot": 100,
        "min_subscription_amount_hkd": 4401.96,
        "global_offer_shares": 11464100,
        "hk_offer_shares_initial": 1146500,
        "international_offer_shares_initial": 10317600,
        "public_offer_pct_initial": 0.10,
        "over_allotment_offer_shares": None,
        "offer_size_adjustment_offer_shares": None,
        "market_cap_hkd_m": 3434.59,
        "gross_proceeds_hkd_m": 499.6,
        "net_proceeds_hkd_m": 440.1,
        "issued_shares_upon_listing": 78811208,
        "data_as_of": ARCHIVE_AS_OF,
    },
    {
        "ticker": "06675",
        "source_id": "06675_global_offering_announcement_2026_06_09",
        "prospectus_date": "2026-06-09",
        "offer_price_hkd": 18.36,
        "board_lot": 200,
        "min_subscription_amount_hkd": 3709.04,
        "global_offer_shares": 53407000,
        "hk_offer_shares_initial": 5340800,
        "international_offer_shares_initial": 48066200,
        "public_offer_pct_initial": 0.10,
        "over_allotment_offer_shares": 8011000,
        "offer_size_adjustment_offer_shares": None,
        "market_cap_hkd_m": 6959.2,
        "gross_proceeds_hkd_m": None,
        "net_proceeds_hkd_m": 906.7,
        "issued_shares_upon_listing": 379041820,
        "data_as_of": ARCHIVE_AS_OF,
    },
    {
        "ticker": "06106",
        "source_id": "06106_prospectus_candidate_2026_06_15",
        "prospectus_date": "2026-06-15",
        "offer_price_hkd": 101.60,
        "board_lot": 50,
        "min_subscription_amount_hkd": 5131.24,
        "global_offer_shares": 10497300,
        "hk_offer_shares_initial": 524900,
        "international_offer_shares_initial": 9972400,
        "public_offer_pct_initial": 0.05,
        "over_allotment_offer_shares": 1574550,
        "offer_size_adjustment_offer_shares": 1574550,
        "market_cap_hkd_m": 11226.52568,
        "gross_proceeds_hkd_m": 1066.52568,
        "net_proceeds_hkd_m": 995.4,
        "issued_shares_upon_listing": 110497300,
        "data_as_of": ARCHIVE_AS_OF,
    },
]
|
|
|
|
|
|
# Seed rows that main() upserts into the ipo_demand table. Only ticker 06658
# has a demand row here; DATA_GAPS records the allotment results for 06675 and
# 06106 as not yet available.
IPO_DEMAND = [
    {
        "demand_id": "06658_allotment_2026_06_12",
        "ticker": "06658",
        "source_id": "06658_allotment_results_2026_06_12",
        "stage_date": "2026-06-12",
        "valid_applications": 180507,
        "successful_applications": 11465,
        "public_oversubscription_times": 6586.73,
        "international_placees": 64,
        "international_oversubscription_times": 2.64,
        "final_hk_offer_shares": 1146500,
        "final_international_offer_shares": 10317600,
        "data_as_of": ARCHIVE_AS_OF,
        "notes": "Claw-back shown as N/A in the HKEXnews allotment results.",
    },
]
|
|
|
|
|
|
# Source documents behind the seeded data. For each entry main() validates
# local_path with ensure_relative_path(), then adds path_base, file_sha256
# (from hash_file) and archived_at before upserting into source_refs.
SOURCES = [
    {
        "source_id": "06658_prospectus_2026_06_05",
        "ticker": "06658",
        "source_type": "prospectus",
        "title": "Liuliumei Co., Ltd. Prospectus",
        "local_path": "data/raw/06658/prospectus_2026-06-05.pdf",
        "url": "https://www1.hkexnews.hk/listedco/listconews/sehk/2026/0605/2026060500023.pdf",
        "source_date": "2026-06-05",
        "notes": "HKEXnews prospectus.",
    },
    {
        "source_id": "06658_allotment_results_2026_06_12",
        "ticker": "06658",
        "source_type": "allotment_results",
        "title": "Liuliumei Co., Ltd. Announcement of Allotment Results",
        "local_path": "data/raw/06658/allotment_results_2026-06-12.pdf",
        "url": "https://www1.hkexnews.hk/listedco/listconews/sehk/2026/0612/2026061202100.pdf",
        "source_date": "2026-06-12",
        "notes": "HKEXnews allotment results.",
    },
    {
        "source_id": "06675_prospectus_2026_06_09",
        "ticker": "06675",
        "source_type": "prospectus",
        "title": "SENASIC Electronics Technology Co., Ltd. Prospectus",
        "local_path": "data/raw/06675/prospectus_2026-06-09.pdf",
        "url": "https://www.hkexnews.hk/listedco/listconews/sehk/2026/0609/2026060900029.pdf",
        "source_date": "2026-06-09",
        "notes": "HKEXnews prospectus.",
    },
    {
        "source_id": "06675_global_offering_announcement_2026_06_09",
        "ticker": "06675",
        "source_type": "global_offering_announcement",
        "title": "SENASIC Electronics Technology Co., Ltd. Global Offering Announcement",
        "local_path": "data/raw/06675/global_offering_announcement_2026-06-09.pdf",
        "url": "https://www.hkexnews.hk/listedco/listconews/sehk/2026/0609/2026060900009.pdf",
        "source_date": "2026-06-09",
        "notes": "HKEXnews global offering announcement.",
    },
    {
        "source_id": "06106_prospectus_notice_2026_06_15",
        "ticker": "06106",
        "source_type": "prospectus_notice",
        "title": "Shanghai Seer Intelligent Technology Co., Ltd. Prospectus Notice",
        "local_path": "data/raw/06106/prospectus_notice_2026-06-15.pdf",
        "url": "https://www1.hkexnews.hk/listedco/listconews/sehk/2026/0615/2026061500011.pdf",
        "source_date": "2026-06-15",
        "notes": "HKEXnews announcement containing global offering terms and timetable.",
    },
    {
        "source_id": "06106_prospectus_candidate_2026_06_15",
        "ticker": "06106",
        "source_type": "prospectus",
        "title": "Shanghai Seer Intelligent Technology Co., Ltd. Prospectus",
        "local_path": "data/raw/06106/prospectus_candidate_2026-06-15.pdf",
        "url": "https://www1.hkexnews.hk/listedco/listconews/sehk/2026/0615/2026061500013.pdf",
        "source_date": "2026-06-15",
        "notes": "HKEXnews prospectus; verified by text extraction as a 424-page GLOBAL OFFERING document.",
    },
]
|
|
|
|
|
|
# Known holes in the seeded archive; main() upserts these into data_gaps after
# deleting the ids listed in STALE_GAP_IDS. Both entries cover the allotment
# results (field_name "ipo_demand") of tickers that have no IPO_DEMAND row.
DATA_GAPS = [
    {
        "gap_id": "06675_allotment_results_pending_2026_06_15",
        "ticker": "06675",
        "stage": "T1_allotment",
        "field_name": "ipo_demand",
        "reason": "Allotment results were expected on 2026-06-16 and were not available in this seed archive.",
        "expected_resolution_date": "2026-06-16",
        "created_at": ARCHIVE_AS_OF,
        "notes": "Update after the HKEXnews allotment results announcement is published.",
    },
    {
        "gap_id": "06106_allotment_results_pending_2026_06_15",
        "ticker": "06106",
        "stage": "T1_allotment",
        "field_name": "ipo_demand",
        "reason": "Allotment results were expected on 2026-06-23 and were not available in this seed archive.",
        "expected_resolution_date": "2026-06-23",
        "created_at": ARCHIVE_AS_OF,
        "notes": "Update after the HKEXnews allotment results announcement is published.",
    },
]
|
|
|
|
|
|
def repo_root() -> Path:
    """Return the directory used as the base for repo-relative paths.

    This is the current working directory at call time, so repo-relative
    paths resolve correctly only when the script runs from the repository
    top level.
    """
    working_dir = Path.cwd()
    return working_dir
|
|
|
|
|
|
def hash_file(relative_path: str) -> str:
    """Return the hexadecimal SHA-256 digest of a file under the repo root.

    Args:
        relative_path: Path of the file, relative to ``repo_root()``.

    Returns:
        The lowercase hex digest of the file contents.
    """
    sha = hashlib.sha256()
    piece_size = 1024 * 1024  # read 1 MiB at a time instead of the whole file
    with (repo_root() / relative_path).open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                break
            sha.update(piece)
    return sha.hexdigest()
|
|
|
|
|
|
def ensure_relative_path(relative_path: str) -> None:
    """Validate that *relative_path* is a repo-relative POSIX path that exists.

    Args:
        relative_path: Path string expected to be relative to ``repo_root()``.

    Raises:
        ValueError: If the path is absolute, starts with ``./``, contains a
            backslash, or contains a ``..`` component (which could resolve
            outside the repository root).
        FileNotFoundError: If nothing exists at the path under ``repo_root()``.
    """
    path = Path(relative_path)
    if (
        path.is_absolute()
        or relative_path.startswith("./")
        or "\\" in relative_path
        # A parent-directory component lets a "relative" path escape the repo.
        or ".." in path.parts
    ):
        raise ValueError(f"Path must be repo-relative POSIX style: {relative_path}")
    if not (repo_root() / relative_path).exists():
        raise FileNotFoundError(relative_path)
|
|
|
|
|
|
def upsert_rows(conn: sqlite3.Connection, table: str, rows: list[dict[str, object]]) -> None:
    """Insert *rows* into *table*, overwriting existing rows on conflict.

    The column list comes from the keys of the first row; every row is then
    read with exactly those keys. An empty *rows* list is a no-op.

    The statement uses ``ON CONFLICT DO UPDATE`` without a conflict target,
    which SQLite accepts only from version 3.35.0 onward.

    Args:
        conn: Open SQLite connection; the caller owns commit/rollback.
        table: Name of the destination table (interpolated into the SQL).
        rows: Row dicts mapping column name to value.
    """
    if len(rows) == 0:
        return

    names = [name for name in rows[0]]
    column_list = ", ".join(names)
    marks = ", ".join(["?"] * len(names))
    overwrite = ", ".join(f"{name}=excluded.{name}" for name in names)
    statement = (
        f"INSERT INTO {table} ({column_list}) VALUES ({marks}) "
        f"ON CONFLICT DO UPDATE SET {overwrite}"
    )
    # Parameters are produced lazily, one row at a time, as executemany pulls them.
    parameters = ([record[name] for name in names] for record in rows)
    conn.executemany(statement, parameters)
|
|
|
|
|
|
def export_snapshot(conn: sqlite3.Connection, table: str) -> None:
    """Write all rows of *table* to ``SNAPSHOT_DIR/<table>.csv``.

    The file starts with a header row of column names, followed by the rows
    ordered by the first column. It is written as UTF-8 with ``\\n`` line
    endings and replaces any existing snapshot for the table.

    Args:
        conn: Open SQLite connection to read from.
        table: Name of the table to export (interpolated into the SQL).
    """
    SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True)
    result = conn.execute(f"SELECT * FROM {table} ORDER BY 1")
    header = [meta[0] for meta in result.description]
    target = SNAPSHOT_DIR / f"{table}.csv"
    with target.open("w", newline="", encoding="utf-8") as out:
        csv_out = csv.writer(out, lineterminator="\n")
        csv_out.writerow(header)
        records = result.fetchall()
        csv_out.writerows(records)
|
|
|
|
|
|
def main() -> None:
    """Create or refresh the seed archive database and its CSV snapshots.

    Steps, all on a single connection to ``DB_PATH``:

    1. Execute the schema script at ``SCHEMA_PATH``.
    2. Delete the ``data_gaps`` rows listed in ``STALE_GAP_IDS``.
    3. Upsert ``IPO_MASTER``, ``OFFERING_TERMS`` and ``IPO_DEMAND``.
    4. Validate and hash every file in ``SOURCES``, then upsert the enriched
       rows into ``source_refs``.
    5. Upsert ``DATA_GAPS``.
    6. Export each archive table to a CSV snapshot.

    Raises:
        ValueError: If a source ``local_path`` is not a repo-relative POSIX path.
        FileNotFoundError: If a source file is missing under the repo root.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn` only commits (or rolls back on error); it does not close
        # the connection, so the close happens explicitly in `finally`.
        with conn:
            conn.executescript(SCHEMA_PATH.read_text(encoding="utf-8"))
            conn.executemany("DELETE FROM data_gaps WHERE gap_id = ?", [(gap_id,) for gap_id in STALE_GAP_IDS])
            upsert_rows(conn, "ipo_master", IPO_MASTER)
            upsert_rows(conn, "offering_terms", OFFERING_TERMS)
            upsert_rows(conn, "ipo_demand", IPO_DEMAND)

            source_rows = []
            for source in SOURCES:
                # Validate before hashing so a bad path fails with a clear error.
                ensure_relative_path(source["local_path"])
                source_rows.append(
                    {
                        **source,
                        "path_base": "repo_root",
                        "file_sha256": hash_file(source["local_path"]),
                        "archived_at": ARCHIVE_AS_OF,
                    }
                )
            upsert_rows(conn, "source_refs", source_rows)
            upsert_rows(conn, "data_gaps", DATA_GAPS)

            for table in [
                "ipo_master",
                "offering_terms",
                "ipo_demand",
                "price_performance",
                "source_refs",
                "data_gaps",
                "sync_runs",
                "ticker_sync_state",
                "sync_tasks",
            ]:
                export_snapshot(conn, table)
    finally:
        conn.close()
|
|
|
|
|
|
# Run the bootstrap only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|