#!/usr/bin/env python3 """Mark due T2 grey-market tasks as blocked until a reliable source is approved.""" from __future__ import annotations import argparse import csv import sqlite3 import subprocess import sys from datetime import datetime, timezone from pathlib import Path DEFAULT_DB_PATH = Path("data/hk_ipo.sqlite") DEFAULT_SCHEMA_PATH = Path("schema/hk_ipo.schema.sql") SNAPSHOT_DIR = Path("data/snapshots") def parse_as_of(value: str | None) -> str: if value: return datetime.fromisoformat(value.replace("Z", "+00:00")).isoformat().replace("+00:00", "Z") return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") def export_snapshot(conn: sqlite3.Connection, table: str, order_by: str = "1") -> None: SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) cursor = conn.execute(f"SELECT * FROM {table} ORDER BY {order_by}") columns = [description[0] for description in cursor.description] with (SNAPSHOT_DIR / f"{table}.csv").open("w", newline="", encoding="utf-8") as handle: writer = csv.writer(handle, lineterminator="\n") writer.writerow(columns) writer.writerows(cursor.fetchall()) def refresh_sync_state(db_path: str, schema_path: str, as_of: str) -> None: subprocess.run( [ sys.executable, "scripts/update_sync_state.py", "--db", db_path, "--schema", schema_path, "--as-of", as_of, "--mode", "grey_market_gap_review", "--summary-limit", "25", ], check=True, ) def main() -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--db", default=str(DEFAULT_DB_PATH), help="Repo-relative SQLite database path.") parser.add_argument("--schema", default=str(DEFAULT_SCHEMA_PATH), help="Repo-relative schema path.") parser.add_argument("--as-of", help="Archive timestamp. Defaults to current UTC time.") parser.add_argument("--skip-sync-state", action="store_true", help="Do not refresh sync state after marking gaps.") args = parser.parse_args() as_of = parse_as_of(args.as_of) reason = ( "No reproducible and redistribution-safe grey-market data source has been approved. " "HKEX does not publish an official grey-market feed; broker and third-party grey-market feeds " "are platform-specific or proprietary." ) notes = ( "Keep T2 blocked until the project has a licensed export, user-provided evidence file, " "or public historical source with clear reuse terms. Do not scrape proprietary grey-market " "feeds into the repo." ) with sqlite3.connect(args.db) as conn: conn.executescript(Path(args.schema).read_text(encoding="utf-8")) tickers = [ row[0] for row in conn.execute( """ SELECT ticker FROM ticker_sync_state WHERE stage = 'T2_grey_market' AND status = 'pending_due' ORDER BY ticker """ ) ] rows = [ ( f"{ticker}_T2_grey_market_source_strategy_required", ticker, "T2_grey_market", "grey_market_price_performance", reason, None, as_of, notes, ) for ticker in tickers ] conn.executemany( """ INSERT INTO data_gaps ( gap_id, ticker, stage, field_name, reason, expected_resolution_date, created_at, notes ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(gap_id) DO UPDATE SET field_name = excluded.field_name, reason = excluded.reason, expected_resolution_date = excluded.expected_resolution_date, created_at = excluded.created_at, notes = excluded.notes """, rows, ) export_snapshot(conn, "data_gaps") if not args.skip_sync_state: refresh_sync_state(args.db, args.schema, as_of) print("grey-market gaps marked") print(f"tickers marked: {len(rows)}") return 0 if __name__ == "__main__": raise SystemExit(main())