# hk-ipo/rules/ipo_score_v0.yaml
---
# Identity and lifecycle metadata for this revision of the scoring rules.
version: ipo_score_v0
# NOTE(review): unquoted ISO date; YAML 1.1 loaders (e.g. PyYAML) resolve it to
# a date object rather than a string. Confirm which type the consumer expects.
effective_date: 2026-06-15
status: initial_baseline
owner_skill: analyst

# Statement of intent for the model. The folded scalar is parsed as a single
# line of text, so the line breaks below are purely for readability.
purpose: >
  Build the first transparent Hong Kong IPO subscription scoring model
  from archived project data.
  The model is a rules-plus-calibration baseline, not a black-box predictor.
  It separates T0 prospectus information from T1 allotment information
  so later facts do not leak into earlier decisions.

# Outcome targets for the model. Every target is a return measured against
# the IPO offer price; only the horizon differs.
# NOTE(review): "D1/D5/D20/D60" presumably count trading days from listing —
# confirm against the dataset definition.
targets:
  primary: D1 return versus IPO offer price
  secondary:
    - D5 return versus IPO offer price
    - D20 return versus IPO offer price
    - D60 return versus IPO offer price

# Inputs each decision stage may and may not read. The stages exist so that
# facts known only later do not leak into earlier decisions; an earlier stage
# must therefore never be more permissive than the stage that follows it.
stage_policy:
  # T0: decision made from prospectus facts only.
  T0_prospectus:
    allowed_inputs:
      - offer_price_hkd
      - gross_proceeds_hkd_m
      - funds_raised_hkd_m
      - min_subscription_amount_hkd
      - public_offer_pct_initial
      - over_allotment_offer_shares
    forbidden_inputs:
      - allotment_results
      - public_oversubscription_times
      - international_oversubscription_times
      # Also forbidden at T1; listed here so T0 is at least as strict as T1.
      - grey_market_result
      - post_listing_price_performance
  # T1: decision made once allotment facts are available.
  T1_allotment:
    allowed_inputs:
      # NOTE(review): presumably expanded by the consumer to the T0
      # allowed_inputs list above — confirm.
      - all_T0_inputs
      - public_oversubscription_times
      - international_oversubscription_times
      - valid_applications
      - successful_applications
      - final_hk_offer_shares
      - hk_offer_shares_initial
    forbidden_inputs:
      - grey_market_result
      - post_listing_price_performance
  # T2: declared but switched off in v0; see the note for the reason.
  T2_grey_market:
    status: disabled_until_reliable_source_exists
    note: >
      T2 is excluded from v0 because no reproducible, redistribution-safe
      grey-market data source has been approved.

# Point tables, grouped by stage. Each component maps a bucket of one input
# to an integer number of points.
# NOTE(review): bucket keys appear to encode numeric ranges (lt / lte / gt /
# gte, with "_" standing in for the decimal point, e.g. lte_0_05 for <= 0.05).
# Confirm against the scorer before renaming or adding keys.
score_components:
  # The T0 point values listed below sum to between -6 and 12.
  T0_prospectus:
    offer_size_hkd_m:
      source: gross_proceeds_hkd_m, falling back to funds_raised_hkd_m
      points:
        missing: 0
        lt_300: -2
        gte_300_lt_800: 1
        gte_800_lt_2000: 4
        gte_2000_lt_5000: 3
        gte_5000: 2
    public_offer_pct_initial:
      points:
        missing: 0
        lte_0_05: 3
        gt_0_05_lte_0_10: 1
        gt_0_10: -1
    min_subscription_amount_hkd:
      points:
        # The only component where a missing value is penalised, not neutral.
        missing: -1
        lt_3500: -1
        gte_3500_lt_10000: 2
        gte_10000: 1
    offer_price_hkd:
      points:
        missing: 0
        lt_1: -2
        gte_1_lt_5: 0
        gte_5_lt_30: 1
        gte_30_lt_100: 2
        gte_100: 1
    # NOTE(review): presumably derived from over_allotment_offer_shares, the
    # only over-allotment field in the T0 allowed inputs — confirm.
    over_allotment:
      points:
        present: 1
        missing_or_zero: 0

  # The T1 point values listed below sum to between -10 and 36.
  T1_allotment:
    public_oversubscription_times:
      points:
        missing: 0
        lt_10: -4
        gte_10_lt_100: -2
        gte_100_lt_1000: 6
        gte_1000_lt_5000: 13
        gte_5000: 15
    international_oversubscription_times:
      points:
        missing: 0
        lt_1: -2
        gte_1_lt_3: -1
        gte_3_lt_10: 1
        gte_10_lt_30: 6
        gte_30: 8
    # NOTE(review): no bucket covers 10000 <= value < 50000. Presumably such
    # values score 0; confirm the scorer's behaviour for unmatched values.
    valid_applications:
      points:
        missing: 0
        lt_10000: -2
        gte_50000_lt_100000: 1
        gte_100000_lt_200000: 3
        gte_200000: 5
    # NOTE(review): no bucket covers 0.30 < rate <= 0.80. Presumably such
    # values score 0; confirm the scorer's behaviour for unmatched values.
    application_success_rate:
      definition: successful_applications / valid_applications
      points:
        missing: 0
        lte_0_10: 4
        gt_0_10_lte_0_30: 2
        gt_0_80: -2
    # NOTE(review): no bucket covers a multiple below 2. Presumably such
    # values score 0; confirm the scorer's behaviour for unmatched values.
    hk_offer_reallocation_multiple:
      definition: final_hk_offer_shares / hk_offer_shares_initial
      points:
        missing: 0
        gte_2_lt_3: 2
        gte_3: 4

# Maps a score range to a decision label, separately for each stage.
# All point values in score_components are integers, so the integer band
# edges below leave no gaps between adjacent bands.
decision_bands:
  # Bands keyed on the T0 score alone.
  T0_prospectus:
    t0_score_lt_1: weak_or_avoid
    t0_score_1_to_4: neutral
    t0_score_5_to_7: positive_watch
    t0_score_gte_8: strong_watch
  # NOTE(review): "total_score" presumably means T0 points plus T1 points —
  # confirm against the scorer.
  T1_allotment:
    total_score_lt_0: avoid
    total_score_0_to_9: avoid_or_wait
    total_score_10_to_17: watch_or_small
    total_score_18_to_25: selective_subscribe
    total_score_gte_26: high_conviction_subscribe

# Calibration settings: the method, the two outcome labels, and where the
# report and dataset snapshot are stored.
calibration_policy:
  method: empirical_bucket_rate
  # Quoted so the comparison operators are always read as plain text.
  primary_label: 'D1_return_pct > 0'
  strong_label: 'D1_return_pct >= 10'
  report_path: 'reports/2026-06-15_analysis_model_v0.md'
  dataset_path: 'data/snapshots/analysis_model_v0_dataset.csv'

# Caveats recorded for v0. Items are wrapped across lines for readability;
# each one still parses as a single-line string.
known_limitations:
  - T0 has weaker discrimination than T1 because demand data is not available
    yet.
  - T1 structured demand facts are incomplete for older HTML-only allotment
    notices.
  - Industry labels and issuer fundamentals are not yet sufficiently
    structured for v0.
  - T2 grey-market signal is intentionally disabled until a reliable source
    exists.
  - Current D1 outcomes are from archived market data and should be reviewed
    for extreme corporate-action or data-vendor anomalies.