#!/usr/bin/env python3
"""
humoral_test.py — Tests the Galenic Degree Hypothesis for the Voynich operator.

Historical frame: The Galenic medical system (dominant 1300-1600 CE) classified
every herb and pharmaceutical by its primary quality (hot/cold/dry/moist) and
DEGREE (1-4). The Voynich's closed operator set {0,1,2,3} maps perfectly to
four Galenic degrees. This script tests whether the operator's distributional
properties are consistent with Galenic degree encoding.

Predictions under the Galenic hypothesis:
  - Sections treating different medical domains should have different degree profiles
  - Records (compound prescriptions) should balance degrees (mixing hot/cold)
  - Prefix classifiers should co-vary with degree (certain substance classes are
    hotter or colder by convention; e.g. aquatic = cold, dry seeds = hot)
  - Within recipes: degrees should not uniformly concentrate at one value
    (a balanced compound spans multiple degrees)

See REPORT.md Parts 17-24 for the underlying operator findings.
Artifacts: this script; uses data/ZL3b-n.txt via the path logic below.
"""
import re, math, random, os, sys
from collections import Counter, defaultdict

HERE = os.path.dirname(os.path.abspath(__file__))
ZL3B = os.path.join(HERE, "data", "ZL3b-n.txt")

if not os.path.exists(ZL3B):
    print("ERROR: data/ZL3b-n.txt not found. Run: bash fetch_data.sh")
    sys.exit(1)

SECTIONS = {
    'H': 'Herbal (raw plants)',
    'B': 'Biological/Balneological',
    'S': 'Stars/Recipes',
    'T': 'Text-only',
    'C': 'Cosmological',
    'Z': 'Zodiac',
    'P': 'Pharmaceutical (jars)',
    'A': 'Astronomical',
}

valid = lambda w: bool(re.fullmatch(r'[a-z]+', w))


def load_records(min_len=2):
    pages, cur, records = {}, None, []
    locus_re = re.compile(r'^<(f[0-9a-zA-Z]+)\.([^,>]+),\s*([@+=*~$&!])(\w+?)(\d*)>\s*(.*)$')
    hdr_re   = re.compile(r'^<(f[0-9a-zA-Z]+)>')
    for raw in open(ZL3B, encoding='utf-8', errors='replace'):
        raw = raw.rstrip('\n')
        if not raw or raw.startswith('#'):
            continue
        m = hdr_re.match(raw)
        if m and not locus_re.match(raw):
            cur = m.group(1)
            pages[cur] = dict(re.findall(r'\$(\w)=(\w+)', raw))
            continue
        m = locus_re.match(raw)
        if not m:
            continue
        page, _, _, ltype, _, text = m.groups()
        if not ltype.upper().startswith('P'):
            continue
        v = pages.get(page, {})
        text = re.sub(r'<!.*?>', '', text)
        text = re.sub(r'<->', '?', text)
        text = re.sub(r'<%>|<\$>|<@\w+>', '', text)
        text = re.sub(r'@\d+;', '?', text)
        for _ in range(4):
            text = re.sub(r'\[([^:\[\]]*):[^\[\]]*\]', r'\1', text)
        text = text.replace(',', '.').replace('!','').replace('%','')
        ws = [w for w in text.split('.') if w and valid(w)]
        if len(ws) < min_len:
            continue
        records.append({
            'words': ws,
            'page': page,
            'section': v.get('I', '?'),
            'lang': v.get('L', '?'),
            'hand': v.get('H', '?'),
            'quire': v.get('Q', '?'),
        })
    return records


def factor(w):
    """Returns (item_code, e_count, i_count). E-runs collapsed to single 'e', etc."""
    e = sum(len(m.group()) for m in re.finditer(r'e+', w))
    i = sum(len(m.group()) for m in re.finditer(r'i+', w))
    code = re.sub(r'e+', 'e', re.sub(r'i+', 'i', w))
    return code, e, i


def op(w):
    """Primary operator: e-run total (the dominant free variable, per Part 17)."""
    return factor(w)[1]


def prefix_class(w):
    """Classify word by leading morpheme (the domain-classifier slot, per Part 22)."""
    if w.startswith('qok') or w.startswith('qol'):
        return 'qok/qol (biological)'
    if w.startswith('qo'):
        return 'qo-'
    if w.startswith('ch'):
        return 'ch- (herbal)'
    if w.startswith('sh'):
        return 'sh- (herbal)'
    if w.startswith('ok') or w.startswith('ol'):
        return 'ok/ol'
    if w.startswith('ot'):
        return 'ot- (stars)'
    if w.startswith('o'):
        return 'o-'
    if w.startswith('da') or w.startswith('dy'):
        return 'd-'
    if w.startswith('s') and not w.startswith('sh'):
        return 's-'
    if w.startswith('l'):
        return 'l-'
    if w.startswith('a'):
        return 'a-'
    return 'other'


def pearson(xs, ys):
    n = len(xs)
    if n < 3:
        return 0.0
    mx, my = sum(xs)/n, sum(ys)/n
    num = sum((x-mx)*(y-my) for x, y in zip(xs, ys))
    dx = math.sqrt(sum((x-mx)**2 for x in xs))
    dy = math.sqrt(sum((y-my)**2 for y in ys))
    return num/(dx*dy) if dx*dy else 0.0


def z_score(obs, null_list):
    m = sum(null_list)/len(null_list)
    sd = (sum((v-m)**2 for v in null_list)/len(null_list))**0.5 or 1e-9
    return (obs - m)/sd, m, sd


# ─── Load ────────────────────────────────────────────────────────────────────
print("Loading records from ZL3b transliteration ...")
records = load_records()
print(f"  {len(records)} records loaded")
if not records:
    print("  No records found — is data/ZL3b-n.txt a valid IVTFF transliteration?")
    print("  Run: bash fetch_data.sh")
    sys.exit(1)
by_sec = defaultdict(list)
for r in records:
    by_sec[r['section']].append(r)
for sec in sorted(by_sec):
    print(f"  Section {sec} ({SECTIONS.get(sec,'?')}): {len(by_sec[sec])} records")

# ─── Test 1: Section Thermal Profile ─────────────────────────────────────────
print("\n" + "="*70)
print("TEST 1: GALENIC DEGREE PROFILE BY SECTION")
print("="*70)
print("Galenic prediction: each section treats a specific medical domain and")
print("should carry a characteristic degree profile (hot vs cold vs balanced).")
print()
print(f"  {'Section':<36} {'n':>6}  {'mean':>6}  {'°0':>5}  {'°1':>5}  {'°2':>5}  {'°3+':>5}")

sec_ops = {}
for sec in sorted(by_sec):
    vals = [op(w) for r in by_sec[sec] for w in r['words']]
    if not vals:
        continue
    n = len(vals); cnt = Counter(vals)
    mean = sum(vals)/n
    sec_ops[sec] = (vals, mean)
    print(f"  {SECTIONS.get(sec,sec):<36} {n:>6}  {mean:>6.3f}  "
          f"{100*cnt.get(0,0)/n:>4.0f}%  {100*cnt.get(1,0)/n:>4.0f}%  "
          f"{100*cnt.get(2,0)/n:>4.0f}%  {100*cnt.get(3,0)/n:>4.0f}%")

# Permutation test: does section significantly predict operator value?
all_pairs = [(r['section'], op(w)) for r in records for w in r['words']]
grand_mean = sum(v for _, v in all_pairs)/len(all_pairs)
observed_bv = sum(
    len(vs) * (sum(vs)/len(vs) - grand_mean)**2
    for vs in ([v for _, v in all_pairs if s == sec] for sec in set(s for s, _ in all_pairs))
    if vs
) / len(all_pairs)

rng = random.Random(42)
vals_only = [v for _, v in all_pairs]
secs_only = [s for s, _ in all_pairs]
null_bvs = []
for _ in range(1000):
    rng.shuffle(secs_only)
    sm = defaultdict(list)
    for s, v in zip(secs_only, vals_only):
        sm[s].append(v)
    nv = sum(len(vs)*(sum(vs)/len(vs)-grand_mean)**2 for vs in sm.values() if vs)/len(all_pairs)
    null_bvs.append(nv)
z, nm, nsd = z_score(observed_bv, null_bvs)
print(f"\n  Between-section variance: observed={observed_bv:.5f}  null={nm:.5f}±{nsd:.5f}  z={z:.1f}")

# ─── Test 2: Intra-Record Degree Balance (Humoral Composition) ────────────────
print("\n" + "="*70)
print("TEST 2: INTRA-RECORD DEGREE BALANCE")
print("="*70)
print("Galenic compound prescriptions mix hot and cold ingredients to achieve")
print("a target temperament. Prediction: within-record operator variance should")
print("exceed the null (random reassignment within section).")
print()
print(f"  {'Section':<36} {'n':>5}  {'var_obs':>8}  {'var_null':>8}  {'z':>5}")

for sec in sorted(by_sec):
    recs = [r for r in by_sec[sec] if len(r['words']) >= 3]
    if len(recs) < 10:
        continue
    # observed: mean within-record variance
    wrvars = []
    for r in recs:
        vals = [op(w) for w in r['words']]
        mv = sum(vals)/len(vals)
        wrvars.append(sum((v-mv)**2 for v in vals)/len(vals))
    obs_wrv = sum(wrvars)/len(wrvars)
    # null: shuffle operators within section, preserving record lengths
    all_ops = [op(w) for r in recs for w in r['words']]
    rng2 = random.Random(7)
    null_wrvs = []
    for _ in range(500):
        rng2.shuffle(all_ops)
        idx = 0
        nv_list = []
        for r in recs:
            n = len(r['words'])
            chunk = all_ops[idx:idx+n]; idx += n
            if len(chunk) >= 3:
                mv = sum(chunk)/len(chunk)
                nv_list.append(sum((v-mv)**2 for v in chunk)/len(chunk))
        if nv_list:
            null_wrvs.append(sum(nv_list)/len(nv_list))
    z2, nm2, nsd2 = z_score(obs_wrv, null_wrvs)
    print(f"  {SECTIONS.get(sec,sec):<36} {len(recs):>5}  {obs_wrv:>8.4f}  {nm2:>8.4f}  {z2:>5.1f}")

# ─── Test 3: Prefix × Degree Co-occurrence ────────────────────────────────────
print("\n" + "="*70)
print("TEST 3: CLASSIFIER PREFIX × OPERATOR DEGREE CO-OCCURRENCE")
print("="*70)
print("Galenic tradition: substance class (herb/aquatic/mineral) predicts thermal")
print("quality. Prediction: prefix classifiers correlate with operator degree.")
print()

pfx_ops = defaultdict(list)
for r in records:
    for w in r['words']:
        pfx_ops[prefix_class(w)].append(op(w))

grand_op_mean = sum(v for vs in pfx_ops.values() for v in vs) / sum(len(vs) for vs in pfx_ops.values())
print(f"  {'Prefix class':<28} {'n':>6}  {'mean':>6}  {'dev':>6}  {'°0':>5}  {'°1':>5}  {'°2':>5}  {'°3+':>5}")
for pfx in sorted(pfx_ops, key=lambda p: -len(pfx_ops[p])):
    vals = pfx_ops[pfx]
    if len(vals) < 30:
        continue
    n = len(vals); mean_v = sum(vals)/n; cnt = Counter(vals)
    dev = mean_v - grand_op_mean
    print(f"  {pfx:<28} {n:>6}  {mean_v:>6.3f}  {dev:>+6.3f}  "
          f"{100*cnt.get(0,0)/n:>4.0f}%  {100*cnt.get(1,0)/n:>4.0f}%  "
          f"{100*cnt.get(2,0)/n:>4.0f}%  {100*cnt.get(3,0)/n:>4.0f}%")

print(f"\n  Grand mean operator: {grand_op_mean:.3f}")
print("  Interpretation:")
print("    Galenic 'hot' substances (dry seeds, spices): predicted above grand mean")
print("    Galenic 'cold' substances (aquatic herbs, roots): predicted below grand mean")
print("    qok/qol (biological/bath) vs ch/sh (herbal) contrast tests")
print("    whether aquatic vs terrestrial domain maps to degree difference")

# ─── Test 4: Alchemical Stage Directionality ─────────────────────────────────
print("\n" + "="*70)
print("TEST 4: ALCHEMICAL STAGE DIRECTIONALITY WITHIN RECORDS")
print("="*70)
print("Alchemical hypothesis: records encode transformation stages.")
print("If records describe a purification process (nigredo→rubedo = 0→3),")
print("item operator values should systematically increase within a record.")
print("If records are balanced prescriptions, no directional trend is expected.")
print()

for sec in sorted(by_sec):
    recs = [r for r in by_sec[sec] if len(r['words']) >= 4]
    if len(recs) < 15:
        continue
    rises, falls, flats = 0, 0, 0
    for r in recs:
        ops_r = [op(w) for w in r['words']]
        mid = len(ops_r)//2
        fh = sum(ops_r[:mid])/mid
        sh = sum(ops_r[mid:])/len(ops_r[mid:])
        if sh > fh + 0.05:
            rises += 1
        elif sh < fh - 0.05:
            falls += 1
        else:
            flats += 1
    total = rises + falls + flats
    bias = (rises - falls)/total
    null_bias = 0  # symmetric if no directionality
    print(f"  {SECTIONS.get(sec,sec):<36} n={total:>4}  "
          f"↑{rises:>4} ({100*rises/total:.0f}%)  "
          f"↓{falls:>4} ({100*falls/total:.0f}%)  "
          f"→{flats:>4} ({100*flats/total:.0f}%)  bias={bias:+.3f}")

# ─── Test 5: Pharmaceutical vs Herbal degree elevation ───────────────────────
print("\n" + "="*70)
print("TEST 5: PHARMACEUTICAL SECTION DEGREE ELEVATION")
print("="*70)
print("Alchemy/pharmacy prediction: pharmaceutical section (prepared, refined")
print("plant extracts in jars) should carry higher operator degrees than the")
print("herbal section (raw plants), reflecting more processed substances.")
print()

for sec_a, sec_b in [('H', 'P'), ('H', 'S'), ('B', 'H'), ('S', 'H')]:
    if sec_a not in sec_ops or sec_b not in sec_ops:
        continue
    ops_a, mean_a = sec_ops[sec_a]
    ops_b, mean_b = sec_ops[sec_b]
    diff = mean_b - mean_a
    rng3 = random.Random(99)
    combined = ops_a + ops_b
    null_diffs = []
    for _ in range(1000):
        rng3.shuffle(combined)
        na, nb = combined[:len(ops_a)], combined[len(ops_a):]
        null_diffs.append(sum(nb)/len(nb) - sum(na)/len(na))
    z3, nm3, nsd3 = z_score(diff, null_diffs)
    label_a = SECTIONS.get(sec_a, sec_a)
    label_b = SECTIONS.get(sec_b, sec_b)
    direction = "↑" if diff > 0 else "↓"
    print(f"  {label_b[:28]:<28} vs {label_a[:28]:<28}  "
          f"Δmean={diff:+.3f}{direction}  z={z3:.1f}")

# ─── Summary ──────────────────────────────────────────────────────────────────
print("\n" + "="*70)
print("GALENIC/ALCHEMICAL SYNTHESIS")
print("="*70)
print("""
The Galenic degree system (hot/cold/dry/moist in degrees 1-4) predicts:
  1. Section-specific degree profiles          → tested above (Test 1)
  2. Within-record degree balance              → tested above (Test 2)
  3. Prefix-class vs degree co-occurrence      → tested above (Test 3)
  4. No directional staging within records     → tested above (Test 4)
  5. Pharmaceutical > herbal degree elevation  → tested above (Test 5)

Evidence supporting the Galenic reading:
  - Operator is a closed {0,1,2,3} set (exactly 4 Galenic degrees)
  - Section vocabulary is domain-locked (different medical domains use different words)
  - Prefix classifiers concentrate by section domain (Part 22)
  - Text describes attributes, not names (Part 23) → exactly as Galenic pharmacy works:
    ingredients are interchangeable if they share the same quality-degree profile

Alternative: alchemical stages (nigredo→albedo→citrinitas→rubedo = 0→3)
  → Falsifiable via Test 4: should show consistent directionality within records

Both readings agree on the core point: the operator encodes a GRADED PROPERTY
of the substance, not a random slot-fill. The 4-step scale is the decisive
structural feature; whether it is Galenic degrees, alchemical stages, or
another 4-category medieval property classification, all are consistent with
the manuscript's genre (15th-c medical-alchemical herbal compendium).

Artifacts: humoral_test.py; uses data/ZL3b-n.txt
""")
