{
  "title": "Ruchey listening comparisons",
  "description": "Replace the placeholder asset paths with exported WAV/MP3 files to turn this into a milestone-style listening presentation.",
  "comparisons": [
    {
      "kicker": "Reconstruction Quality",
      "title": "Same source phrase through successive reconstruction pipelines",
      "summary": "Use this row to show how the basic autoencoding path improved before adding prompt or translation behavior.",
      "takeaway": "noise floor, consonant sharpness, pitch stability, and whether the speaker still feels like the source",
      "clips": [
        {
          "label": "Source recording",
          "era": "anchor",
          "source": "assets/reconstruction/source.wav",
          "notes": "Original clean phrase or target reference."
        },
        {
          "label": "Early Ruchey decode",
          "era": "baseline",
          "source": "assets/reconstruction/early-ruchey.wav",
          "notes": "Keep a known rough checkpoint here so the quality delta is obvious."
        },
        {
          "label": "Ruchey + BridgeDiT + Flow2GAN",
          "era": "current",
          "source": "assets/reconstruction/current-flow2gan.wav",
          "notes": "Current best reconstruction stack for listening-test-quality exports."
        }
      ]
    },
    {
      "kicker": "Prompt Control",
      "title": "Prompt-conditioned voice transformation progression",
      "summary": "Use this row for transformations such as whispering, shouting, tired, nervous, or child/old-person prompts.",
      "takeaway": "prompt strength without identity collapse, rhythm drift, or over-smoothed consonants",
      "clips": [
        {
          "label": "Source",
          "era": "anchor",
          "source": "assets/prompt/source.wav",
          "notes": "Unedited input phrase."
        },
        {
          "label": "v13c CLSP SDEdit",
          "era": "demo",
          "source": "assets/prompt/v13c-clsp-sdedit.wav",
          "notes": "Export from the VoiceTransformDiT demo with chosen prompt and settings."
        },
        {
          "label": "Next checkpoint",
          "era": "next",
          "source": "assets/prompt/next-checkpoint.wav",
          "notes": "Drop the newer run here to show what changed."
        }
      ]
    },
    {
      "kicker": "Translation / Identity",
      "title": "Source, target, identity swap, and translator output",
      "summary": "Use this row to explain whether the translator is preserving content while moving identity or language cues.",
      "takeaway": "content intelligibility, target identity pull, source leakage, and unnatural formant shifts",
      "clips": [
        {
          "label": "Source reconstruction",
          "era": "source",
          "source": "assets/translation/source-recon.wav",
          "notes": "Source content reconstructed through the current stack."
        },
        {
          "label": "Target reconstruction",
          "era": "target",
          "source": "assets/translation/target-recon.wav",
          "notes": "Target reference or target speaker reconstruction."
        },
        {
          "label": "Translator output",
          "era": "model",
          "source": "assets/translation/translator-output.wav",
          "notes": "Model-generated output for the same content transfer test."
        }
      ]
    }
  ]
}