From 95a5153590f3e36d2325e9aab899e05f48b05f0b Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Wed, 10 Jun 2026 17:00:56 +0800 Subject: [PATCH 1/6] fix: widen native video centering range --- menu.sv | 19 ++++++++----------- rtl/native_video_timing.sv | 17 +++++++++-------- rtl/native_video_top.sv | 4 ++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/menu.sv b/menu.sv index 3c983925..ec9e4fcb 100644 --- a/menu.sv +++ b/menu.sv @@ -208,14 +208,13 @@ assign LED_POWER[0]= FB ? led[2] : act_cnt2[26] ? act_cnt2[25:18] > act_cnt2[7:0 `include "build_id.v" -// Image centering: 4-bit signed in OSD ordering 0,+1..+7,-8..-1 so that the -// power-on default (status bits = 0) maps to "no shift". Bit pattern matches -// 4-bit two's complement when reinterpreted as signed. +// Image centering options use signed two's-complement ordering with 0 first, +// so the power-on default (status bits = 0) selects the calibrated base timing. localparam CONF_STR = { "MENU;UART31250,MIDI;", "-;", - "O[13:10],H Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", - "O[17:14],V Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", + "O[15:10],H Offset,0,+1,+2,+3,+4,+5,+6,+7,+8,+9,+10,+11,+12,+13,+14,+15,+16,+17,+18,+19,+20,+21,+22,+23,+24,+25,+26,+27,+28,+29,+30,+31,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1;", + "O[19:16],V Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", "-;", "V,v",`BUILD_DATE }; @@ -522,12 +521,10 @@ native_video_top native_video .enable (mode_zaparoo), .active (native_active), - // status[13:10] / status[17:14] are 4-bit fields whose bit pattern - // matches signed two's complement when the OSD enum is ordered - // 0,+1..+7,-8..-1 (see CONF_STR). $signed() makes the reinterpretation - // explicit at the port boundary. 
- .h_offset ($signed(status[13:10])), - .v_offset ($signed(status[17:14])) + // Status bit patterns match signed two's complement because CONF_STR orders + // each OSD enum as 0,+1..max,min..-1. $signed() makes that explicit here. + .h_offset ($signed(status[15:10])), + .v_offset ($signed(status[19:16])) ); // Cosine + LFSR fallback noise pattern, painted into the 320x240 active area diff --git a/rtl/native_video_timing.sv b/rtl/native_video_timing.sv index 6d46c8d3..56d44002 100644 --- a/rtl/native_video_timing.sv +++ b/rtl/native_video_timing.sv @@ -1,7 +1,6 @@ // Zaparoo native video timing: 320x240 at 15.734 kHz from 27 MHz / 4. -// h_offset/v_offset (signed -8..+7) shift the image by repartitioning -// front porch and back porch. H_TOTAL/V_TOTAL are invariant, so line -// rate and frame rate are unchanged regardless of offset values. +// h_offset/v_offset shift the image by repartitioning front porch and back +// porch. H_TOTAL/V_TOTAL are invariant, so line/frame rates stay unchanged. module native_video_timing ( @@ -10,8 +9,8 @@ module native_video_timing input wire reset, // Image centering: positive = shift right/down (FP shrinks, BP grows). - input wire signed [3:0] h_offset, // -8..+7 pixels (budget H_FP=14 / H_BP=63) - input wire signed [3:0] v_offset, // -8..+7 lines (budget V_FP=8 / V_BP=11) + input wire signed [5:0] h_offset, // -32..+31 pixels (budget H_FP=38 / H_BP=39) + input wire signed [3:0] v_offset, // -8..+7 lines (budget V_FP=8 / V_BP=11) output reg hsync, output reg vsync, @@ -25,9 +24,11 @@ module native_video_timing ); localparam [9:0] H_ACTIVE = 10'd320; -localparam [9:0] H_FP = 10'd14; +// 38/32/39 keeps total blanking fixed while moving the default image 24 px +// left from the earlier CRT-specific 14/32/63 porch split. 
+localparam [9:0] H_FP = 10'd38; localparam [5:0] H_SYNC = 6'd32; -localparam [9:0] H_BP = 10'd63; +localparam [9:0] H_BP = 10'd39; localparam [9:0] H_TOTAL = 10'd429; // V blanking rebalanced from 6/3/13 to 8/3/11 to give symmetric ±8 budget @@ -40,7 +41,7 @@ localparam [8:0] V_TOTAL = 9'd262; // Sync starts shift with the offset; two's-complement subtraction in // unsigned arithmetic yields the correct result at both ends of the range. -wire [9:0] H_SYNC_START = H_ACTIVE + (H_FP - {{6{h_offset[3]}}, h_offset}); +wire [9:0] H_SYNC_START = H_ACTIVE + (H_FP - {{4{h_offset[5]}}, h_offset}); wire [9:0] H_SYNC_END = H_SYNC_START + H_SYNC; wire [8:0] V_SYNC_START = V_ACTIVE + (V_FP - {{5{v_offset[3]}}, v_offset}); wire [8:0] V_SYNC_END = V_SYNC_START + V_SYNC; diff --git a/rtl/native_video_top.sv b/rtl/native_video_top.sv index 3e3127fa..897b1c84 100644 --- a/rtl/native_video_top.sv +++ b/rtl/native_video_top.sv @@ -31,8 +31,8 @@ module native_video_top input wire enable, output wire active, - // OSD image centering: signed -8..+7 pixels/lines, 0 = no shift. - input wire signed [3:0] h_offset, + // OSD image centering: 0 = default porch split. + input wire signed [5:0] h_offset, input wire signed [3:0] v_offset ); From 3a19e6db7e6de55053b3f410615cc6643cfe4e2e Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Wed, 10 Jun 2026 18:03:12 +0800 Subject: [PATCH 2/6] fix: use safer native video centering steps --- menu.sv | 12 ++++++------ rtl/native_video_timing.sv | 10 +++++----- rtl/native_video_top.sv | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/menu.sv b/menu.sv index ec9e4fcb..5f90dac9 100644 --- a/menu.sv +++ b/menu.sv @@ -213,8 +213,8 @@ assign LED_POWER[0]= FB ? led[2] : act_cnt2[26] ? 
act_cnt2[25:18] > act_cnt2[7:0 localparam CONF_STR = { "MENU;UART31250,MIDI;", "-;", - "O[15:10],H Offset,0,+1,+2,+3,+4,+5,+6,+7,+8,+9,+10,+11,+12,+13,+14,+15,+16,+17,+18,+19,+20,+21,+22,+23,+24,+25,+26,+27,+28,+29,+30,+31,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1;", - "O[19:16],V Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", + "O[13:10],H Offset,0,+2,+4,+6,+8,+10,+12,+14,-16,-14,-12,-10,-8,-6,-4,-2;", + "O[17:14],V Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", "-;", "V,v",`BUILD_DATE }; @@ -521,10 +521,10 @@ native_video_top native_video .enable (mode_zaparoo), .active (native_active), - // Status bit patterns match signed two's complement because CONF_STR orders - // each OSD enum as 0,+1..max,min..-1. $signed() makes that explicit here. - .h_offset ($signed(status[15:10])), - .v_offset ($signed(status[19:16])) + // H offset is a 4-bit signed OSD value doubled into 2-pixel steps. + // V offset is a normal 4-bit signed OSD value in 1-line steps. + .h_offset ($signed({status[13:10], 1'b0})), + .v_offset ($signed(status[17:14])) ); // Cosine + LFSR fallback noise pattern, painted into the 320x240 active area diff --git a/rtl/native_video_timing.sv b/rtl/native_video_timing.sv index 56d44002..02ea58da 100644 --- a/rtl/native_video_timing.sv +++ b/rtl/native_video_timing.sv @@ -9,7 +9,7 @@ module native_video_timing input wire reset, // Image centering: positive = shift right/down (FP shrinks, BP grows). 
- input wire signed [5:0] h_offset, // -32..+31 pixels (budget H_FP=38 / H_BP=39) + input wire signed [4:0] h_offset, // -16..+14 pixels (budget H_FP=26 / H_BP=51) input wire signed [3:0] v_offset, // -8..+7 lines (budget V_FP=8 / V_BP=11) output reg hsync, @@ -24,11 +24,11 @@ module native_video_timing ); localparam [9:0] H_ACTIVE = 10'd320; -// 38/32/39 keeps total blanking fixed while moving the default image 24 px +// 26/32/51 keeps total blanking fixed while moving the default image 12 px // left from the earlier CRT-specific 14/32/63 porch split. -localparam [9:0] H_FP = 10'd38; +localparam [9:0] H_FP = 10'd26; localparam [5:0] H_SYNC = 6'd32; -localparam [9:0] H_BP = 10'd39; +localparam [9:0] H_BP = 10'd51; localparam [9:0] H_TOTAL = 10'd429; // V blanking rebalanced from 6/3/13 to 8/3/11 to give symmetric ±8 budget @@ -41,7 +41,7 @@ localparam [8:0] V_TOTAL = 9'd262; // Sync starts shift with the offset; two's-complement subtraction in // unsigned arithmetic yields the correct result at both ends of the range. -wire [9:0] H_SYNC_START = H_ACTIVE + (H_FP - {{4{h_offset[5]}}, h_offset}); +wire [9:0] H_SYNC_START = H_ACTIVE + (H_FP - {{5{h_offset[4]}}, h_offset}); wire [9:0] H_SYNC_END = H_SYNC_START + H_SYNC; wire [8:0] V_SYNC_START = V_ACTIVE + (V_FP - {{5{v_offset[3]}}, v_offset}); wire [8:0] V_SYNC_END = V_SYNC_START + V_SYNC; diff --git a/rtl/native_video_top.sv b/rtl/native_video_top.sv index 897b1c84..36cbedb3 100644 --- a/rtl/native_video_top.sv +++ b/rtl/native_video_top.sv @@ -32,7 +32,7 @@ module native_video_top output wire active, // OSD image centering: 0 = default porch split. 
- input wire signed [5:0] h_offset, + input wire signed [4:0] h_offset, input wire signed [3:0] v_offset ); From 5db804a02a3b17505ceada5d4aaf269be2352d09 Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Thu, 11 Jun 2026 11:37:22 +0800 Subject: [PATCH 3/6] feat: broadcast-geometry native video with PAL and 480i modes Implements the FPGA side of docs/native-video-plan.md (all phases; the zaparoo-launcher side comes separately). - PLL output 1 retargeted 27.027027 -> 27.000000 MHz, giving exact NTSC (15734.27 Hz) and PAL (15625.00 Hz) line rates. - native_video_timing rebuilt around per-mode parameter sets: 352x240p60 (Switchres ntsc porches), 720x480i60 (CEA-861, 262+263-line fields with half-line vsync offset on the odd field), 352x288p50 (Switchres pal). Mode and trims latch at the field wrap; offsets clamp to -8..+8 px / -8..+2 lines. Field flips at the start of vblank so the reader's line preload always fetches the parity about to be displayed. - native_video_reader parses DDR control word1: magic 0x5A50 selects the v2 layout (buffers +0x1000/+0x180000, tight stride) and carries mode and h/v offsets; without magic the legacy 320x240 layout is scanned centered with 16-px side bars. word0 == 0 and DDR timeouts now clear frame_ready so the core reverts to the noise pattern instead of scanning a dead buffer. 480i fetches source line 2*line+field as two 180-beat bursts; line FIFO deepened to 1024 words for the interlaced 2-line preload. - menu.sv: OSD video options removed (CONF_STR back to stock), ce_pix divider switches /4 / /2 by mode, VGA_F1 driven by the field bit. - Self-checking iverilog testbenches in tb/ (run via tb/run.sh) verify all mode timings in exact pixel ticks, the half-line interlace (both vsync intervals exactly 262.5 lines), offset clamping, the v2/legacy fetch sequences, double buffering, writer-stop reversion, and timeout recovery. - Readme documents the native video output and the forced_scandoubler / vga_scaler=1 note. 
--- Readme.md | 13 + docs/native-video-plan.md | 482 +++++++++++++++++++++++++++++++++++ menu.sv | 61 +++-- rtl/native_video_reader.sv | 206 +++++++++++---- rtl/native_video_timing.sv | 141 +++++++--- rtl/native_video_top.sv | 71 ++++-- rtl/pll/pll_0002.v | 2 +- tb/dcfifo_sim.sv | 70 +++++ tb/native_video_reader_tb.sv | 291 +++++++++++++++++++++ tb/native_video_timing_tb.sv | 229 +++++++++++++++++ tb/run.sh | 13 + 11 files changed, 1449 insertions(+), 130 deletions(-) create mode 100644 docs/native-video-plan.md create mode 100644 tb/dcfifo_sim.sv create mode 100644 tb/native_video_reader_tb.sv create mode 100644 tb/native_video_timing_tb.sv create mode 100755 tb/run.sh diff --git a/Readme.md b/Readme.md index 224bb29d..e6c5d4ce 100644 --- a/Readme.md +++ b/Readme.md @@ -1,5 +1,18 @@ # Startup core for MiSTer +## Native CRT video (this fork) + +This fork drives the analog output with a native 15 kHz signal generated by +the core itself: 352x240p60 (NTSC) by default, with 720x480i60 and 352x288p50 +(PAL) selectable by the ARM-side launcher through a DDR control block (see +`docs/native-video-plan.md`). There are no video options in the OSD — the +mode and the H/V centering trims are owned by the launcher; the core shows +its noise pattern until the launcher publishes frames. + +Note: `forced_scandoubler` (the "Forced scandoubler" MiSTer.ini setting) is +ignored by this core — the analog output is always 15 kHz. If your VGA output +feeds a 31 kHz-only monitor, set `vga_scaler=1` in MiSTer.ini instead. 
+ * **ESC** - Back/Options * **Enter** - OK * **F1** - Cycle Background/Wallpaper diff --git a/docs/native-video-plan.md b/docs/native-video-plan.md new file mode 100644 index 00000000..8894b4fd --- /dev/null +++ b/docs/native-video-plan.md @@ -0,0 +1,482 @@ +# Native CRT video: findings, recommendations, and implementation plan + +**Status:** proposal — agreed direction, not yet implemented +**Scope:** this core (Menu_MiSTer fork) + `zaparoo-launcher` (the ARM-side writer) +**Date:** 2026-06-11 + +This document explains why the native video output currently only looks right +on the CRT it was calibrated on, what a "standard" 15 kHz signal actually is, +and a phased plan to fix geometry (240p), add PAL (288p50), and add a +high-resolution interlaced mode (480i60). + +--- + +## 1. Current architecture + +The fork replaces the Menu core's noise-pattern video with: + +| Piece | File | Role | +|---|---|---| +| Timing generator | `rtl/native_video_timing.sv` | Produces hsync/vsync/blanking/DE at 15 kHz from a 27.027 MHz clock ÷ 4 | +| DDR reader | `rtl/native_video_reader.sv` | Polls a control word in DDR3 each vblank, streams the active framebuffer line-by-line through a clock-crossing FIFO | +| Wrapper | `rtl/native_video_top.sv` | Wires the two together | +| Mode mux | `menu.sv` | `status[9]` selects noise pattern vs. framebuffer; OSD H/V offset trims | + +ARM side (`zaparoo-launcher/src/app/native_video_writer.cpp`): Qt renders the +UI to `/dev/fb0` (320x240 RGBX8888, set up via `vmode`), and a copy thread +memcpys each frame into one of two DDR buffers, then publishes it: + +``` +0x3A000000 control word: (frame_counter << 2) | active_buffer +0x3A000100 buffer 0: 320x240 RGBX8888, tight stride (1280 B) +0x3A04B100 buffer 1 +mmap region: 0xA0000 (640 KB) +``` + +The FPGA reads the control word at the start of each vblank; when the counter +changes it switches to the published buffer (double buffering, no tearing). 
+Byte order is swapped in RTL (`output_pixel`) so the app can memcpy linuxfb +BGRX rows without repacking. + +This path deliberately bypasses MiSTer's scaler (`docs/native-core-poc.md` in +zaparoo-launcher): analog output comes straight from the core's `VGA_*` +signals. The framework (`sys/vga_out.sv`) only applies gamma/csync — it does +not retime anything — so **whatever timing this core generates is exactly what +the CRT receives.** The HDMI side is unaffected; ascal rescales any input +timing. + +--- + +## 2. Background: how a CRT decides where and how big the picture is + +A CRT has no concept of pixels or resolutions. Each scanline of the signal is: + +``` + sync pulse → back porch → active video → front porch → next sync + (4.7 µs) (delay) (the picture) (delay) +``` + +The sync pulse is the only positional reference the TV has. The set's +deflection circuitry is factory-adjusted so that the **broadcast-standard** +active region — about **52.7 µs** of the 63.6 µs NTSC line — slightly +*overfills* the visible tube. That deliberate overfill is **overscan**: +typically 3–8% of the picture is cropped at each edge, varying from set to set +and drifting with age. The same applies vertically, measured in scanlines. + +Consequences: + +- If your active video is **shorter than 52.7 µs**, the picture is narrower + than the tube — black side borders that no porch adjustment can remove. +- If your active video starts **later than ~9.4 µs after the sync edge** + (4.7 µs sync + 4.7 µs back porch), the picture sits right of center. +- Because overscan varies per set, anything important drawn near the edges + will be cut off on *some* sets no matter what you do. + +Broadcasters solved the per-set variation problem decades ago: **fill the full +standard active area, and keep important content inside "safe areas"** +(SMPTE SD guidelines: *action safe* = the central 90%, *title safe* = the +central 80%). The picture bleeds past every tube's edges; the content never +does. 
This is the "safe values" approach this plan adopts. + +For calibration intuition: real consoles (NES/SNES/Genesis) output ~47.7 µs of +active video — about 10% narrower than broadcast — which is why console games +show small side borders on a well-calibrated set. GroovyMAME/Switchres, the +de-facto reference for driving CRTs from emulators, instead generates +modelines that stretch the emulated image across the full 52.7 µs. We follow +the Switchres model. + +--- + +## 3. Findings: the current signal vs. the standard + +Measured from HEAD of `fix/native-video-centering` +(pixel clock = 27.027 MHz ÷ 4 = 6.757 MHz, H total 429 px, V total 262 lines): + +| Parameter | Current (HEAD) | NTSC standard | Verdict | +|---|---|---|---| +| Line rate | **15 750 Hz** | 15 734.26 Hz | Wrong PLL: 27.027 MHz is the 1.001 NTSC factor applied *backwards*. Plain **27.000 MHz** with the same ÷4 and 429-px line gives exactly 15 734.27 Hz (27 000 000 / 1716). The "15.734 kHz" comments in the code are aspirational, not true. | +| Field rate | 60.12 Hz | 59.94–60.05 | Follows from the PLL error. Harmless on CRTs but off-spec. | +| H active | 320 px = **47.4 µs** | **52.66 µs** | ~10% too narrow. This is the "too small" complaint, and it is unfixable by porch tuning. | +| H sync→active delay | 83 px = 12.3 µs | 9.4 µs | For a 47.4 µs-wide image, *centered* would be 12.0 µs — so HEAD is now roughly centered. The original Codex porches (FP/sync/BP = 14/32/63) gave **14.1 µs ≈ 5% right shift** — the "offset right on everyone else's CRT" complaint. | +| V geometry | 240 active; vsync at line 248 (FP 8) | ~241 visible; vsync at line 243 (FP 3) | Picture sits ~5 lines high of standard. | + +History of the H porch split (FP/sync/BP in pixels): + +- `061a888` (Codex original): **14/32/63** — calibrated to one specific CRT, + ~5% right of standard for everyone else. +- `95a5153`: 38/32/39 — overcorrected ~9 px left of centered. 
+- `3a19e6d` (HEAD): **26/32/51** — within ~2 px of centered *for a 47.4 µs + image*. Centering is now fine; width is not. + +These commits also widened the OSD H offset range to ±16 px in 2-px steps to +chase per-CRT centering. That widening is deliberately reverted by this plan: +once the geometry is standard, the trim is a nicety, and the supported range +goes back to **±8 px in 1-px steps** (carried in the control word, not the +OSD — see section 5.1). + +**Key insight:** the OSD H/V offset options treat the symptom. A 47.4 µs +picture can never fill a tube calibrated for 52.7 µs, and any porch split +that's perfect for one CRT is wrong on the next. The fix is broadcast +geometry (section 4) plus safe-area UI rules (section 6). + +### Other defects found during review + +1. **PAL is silently dropped.** `wire PAL = status[4]` in `menu.sv` is now + dead; the old noise generator honored it. 50 Hz-only CRTs get an NTSC + signal. (Addressed by Phase B below.) +2. **`forced_scandoubler` is ignored.** Users whose VGA output feeds a + 31 kHz-only monitor previously got a doubled signal from the menu core; + now they must set `vga_scaler=1` in MiSTer.ini. Acceptable for a + CRT-targeted fork, but it should be stated in the README. +3. **Reader never falls back when the writer stops.** `stopNativeVideoWriter()` + zeroes the control word, but `frame_ready` stays latched and the core scans + the stale (black) buffer forever instead of reverting to the noise pattern. + `ctrl == 0` should clear `frame_ready`. +4. **FIFO preload is at its safe maximum already.** The reader preloads 2 + lines during vblank then fetches one line per scanline. Note for Phase A: + at the new 176-word line length, preloading a 3rd line would overflow the + 512-word FIFO mid-frame (peak occupancy ~368 words with 2-line preload; + 3-line preload peaks above 512 and `overflow_checking` silently drops + writes). Keep 2 lines, or deepen the FIFO to 1024 if more margin is wanted. +5. 
Reader timeout paths (`ST_WAIT_CTRL`/`ST_WAIT_LINE` → `ST_IDLE`) also leave + `frame_ready` stale; same fix as (3). + +--- + +## 4. Target timings + +Everything derives from **one PLL change**: output 1 of `rtl/pll/pll_0002.v` +goes from 27.027027 MHz to **27.000000 MHz** — the universal SD video clock +(it is exactly 1716 × NTSC line rate and 1728 × PAL line rate). + +| Mode | ce_pix | H total | H active / FP / sync / BP (px) | V total | V active / FP / sync / BP (lines) | Line rate | Refresh | +|---|---|---|---|---|---|---|---| +| **0: 240p60** (default) | 27 ÷ 4 = 6.75 MHz | 429 | **352** / 12 / 32 / 33 | 262 | **240** / 3 / 3 / 16 | 15 734.27 Hz | 60.05 Hz | +| **1: 480i60** | 27 ÷ 2 = 13.5 MHz | 858 | **720** / 19 / 62 / 57 | 525 (262+263 fields) | 240 / 4 / 3 / 15–16 per field | 15 734.27 Hz | 59.94 Hz interlaced | +| **2: 288p50** (PAL) | 6.75 MHz | 432 | **352** / 11 / 32 / 37 | 312 | **288** / 3 / 3 / 18 | 15 625.00 Hz | 50.08 Hz | + +Where these numbers come from: + +- **Switchres monitor presets** (`monitor.cpp`, the GroovyMAME engine): + - `ntsc`: 15 734.26 Hz; H porches 1.5 / 4.7 / 4.7 µs; V 3 / 3 / 15 lines. + - `pal`: 15 625 Hz; H porches 1.5 / 4.7 / 5.8 µs. +- **CEA-861 720x480i**: H total 858 @ 13.5 MHz, FP 19 / sync 62 / BP 57. +- **SMPTE 170M**: 63.556 µs line, 10.9 µs blanking, 52.66 µs active. + +The H porch pixel values above are the preset µs values converted at the pixel +clock, nudged by ≤ 0.3 µs so the active width sits centered in the standard +window. Sanity checks: 352+12+32+33 = 429; 352+11+32+37 = 432; 720+19+62+57 = 858. + +Why these active sizes: + +- **352 px @ 6.75 MHz = 52.15 µs ≈ 99% of NTSC standard active width** (and + ~100% of PAL's 52 µs). The picture fills every screen edge-to-edge with + normal overscan crop. 352x240 / 352x288 are standard SIF resolutions; + pixel aspect ratio is the BT.601-classic **10:11** for 352x240 (~9% narrower than + square) and ~12:11 for 352x288 (~9% wider than square) — fine to ignore for a UI, but stated for completeness. 
+- **PAL gets 288 active lines, not 240.** PAL tubes show ~288 lines; a + 240-line picture at 50 Hz would be visibly undersized vertically. The app + renders 352x288 in PAL mode. +- **480i uses the CEA-861 numbers verbatim** — the most universally accepted + SD interlaced timing in existence. + +### 480i specifics + +Interlacing is *not* just doubling the line count. The 525-line frame is two +fields of 262 and 263 lines, and the **odd field's vsync must be asserted half +a scanline (429 ce_pix clocks at 13.5 MHz) later** than the even field's. +That half-line offset is what makes the CRT draw the second field's lines +*between* the first field's — without it both fields land on the same +scanlines ("line pairing") and you get 240p with combing. + +MiSTer framework support is already there: + +- `VGA_F1` (field number) is a standard core output — currently hardwired to + `0` in `menu.sv`. It must toggle per field in 480i. +- `sys/sys_top.v` wires `VGA_F1` → ascal's `i_fl`; ascal auto-detects + interlace and deinterlaces for HDMI, so HDMI users keep working. +- The analog path passes core sync through untouched; csync generation in + `sys_top.v` handles interlaced cores today (PSX, Saturn, Genesis all output + real 480i this way). +- Reference implementation for the half-line trick: + `MiSTer-devel/PSX_MiSTer rtl/gpu_videoout_async.vhd` (search "half line + later"). + +--- + +## 5. DDR contract v2 + +Designed now so Phases A–C don't break each other or deployed frontends. 
+Versioned via a magic value; layout sized for the largest mode: + +``` +0x3A000000 word0: (frame_counter << 2) | active_buffer (unchanged) +0x3A000004 word1: [31:16] magic 0x5A50 ("ZP") + [15:8] h_offset, signed, pixels (+ = right; core honors −8…+8) + [7:4] v_offset, signed, lines (+ = down; core honors −8…+2) + [3:0] mode: 0 = 352x240 @ 60p (NTSC) + 1 = 720x480 @ 60i + 2 = 352x288 @ 50p (PAL) +0x3A001000 buffer 0 (page-aligned; sized for max mode: 720*480*4 = 1.35 MB) +0x3A180000 buffer 1 +mmap region: 0x300000 (3 MB) +stride: always tight, width * 4 bytes +``` + +Design points: + +- The reader already fetches the control word as one 64-bit DDR beat and + discards the top half — **word1 costs nothing extra to read**. One read per + vblank picks up frame counter, buffer index, mode, and offset trims + atomically. +- **The control block replaces every OSD video option** (see section 5.1): + there is no CRT-mode toggle and no OSD offset menu. A valid magic plus a + changing frame counter *is* the mode signal — the core shows the noise + pattern until the launcher publishes frames and reverts when word0 clears. + Offsets come from word1 and are owned by a calibration screen in the + launcher. +- Offsets and mode cross from the DDR clock domain into the video timing + domain as quasi-static values: two-flop synchronize and latch them at the + frame boundary (`new_frame`) so a mid-frame update can't corrupt sync. RTL + clamps offsets to the porch budget of the active mode (effective FP/BP + never < 2 px / 1 line), so a buggy or out-of-range value degrades to a + saturated shift, never a broken signal. +- **Legacy compatibility:** if word1 has no magic, the core treats the region + as today's layout (320x240 buffers at +0x100 / +0x4B100) and scans it + centered in the 352-px active area with black side bars (16 px each side). 
+ An already-deployed launcher keeps working against the new core; the new + launcher's fb-geometry validation already self-disables cleanly against an + old core. No flag day. +- Mode changes apply at frame boundaries. Modes 0↔1 keep the same line rate, + so the CRT re-locks almost instantly; switching to/from PAL is a bigger + retune (50↔60 Hz) and takes a moment, as on real hardware. +- Address-space safety: MiSTer reserves 0x20000000+ of DDR for the FPGA side; + 0x30000000–0x3FFFFFFF is core-owned and the menu core uses none of it + elsewhere. The 3 MB region at 0x3A000000 conflicts with nothing (the + framework scaler framebuffers live at 0x20000000+). +- In 480i the FPGA reads source line `vcount*2 + field`, so the app renders + one normal progressive 720x480 frame — no field-splitting on the ARM side. + +DDR bandwidth is a non-issue: worst case (480i) is 720×480×4 B × 60 ≈ 80 MB/s +of sequential bursts against a multi-GB/s DDR3 port that nothing else in the +menu core touches. + +### 5.1 Removing the OSD video options entirely + +Question raised during review: can the "Video" section / second OSD page go +away, with the CRT mode toggle and the H/V offset trims moving into the ARM +launcher? **Yes — and it simplifies the core.** Every OSD video option maps +onto something the v2 control block already carries: + +| OSD option today | Replacement | +|---|---| +| CRT/native mode toggle (`status[9]`) | Implicit: valid magic + advancing frame counter in the control block ⇒ native scanout; word0 = 0 or stale ⇒ noise pattern. The launcher "turns on CRT mode" simply by publishing frames. `status[9]` and its CONF_STR entry are deleted. | +| H Offset list (`status[13:10]`) | `word1[15:8]` signed pixel trim (−8…+8, 1-px steps), set from a calibration screen in the launcher (arrow keys, live preview), persisted in the launcher's own config. | +| V Offset list (`status[17:14]`) | `word1[7:4]` signed line trim (−8…+2), same screen. 
| + +`CONF_STR` shrinks back to the stock menu core entry +(`"MENU;UART31250,MIDI;-;V,v"` + build date): one page, no Video section. The +core stops using `status[]` for video entirely. + +Why this is the right direction, beyond decluttering: + +- **One contract, one owner.** Mode and trims live next to the frames they + describe, set by the same process that renders them, read atomically in the + same 64-bit beat. No second control path through hps_io status bits. +- **Better calibration UX.** The launcher can draw a border test pattern + *while* the user nudges offsets — the OSD lists couldn't show the effect on + a full-bleed image, and 16-entry enum lists are a clumsy way to express + "nudge left a bit". Per-device persistence lives with the rest of the + launcher's config instead of MiSTer's core-config blob. +- **Fewer moving parts in RTL.** The offset inputs move from + `status`-decoding in `menu.sv` to the reader's already-synchronized control + parse; the OSD enum↔signed-value mapping tricks disappear. + +Trade-offs / notes: + +- A user running the **legacy (pre-v2) launcher** gets no trims (word1 absent + → offsets = 0). Acceptable: the new default timing is standard, trims are a + nicety, and legacy mode is compat-only. +- If the framebuffer path is off (noise pattern), there is nothing to + calibrate against — also fine, calibration belongs in the app. +- The MiSTer OSD overlay itself (main menu, file browser) is untouched; this + removes only the core's *option entries*, not the OSD. + +**Rejected alternative:** having the ARM app poke core status bits through a +patched Main_MiSTer (the Zaparoo_MiSTer fork could add a command for it). +Works, but spreads the video contract across three codebases and a Main fork +that must track upstream, for zero functional gain over the DDR words the +core already reads every vblank. + +--- + +## 6. 
App-side rules (zaparoo-launcher) + +These are as much a part of the fix as the RTL — geometry alone doesn't solve +"every CRT crops differently": + +1. **Render full-bleed.** Background art/color must reach all four edges of + the framebuffer; the outer few percent will be cropped on most sets and + visible on a few. +2. **Safe areas** (SMPTE SD guidelines): + - All interactive/meaningful content inside the central **90%** + (*action safe*: ~317x216 of 352x240, ~317x259 of 352x288, ~648x432 of + 720x480). + - Text you must be able to read inside the central **80%** + (*title safe*: ~282x192 / ~282x230 / ~576x384). +3. **Pixel aspect ratio is 10:11** (pixels slightly narrower than square) in + the two 60 Hz modes (352x240 and 720x480); in the PAL mode (352x288) it is ~12:11 (pixels slightly wider than square). A perfect circle needs ~10% more width in pixels at 60 Hz and ~8% less in PAL. Safe to + ignore for boxes-and-text UI; matters if rendering logos/art that must not + look squished. +4. **480i flicker discipline:** every scanline is repainted 30 times/second, + so 1-px horizontal lines and fine text shimmer. Use ≥2 px horizontal + strokes, avoid hard 1-px horizontal edges, or apply a mild vertical blur + (the standard trick in console-era 480i dashboards). The existing CRT + typography rules in `docs/native-core-poc.md` (integer snapping, bitmap + fonts) stay in force. +5. **Own the centering trims** (section 5.1): a calibration screen that draws + an edge/border test pattern and lets the user nudge H/V offsets with live + preview, publishing them via control word1 and persisting them in the + launcher config. Defaults are zero — the standard timing is the centering + mechanism; trims only compensate for miscentered sets. + +--- + +## 7. Implementation plan + +### Phase A — broadcast-geometry 240p (the main fix) + +FPGA (this repo): + +1. **`rtl/pll/pll_0002.v`**: `output_clock_frequency1` 27.027027 MHz → + `27.000000 MHz`. (Same single-line style as the earlier 20→27.027 change; + no other PLL params move.) +2. 
**`rtl/native_video_timing.sv`**: mode-0 constants — H 352/12/32/33, + V 240/3/3/16. Structure the constants as per-mode parameter sets selected + by a `mode` input (tied to 0 until Phases B/C) so later modes are additive. + Offset budgets change: positive H offset eats the now-small 12-px front + porch. **Trim range is deliberately reverted to ±8 px H, 1-px steps** + (this branch had widened it to ±16 in 2-px steps while the porches were + the centering mechanism — with broadcast-fill geometry the trim is a + nicety, and ±8 px ≈ ±1.2 µs is plenty). V range −8…+2 lines. RTL clamps + to these ranges and additionally never lets effective FP/BP drop below + 2 px / 1 line, so out-of-range word1 values saturate instead of breaking + sync. +3. **`rtl/native_video_reader.sv`**: + - Parse word1 (`ddr_dout[63:32]`) in `ST_WAIT_CTRL`: magic present → v2 + layout (buffers at word addresses 0x07400200 / 0x07430000, line burst + 176 words) and extract mode + h/v offsets; absent → legacy layout + (0x07400020 / 0x07409620, 160 words, offsets 0) displayed centered with + 16-px black bars (needs `hcount` from the timing module, already + exported but unconnected). Forward the synchronized offsets to the + timing module, latched at `new_frame`. + - `word0 == 0` → clear `frame_ready` and `first_frame_loaded` → core + reverts to the noise pattern (fixes defects 3/5 in section 3). + - Keep the 2-line preload (see defect 4 — it's already at the FIFO's safe + maximum); optionally deepen the FIFO to 1024 words for margin. +4. **`menu.sv`**: remove all video options from `CONF_STR` (back to the stock + `"MENU;UART31250,MIDI;-;V,v"` + build date — one OSD page, no Video + section); delete `status[9]` / `status[17:10]` decoding and the offset + wiring from `hps_io` (offsets now arrive via the reader's control parse, + section 5.1); correct the stale "15.734 kHz" comments (true again after + the PLL fix); README note about `forced_scandoubler`/`vga_scaler`. +5. 
**Testbench before synthesis** (see section 8). + +Frontend (`zaparoo-launcher`): + +6. `--crt` path sets fb0 to **352x240** 32bpp (`vmode -r 352 240 rgb32` + equivalent); writer constants: width 352, stride 1408, frame size 0x52800, + buffers at +0x1000 / +0x180000, region 0x300000; write magic + mode + + offset word on init (offsets from saved config, default 0) and clear both + words on stop. +7. UI safe-area pass per section 6; calibration screen for the H/V trims + (border test pattern + arrow-key nudge within ±8 px / −8…+2 lines, + persisted in launcher config). +8. Release coordination: core's legacy mode covers old-launcher/new-core; the + launcher's existing fb-geometry validation covers new-launcher/old-core + (writer disables itself, core shows noise — obvious, not subtle breakage). + +### Phase B — PAL 288p50 + +9. Timing mode 2: H total 432 (352/11/32/37), V total 312 (288/3/3/18). + Same 6.75 MHz clock; line rate exactly 15 625 Hz. +10. Reader: 288-line frame, same stride; fits existing buffer slots + (352×288×4 = 396 KB < 1.35 MB slot). +11. Launcher: a "video standard: NTSC / PAL" user setting → renders 352x288 + and publishes mode 2. PAL sets that accept 60 Hz RGB ("PAL-60", most of + them via SCART) can simply stay on mode 0; mode 2 is for strict-50 Hz + sets and correct-speed feel in PAL regions. + +### Phase C — 480i60 (after A/B verified on real CRTs) + +12. Timing mode 1: ce_pix ÷2 (13.5 MHz); H 858 total (720/19/62/57); 525-line + dual-field vertical counter; **half-line (429-clock) vsync offset on the + odd field**; field bit out → `VGA_F1` (replace the hardwired 0 in + `menu.sv`). +13. Reader: source line = `vcount*2 + field`; 720 px = 360 DDR words/line + exceeds the 8-bit burst counter, so fetch each line as **2×180-beat + bursts**; FIFO sizing: 360-word lines × 2-line preload = 720 words → + deepen FIFO to 1024. +14. Launcher: 720x480 rendering path; per-screen mode selection (e.g. 
launcher + UI in 240p, text-heavy screens in 480i); flicker styling per section 6. + +### Explicitly out of scope / rejected + +- **Using MiSTer's scaler framebuffer instead** — already rejected by the + project (`native-core-poc.md`): the whole point is core-owned, low-latency, + exact 15 kHz output. +- **31 kHz / 480p output** for VGA PC monitors — different audience; the + framework's `vga_scaler=1` path already serves it. +- **Changing the pixel clock to stretch 320 px across 52.7 µs** (the literal + Switchres approach, ~6.1 MHz dot clock) — works, but leaves the 27 MHz + family for no benefit; widening the framebuffer is cleaner on every axis. + +--- + +## 8. Verification + +1. **Simulation first** (no Quartus needed): a small testbench on + `native_video_timing` that measures, in µs/lines against section 4's table: + line period, sync width, sync→active delay, active width, frame period — + and for 480i: field alternation, the half-line vsync offset, and total + 525 lines/frame. This is cheap and catches every off-by-one that matters. +2. **CI build** (existing GitHub Actions Quartus workflow) for timing closure + and resource sanity. +3. **Hardware checklist** (per phase): + - Launcher renders a cross-hatch + border test pattern (240p-test-suite + style: 1-px frame at the extreme edge, safe-area rectangles at 90%/80%). + - Verify fill/centering on **at least 2–3 different CRTs** plus a capture + device (OSSC/RetroTINK profile or capture card reporting measured line + rate — should read 15.734 kHz exactly after the PLL fix). + - Legacy-compat check: old launcher against new core → centered 320x240 + with side bars. + - Writer-stop check: kill the launcher → noise pattern returns. + - Trim check: launcher calibration screen nudges the picture live in both + axes; values survive a launcher restart; out-of-range word1 values + saturate without disturbing sync. 
+ - OSD check: core options reduced to a single page (no Video section); + the OSD overlay itself still renders and is usable in every mode. + - HDMI side still locks (ascal) in every mode. + - 480i: confirm real interlacing (no line pairing) — fine horizontal lines + should shimmer, not stack; capture device should report 480i, not 240p. + +--- + +## 9. References + +- Switchres monitor presets (GroovyMAME): + `github.com/antonioginer/switchres` `monitor.cpp` — `ntsc`, `pal`, + `arcade_15` ranges (porch values in µs/ms). +- SMPTE 170M / standard NTSC line structure: 63.556 µs line, 10.9 µs + blanking, 52.66 µs active, 9.4 µs sync→active. +- CEA-861 720x480i timing: 858/19/62/57 @ 13.5 MHz, 525 lines. +- SMPTE safe areas (SD practice): action safe 90%, title safe 80% + (HD-era ST 2046-1 relaxed these to 93%/90% — use the SD numbers for + consumer CRTs). +- PSX_MiSTer `rtl/gpu_videoout_async.vhd` — half-line vsync offset reference. +- MiSTer framework: `sys/sys_top.v` (`VGA_F1` → ascal `i_fl`; csync), + `sys/vga_out.sv` (analog path is timing-transparent). +- ARM writer: `zaparoo-launcher/src/app/native_video_writer.cpp`, + `zaparoo-launcher/docs/native-core-poc.md`. +- Pixel aspect ratio / SIF background: BT.601 (704x480 → PAR 10:11; 352x240 + inherits it). diff --git a/menu.sv b/menu.sv index 5f90dac9..f6b84ea2 100644 --- a/menu.sv +++ b/menu.sv @@ -185,7 +185,9 @@ assign DDRAM_CLK = clk_sys; assign CE_PIXEL = ce_pix; assign VGA_SL = 0; -assign VGA_F1 = 0; +// Field number for 480i: ascal (HDMI) keys deinterlacing off this, and the +// analog csync path passes it through. 0 in the progressive modes. +assign VGA_F1 = native_field; assign VIDEO_ARX = 0; assign VIDEO_ARY = 0; assign VGA_SCALER= 0; @@ -207,15 +209,12 @@ wire [26:0] act_cnt2 = {~act_cnt[26],act_cnt[25:0]}; assign LED_POWER[0]= FB ? led[2] : act_cnt2[26] ? 
act_cnt2[25:18] > act_cnt2[7:0] : act_cnt2[25:18] <= act_cnt2[7:0]; -`include "build_id.v" -// Image centering options use signed two's-complement ordering with 0 first, -// so the power-on default (status bits = 0) selects the calibrated base timing. +`include "build_id.v" +// No video options here: native video mode and centering trims arrive via +// the DDR control block written by the launcher (see rtl/native_video_reader.sv). localparam CONF_STR = { "MENU;UART31250,MIDI;", "-;", - "O[13:10],H Offset,0,+2,+4,+6,+8,+10,+12,+14,-16,-14,-12,-10,-8,-6,-4,-2;", - "O[17:14],V Offset,0,+1,+2,+3,+4,+5,+6,+7,-8,-7,-6,-5,-4,-3,-2,-1;", - "-;", "V,v",`BUILD_DATE }; @@ -348,8 +347,9 @@ always @(posedge clk_sys) begin end // DDR clear loop removed: native_video_reader owns DDRAM_* signals. -// When status[9]=0 the reader is held in idle (rd=0, we=0) and DDR is unused; -// when status[9]=1 the reader takes over to fetch the linux-rendered framebuffer. +// The reader polls the launcher's control block once per vblank; until the +// launcher publishes frames it issues a single 64-bit read per frame and the +// core shows the noise pattern. //////////////////////////// MT32pi ////////////////////////////////// @@ -460,27 +460,28 @@ end localparam lfsr_n = 63; -wire PAL = status[4]; wire FB = status[5]; wire [2:0] led = status[8:6]; -// Pixel clock: CLK_VIDEO = 27.027 MHz; ce_pix /4 = ~6.756 MHz, which gives -// an NTSC-spec 15.734 kHz line rate when fed into native_video_timing -// (H_TOTAL=429). Both the cosine fallback and the FB reader use this ce_pix. +// Pixel clock: CLK_VIDEO = 27.000 MHz (the universal SD video clock). +// ce_pix /4 = 6.75 MHz gives exactly 15734.27 Hz (NTSC, 429-px line) and +// 15625.00 Hz (PAL, 432-px line); the 480i mode runs /2 = 13.5 MHz with an +// 858-px line for the same 15734.27 Hz. Both the cosine fallback and the FB +// reader use this ce_pix. 
+wire [1:0] native_mode; reg [1:0] ce_div; reg ce_pix; always @(posedge CLK_VIDEO) begin if (RESET) ce_div <= 2'd0; else ce_div <= ce_div + 2'd1; - ce_pix <= (ce_div == 2'd0); + ce_pix <= (native_mode == 2'd1) ? ce_div[0] : (ce_div == 2'd0); end // Native video timing + DDR reader. Timing outputs (sync, DE, vcount, frame // edge) are the SINGLE source of truth for VGA scanout in both modes — that's -// what guarantees the CRT sees a clean 15.734 kHz line rate whether we're -// painting cosine noise or reading a Linux-rendered framebuffer. -wire mode_zaparoo = status[9]; - +// what guarantees the CRT sees a clean 15 kHz line rate whether we're +// painting cosine noise or reading a Linux-rendered framebuffer. Mode and +// centering trims come from the launcher's DDR control block, not the OSD. wire [7:0] native_r; wire [7:0] native_g; wire [7:0] native_b; @@ -489,6 +490,7 @@ wire native_vs; wire native_de; wire [8:0] native_vcount; wire native_new_frame; +wire native_field; wire native_active; native_video_top native_video @@ -518,17 +520,13 @@ native_video_top native_video .vga_vblank (), .vga_vcount (native_vcount), .vga_new_frame (native_new_frame), - .enable (mode_zaparoo), - .active (native_active), - - // H offset is a 4-bit signed OSD value doubled into 2-pixel steps. - // V offset is a normal 4-bit signed OSD value in 1-line steps. - .h_offset ($signed({status[13:10], 1'b0})), - .v_offset ($signed(status[17:14])) + .vga_mode (native_mode), + .vga_field (native_field), + .active (native_active) ); -// Cosine + LFSR fallback noise pattern, painted into the 320x240 active area -// of the shared native timing. vvc steps once per frame; the LFSR walks every +// Cosine + LFSR fallback noise pattern, painted into the active area of the +// shared native timing (352x240 when no launcher is publishing frames). vvc steps once per frame; the LFSR walks every // pixel; cos LUT is indexed by vvc + vcount so the pattern shifts vertically // over time. 
Outside the active area we drive black to keep sync clean. reg [9:0] vvc; @@ -550,11 +548,12 @@ cos cos(vvc + {native_vcount, 2'b00}, cos_out); wire [7:0] comp_v = (cos_g >= rnd_c) ? {cos_g - rnd_c, 2'b00} : 8'd0; -// Mode A (default): cosine pattern paints into the native active area. -// Mode B (status[9]=1, frame ready): DDR-read RGB replaces the cosine pattern. +// Default: cosine pattern paints into the native active area. Once the +// launcher publishes frames (valid control block, advancing counter), the +// DDR-read RGB replaces the cosine pattern; it reverts when the writer stops. // Sync/DE come from the same native timing in both cases — the CRT sees one -// continuous, NTSC-spec signal regardless of which RGB source is selected. -wire use_native = mode_zaparoo & native_active; +// continuous, broadcast-spec signal regardless of which RGB source is selected. +wire use_native = native_active; assign VGA_DE = native_de; assign VGA_HS = native_hs; diff --git a/rtl/native_video_reader.sv b/rtl/native_video_reader.sv index 19310280..07055b20 100644 --- a/rtl/native_video_reader.sv +++ b/rtl/native_video_reader.sv @@ -1,8 +1,21 @@ // Zaparoo native video DDR reader. -// DDR contract: -// 0x3A000000: control word, (frame_counter << 2) | active_buffer -// 0x3A000100: buffer 0, 320x240 RGBX8888 -// 0x3A04B100: buffer 1, 320x240 RGBX8888 +// +// DDR contract v2 (one 64-bit beat at 0x3A000000, read each vblank): +// word0 [31:0]: (frame_counter << 2) | active_buffer; 0 = writer stopped +// word1 [63:32]: [31:16] magic 0x5A50 ("ZP") +// [15:8] h_offset, signed pixels (+ = right) +// [7:4] v_offset, signed lines (+ = down) +// [3:0] mode: 0 = 352x240p60, 1 = 720x480i60, 2 = 352x288p50 +// 0x3A001000: buffer 0 0x3A180000: buffer 1 (tight stride, width*4 B) +// +// Legacy contract (word1 magic absent): 320x240 buffers at 0x3A000100 / +// 0x3A04B100; the picture is scanned centered in the 352-px active area +// with 16-px black bars each side, offsets 0, mode 0. 
+// +// In 480i the app publishes one progressive 720x480 frame; this reader +// fetches source line vcount*2 + field, so no field-splitting on the ARM +// side. 720 px = 360 words exceeds the 8-bit burst counter, so 480i lines +// are fetched as two 180-beat bursts. module native_video_reader ( @@ -24,12 +37,18 @@ module native_video_reader input wire vblank, input wire new_frame, input wire new_line, - input wire [8:0] vcount, + input wire field, + input wire [9:0] hcount, + + // Quasi-static, ddr_clk domain: caller synchronizes into the video + // domain; the timing module latches them at the field wrap. + output reg [1:0] mode_out, + output reg signed [7:0] h_offset_out, + output reg signed [3:0] v_offset_out, output reg [7:0] r_out, output reg [7:0] g_out, output reg [7:0] b_out, - input wire enable, output wire frame_ready ); @@ -38,19 +57,16 @@ assign ddr_be = 8'hFF; assign ddr_we = 1'b0; localparam [28:0] CTRL_ADDR = 29'h07400000; -localparam [28:0] BUF0_ADDR = 29'h07400020; -localparam [28:0] BUF1_ADDR = 29'h07409620; -localparam [7:0] LINE_BURST = 8'd160; -localparam [28:0] LINE_STRIDE = 29'd160; -localparam [8:0] V_ACTIVE = 9'd240; +localparam [28:0] BUF0_LEGACY = 29'h07400020; +localparam [28:0] BUF1_LEGACY = 29'h07409620; +localparam [28:0] BUF0_V2 = 29'h07400200; +localparam [28:0] BUF1_V2 = 29'h07430000; +localparam [15:0] MAGIC_V2 = 16'h5A50; localparam [19:0] TIMEOUT_MAX = 20'hF_FFFF; -reg [1:0] enable_sync; -always @(posedge ddr_clk) begin - if(reset) enable_sync <= 2'b0; - else enable_sync <= {enable_sync[0], enable}; -end -wire enable_ddr = enable_sync[1]; +// Legacy 320-px picture centered in the 352-px active area. 
+localparam [9:0] LEGACY_BAR_L = 10'd16; +localparam [9:0] LEGACY_BAR_R = 10'd336; reg [1:0] new_frame_sync; always @(posedge ddr_clk) begin @@ -73,6 +89,15 @@ always @(posedge ddr_clk) begin end wire vblank_ddr = vblank_sync[1]; +// Field is stable for a whole field; the reader samples it only while +// scanning, long after the edge. +reg [1:0] field_sync; +always @(posedge ddr_clk) begin + if(reset) field_sync <= 2'b0; + else field_sync <= {field_sync[0], field}; +end +wire field_ddr = field_sync[1]; + reg [1:0] reset_vid_sync; always @(posedge clk_vid or posedge reset) begin if(reset) reset_vid_sync <= 2'b11; @@ -89,6 +114,14 @@ end wire frame_ready_vid = frame_ready_sync[1]; assign frame_ready = frame_ready_vid; +reg legacy_mode; +reg [1:0] legacy_sync; +always @(posedge clk_vid) begin + if(reset_vid) legacy_sync <= 2'b0; + else legacy_sync <= {legacy_sync[0], legacy_mode}; +end +wire legacy_vid = legacy_sync[1]; + localparam [3:0] ST_IDLE = 4'd0; localparam [3:0] ST_POLL_CTRL = 4'd1; localparam [3:0] ST_WAIT_CTRL = 4'd2; @@ -100,10 +133,12 @@ localparam [3:0] ST_WAIT_DISPLAY = 4'd7; reg [3:0] state; reg [31:0] ctrl_word; +reg [31:0] ctrl_word1; reg [29:0] prev_frame_counter; reg [28:0] buf_base_addr; reg [8:0] cur_line; reg [7:0] beat_count; +reg burst_idx; reg first_frame_loaded; reg preloading; reg [19:0] timeout_cnt; @@ -111,6 +146,21 @@ reg fifo_wr; reg [63:0] fifo_wr_data; wire fifo_full; +// Per-frame fetch geometry, registered in ST_CHECK_CTRL from the parsed +// control block: line length in 64-bit words, line count, interlace flag. +reg [8:0] line_words; +reg [8:0] scan_lines; +reg scan_interlaced; +reg two_bursts; + +wire magic_ok = (ctrl_word1[31:16] == MAGIC_V2); +wire [1:0] ctrl_mode = (ctrl_word1[3:0] > 4'd2) ? 2'd0 : ctrl_word1[1:0]; + +// 480i: source line = displayed line * 2 + field, from one progressive frame. +wire [8:0] src_line = scan_interlaced ? 
({cur_line[7:0], 1'b0} + {8'd0, field_ddr}) : cur_line; +wire [28:0] line_base = buf_base_addr + src_line * line_words; +wire [7:0] burst_len = two_bursts ? 8'd180 : line_words[7:0]; + reg [3:0] fifo_aclr_cnt; wire fifo_aclr_ddr_active = (fifo_aclr_cnt != 4'd0); wire fifo_aclr = reset | fifo_aclr_ddr_active; @@ -122,10 +172,12 @@ always @(posedge ddr_clk) begin ddr_burstcnt <= 8'd1; ddr_addr <= 29'd0; ctrl_word <= 32'd0; + ctrl_word1 <= 32'd0; prev_frame_counter <= 30'd0; - buf_base_addr <= BUF0_ADDR; + buf_base_addr <= BUF0_LEGACY; cur_line <= 9'd0; beat_count <= 8'd0; + burst_idx <= 1'b0; first_frame_loaded <= 1'b0; frame_ready_reg <= 1'b0; preloading <= 1'b0; @@ -133,6 +185,14 @@ always @(posedge ddr_clk) begin fifo_wr <= 1'b0; fifo_wr_data <= 64'd0; fifo_aclr_cnt <= 4'd0; + legacy_mode <= 1'b0; + mode_out <= 2'd0; + h_offset_out <= 8'sd0; + v_offset_out <= 4'sd0; + line_words <= 9'd160; + scan_lines <= 9'd240; + scan_interlaced <= 1'b0; + two_bursts <= 1'b0; end else begin fifo_wr <= 1'b0; @@ -148,7 +208,7 @@ always @(posedge ddr_clk) begin case(state) ST_IDLE: begin - if(enable_ddr && new_frame_ddr) state <= ST_POLL_CTRL; + if(new_frame_ddr) state <= ST_POLL_CTRL; end ST_POLL_CTRL: begin @@ -164,38 +224,71 @@ always @(posedge ddr_clk) begin ST_WAIT_CTRL: begin if(ddr_dout_ready) begin ctrl_word <= ddr_dout[31:0]; + ctrl_word1 <= ddr_dout[63:32]; timeout_cnt <= 20'd0; state <= ST_CHECK_CTRL; end - else if(timeout_cnt == TIMEOUT_MAX) state <= ST_IDLE; + else if(timeout_cnt == TIMEOUT_MAX) begin + // Stale frame_ready would scan a dead buffer forever; + // drop back to the noise pattern instead. + frame_ready_reg <= 1'b0; + first_frame_loaded <= 1'b0; + state <= ST_IDLE; + end else timeout_cnt <= timeout_cnt + 20'd1; end ST_CHECK_CTRL: begin - if(ctrl_word[31:2] != prev_frame_counter) begin - prev_frame_counter <= ctrl_word[31:2]; - buf_base_addr <= ctrl_word[0] ? 
BUF1_ADDR : BUF0_ADDR; - cur_line <= 9'd0; - preloading <= 1'b1; - fifo_aclr_cnt <= 4'd8; - if(first_frame_loaded) frame_ready_reg <= 1'b1; - state <= ST_READ_LINE; - end - else if(first_frame_loaded) begin - cur_line <= 9'd0; - preloading <= 1'b1; - fifo_aclr_cnt <= 4'd8; - state <= ST_READ_LINE; + if(ctrl_word == 32'd0) begin + // Writer stopped (or never started): revert to the noise + // pattern and forget the previous session. + frame_ready_reg <= 1'b0; + first_frame_loaded <= 1'b0; + prev_frame_counter <= 30'd0; + legacy_mode <= 1'b0; + mode_out <= 2'd0; + h_offset_out <= 8'sd0; + v_offset_out <= 4'sd0; + state <= ST_IDLE; end else begin - state <= ST_IDLE; + legacy_mode <= ~magic_ok; + mode_out <= magic_ok ? ctrl_mode : 2'd0; + h_offset_out <= magic_ok ? $signed(ctrl_word1[15:8]) : 8'sd0; + v_offset_out <= magic_ok ? $signed(ctrl_word1[7:4]) : 4'sd0; + line_words <= magic_ok ? ((ctrl_mode == 2'd1) ? 9'd360 : 9'd176) : 9'd160; + scan_lines <= (magic_ok && ctrl_mode == 2'd2) ? 9'd288 : 9'd240; + scan_interlaced <= magic_ok && (ctrl_mode == 2'd1); + two_bursts <= magic_ok && (ctrl_mode == 2'd1); + + if(ctrl_word[31:2] != prev_frame_counter) begin + prev_frame_counter <= ctrl_word[31:2]; + buf_base_addr <= ctrl_word[0] ? (magic_ok ? BUF1_V2 : BUF1_LEGACY) + : (magic_ok ? BUF0_V2 : BUF0_LEGACY); + cur_line <= 9'd0; + burst_idx <= 1'b0; + preloading <= 1'b1; + fifo_aclr_cnt <= 4'd8; + if(first_frame_loaded) frame_ready_reg <= 1'b1; + state <= ST_READ_LINE; + end + else if(first_frame_loaded) begin + cur_line <= 9'd0; + burst_idx <= 1'b0; + preloading <= 1'b1; + fifo_aclr_cnt <= 4'd8; + state <= ST_READ_LINE; + end + else begin + state <= ST_IDLE; + end end end ST_READ_LINE: begin if(!ddr_busy && !fifo_aclr_ddr_active) begin - ddr_addr <= buf_base_addr + (cur_line * LINE_STRIDE); - ddr_burstcnt <= LINE_BURST; + ddr_addr <= line_base + (burst_idx ? 
29'd180 : 29'd0); + ddr_burstcnt <= burst_len; ddr_rd <= 1'b1; beat_count <= 8'd0; timeout_cnt <= 20'd0; @@ -204,14 +297,27 @@ always @(posedge ddr_clk) begin end ST_WAIT_LINE: begin - if(beat_count == LINE_BURST) state <= ST_LINE_DONE; - else if(timeout_cnt == TIMEOUT_MAX) state <= ST_IDLE; + if(beat_count == burst_len) begin + if(two_bursts && !burst_idx) begin + burst_idx <= 1'b1; + state <= ST_READ_LINE; + end + else begin + burst_idx <= 1'b0; + state <= ST_LINE_DONE; + end + end + else if(timeout_cnt == TIMEOUT_MAX) begin + frame_ready_reg <= 1'b0; + first_frame_loaded <= 1'b0; + state <= ST_IDLE; + end else if(!ddr_dout_ready) timeout_cnt <= timeout_cnt + 20'd1; end ST_LINE_DONE: begin cur_line <= cur_line + 9'd1; - if(cur_line == V_ACTIVE - 9'd1) begin + if(cur_line == scan_lines - 9'd1) begin first_frame_loaded <= 1'b1; frame_ready_reg <= 1'b1; preloading <= 1'b0; @@ -227,7 +333,7 @@ always @(posedge ddr_clk) begin end ST_WAIT_DISPLAY: begin - if(cur_line < V_ACTIVE && new_line_ddr && !vblank_ddr) state <= ST_READ_LINE; + if(cur_line < scan_lines && new_line_ddr && !vblank_ddr) state <= ST_READ_LINE; end default: state <= ST_IDLE; @@ -239,13 +345,15 @@ wire [63:0] fifo_rd_data; wire fifo_empty; reg fifo_rd; +// 1024 words: 480i preloads 2 x 360-word lines (720 words peak); the +// progressive modes peak around 368 words with their 176-word lines. dcfifo #( .intended_device_family ("Cyclone V"), - .lpm_numwords (512), + .lpm_numwords (1024), .lpm_showahead ("ON"), .lpm_type ("dcfifo"), .lpm_width (64), - .lpm_widthu (9), + .lpm_widthu (10), .overflow_checking ("ON"), .rdsync_delaypipe (4), .underflow_checking ("ON"), @@ -275,6 +383,10 @@ reg pixel_word_valid; wire [31:0] pixel_low = pixel_word[31:0]; wire [31:0] pixel_high_word = pixel_word[63:32]; +// Legacy frames are 320 px wide inside the 352-px active area: black bars +// for the first/last 16 px, FIFO pixels in between. 
+wire fetch_active = de && (!legacy_vid || (hcount >= LEGACY_BAR_L && hcount < LEGACY_BAR_R)); + task automatic output_pixel; input [31:0] pixel; begin @@ -300,7 +412,7 @@ always @(posedge clk_vid) begin fifo_rd <= 1'b0; if(ce_pix) begin - if(de && frame_ready_vid) begin + if(fetch_active && frame_ready_vid) begin if(pixel_word_valid) begin if(pixel_high) begin output_pixel(pixel_high_word); @@ -325,6 +437,12 @@ always @(posedge clk_vid) begin b_out <= 8'd0; end end + else if(de) begin + // Legacy side bars: keep the partially consumed word. + r_out <= 8'd0; + g_out <= 8'd0; + b_out <= 8'd0; + end else begin r_out <= 8'd0; g_out <= 8'd0; diff --git a/rtl/native_video_timing.sv b/rtl/native_video_timing.sv index 02ea58da..0d442cde 100644 --- a/rtl/native_video_timing.sv +++ b/rtl/native_video_timing.sv @@ -1,6 +1,15 @@ -// Zaparoo native video timing: 320x240 at 15.734 kHz from 27 MHz / 4. -// h_offset/v_offset shift the image by repartitioning front porch and back -// porch. H_TOTAL/V_TOTAL are invariant, so line/frame rates stay unchanged. +// Zaparoo native video timing: standard-definition CRT modes from 27 MHz. +// +// mode 0: 352x240p60 (NTSC) ce_pix = 27/4 = 6.75 MHz, 429x262, 15734.27 Hz +// mode 1: 720x480i60 (CEA-861) ce_pix = 27/2 = 13.5 MHz, 858x525, 15734.27 Hz +// mode 2: 352x288p50 (PAL) ce_pix = 27/4 = 6.75 MHz, 432x312, 15625.00 Hz +// +// mode_in/h_offset_in/v_offset_in are quasi-static (two-flop synchronized by +// the caller) and are latched here at the field wrap so a mid-frame update +// can't corrupt sync. Offsets shift the image by repartitioning front/back +// porch; totals are invariant, so line/frame rates never move. Out-of-range +// offsets are clamped to the supported -8..+8 px / -8..+2 line window, which +// keeps every mode's effective porches at or above 2 px / 1 line. 
module native_video_timing ( @@ -8,36 +17,69 @@ module native_video_timing input wire ce_pix, input wire reset, - // Image centering: positive = shift right/down (FP shrinks, BP grows). - input wire signed [4:0] h_offset, // -16..+14 pixels (budget H_FP=26 / H_BP=51) - input wire signed [3:0] v_offset, // -8..+7 lines (budget V_FP=8 / V_BP=11) + input wire [1:0] mode_in, + input wire signed [7:0] h_offset_in, // + = right, honored -8..+8 px + input wire signed [3:0] v_offset_in, // + = down, honored -8..+2 lines + output reg [1:0] mode, // latched active mode; selects the ce_pix divider + output reg field, // 480i field number, 0 in progressive modes output reg hsync, output reg vsync, output reg hblank, output reg vblank, output reg de, output reg [9:0] hcount, - output reg [8:0] vcount, + output reg [8:0] vcount, // line within the current field output reg new_frame, output reg new_line ); -localparam [9:0] H_ACTIVE = 10'd320; -// 26/32/51 keeps total blanking fixed while moving the default image 12 px -// left from the earlier CRT-specific 14/32/63 porch split. -localparam [9:0] H_FP = 10'd26; -localparam [5:0] H_SYNC = 6'd32; -localparam [9:0] H_BP = 10'd51; -localparam [9:0] H_TOTAL = 10'd429; - -// V blanking rebalanced from 6/3/13 to 8/3/11 to give symmetric ±8 budget -// while preserving V_TOTAL=262 (and thus 59.94 Hz refresh). -localparam [8:0] V_ACTIVE = 9'd240; -localparam [8:0] V_FP = 9'd8; -localparam [4:0] V_SYNC = 5'd3; -localparam [8:0] V_BP = 9'd11; -localparam [8:0] V_TOTAL = 9'd262; +localparam [1:0] MODE_NTSC = 2'd0; +localparam [1:0] MODE_480I = 2'd1; +localparam [1:0] MODE_PAL = 2'd2; + +// Per-mode parameter sets (Switchres ntsc/pal presets, CEA-861 for 480i). +// 480i: 525-line frame as two fields of 262 (field 0) and 263 (field 1) +// lines; field 1 additionally asserts vsync half a line late (see below). 
+reg [9:0] H_ACTIVE, H_FP, H_BP, H_TOTAL; +reg [6:0] H_SYNC; +reg [8:0] V_ACTIVE, V_FP, V_BP, V_TOTAL; +reg [4:0] V_SYNC; + +always @* begin + case(mode) + MODE_480I: begin + H_ACTIVE = 10'd720; H_FP = 10'd19; H_SYNC = 7'd62; H_BP = 10'd57; H_TOTAL = 10'd858; + V_ACTIVE = 9'd240; V_FP = 9'd4; V_SYNC = 5'd3; + V_BP = field ? 9'd16 : 9'd15; + V_TOTAL = field ? 9'd263 : 9'd262; + end + MODE_PAL: begin + H_ACTIVE = 10'd352; H_FP = 10'd11; H_SYNC = 7'd32; H_BP = 10'd37; H_TOTAL = 10'd432; + V_ACTIVE = 9'd288; V_FP = 9'd3; V_SYNC = 5'd3; V_BP = 9'd18; V_TOTAL = 9'd312; + end + default: begin // MODE_NTSC + H_ACTIVE = 10'd352; H_FP = 10'd12; H_SYNC = 7'd32; H_BP = 10'd33; H_TOTAL = 10'd429; + V_ACTIVE = 9'd240; V_FP = 9'd3; V_SYNC = 5'd3; V_BP = 9'd16; V_TOTAL = 9'd262; + end + endcase +end + +wire [1:0] next_mode = (mode_in == 2'd3) ? MODE_NTSC : mode_in; + +function automatic signed [4:0] clamp_h(input signed [7:0] v); + if (v > 8'sd8) clamp_h = 5'sd8; + else if (v < -8'sd8) clamp_h = -5'sd8; + else clamp_h = v[4:0]; +endfunction + +function automatic signed [3:0] clamp_v(input signed [3:0] v); + if (v > 4'sd2) clamp_v = 4'sd2; + else clamp_v = v; +endfunction + +reg signed [4:0] h_offset; +reg signed [3:0] v_offset; // Sync starts shift with the offset; two's-complement subtraction in // unsigned arithmetic yields the correct result at both ends of the range. @@ -46,8 +88,25 @@ wire [9:0] H_SYNC_END = H_SYNC_START + H_SYNC; wire [8:0] V_SYNC_START = V_ACTIVE + (V_FP - {{5{v_offset[3]}}, v_offset}); wire [8:0] V_SYNC_END = V_SYNC_START + V_SYNC; +// In 480i the odd field's vsync transitions half a scanline (H_TOTAL/2 +// ce_pix clocks) after the line boundary, interleaving its scanlines +// between the even field's. Without this both fields land on the same +// scanlines (line pairing). vs_step fires once per line at the point a +// vsync edge may occur; vs_line is the line whose start (field 0) or +// midpoint (field 1) that edge aligns to. +wire vs_step = field ? 
(hcount == (H_TOTAL >> 1) - 10'd1) + : (hcount == H_TOTAL - 10'd1); +wire [8:0] vs_line = field ? vcount : (vcount + 9'd1); + +wire line_wrap = (hcount == H_TOTAL - 10'd1); +wire field_wrap = line_wrap && (vcount == V_TOTAL - 9'd1); + always @(posedge clk) begin if(reset) begin + mode <= MODE_NTSC; + field <= 1'b0; + h_offset <= 5'sd0; + v_offset <= 4'sd0; hcount <= 10'd0; vcount <= 9'd0; hsync <= 1'b0; @@ -65,7 +124,7 @@ always @(posedge clk) begin new_frame <= 1'b0; new_line <= 1'b0; - if(hcount == H_TOTAL - 10'd1) begin + if(line_wrap) begin hcount <= 10'd0; if(vcount == V_TOTAL - 9'd1) vcount <= 9'd0; else vcount <= vcount + 9'd1; @@ -74,29 +133,49 @@ always @(posedge clk) begin hcount <= hcount + 10'd1; end + // Mode and trims apply only at the field wrap, with counters at + // zero, so every line of a field is cut from one parameter set. + if(field_wrap) begin + mode <= next_mode; + if(next_mode != MODE_480I) field <= 1'b0; + h_offset <= clamp_h(h_offset_in); + v_offset <= clamp_v(v_offset_in); + end + if(hcount == H_ACTIVE - 10'd1) hblank <= 1'b1; - else if(hcount == H_TOTAL - 10'd1) hblank <= 1'b0; + else if(line_wrap) hblank <= 1'b0; if(hcount == H_SYNC_START - 10'd1) hsync <= 1'b1; else if(hcount == H_SYNC_END - 10'd1) hsync <= 1'b0; - if(hcount == H_TOTAL - 10'd1) begin + if(vs_step) begin + if(vs_line == V_SYNC_START) vsync <= 1'b1; + else if(vs_line == V_SYNC_END) vsync <= 1'b0; + end + + if(line_wrap) begin if(vcount == V_ACTIVE - 9'd1) vblank <= 1'b1; else if(vcount == V_TOTAL - 9'd1) vblank <= 1'b0; - - if(vcount == V_SYNC_START - 9'd1) vsync <= 1'b1; - else if(vcount == V_SYNC_END - 9'd1) vsync <= 1'b0; end if(hcount == H_ACTIVE - 10'd1) new_line <= 1'b1; - if(hcount == H_TOTAL - 10'd1 && vcount == V_ACTIVE - 9'd1) new_frame <= 1'b1; + // Field flips at the START of vblank, not the field wrap: the + // reader preloads the next field's first lines right after + // new_frame, so the field it reads must already be the one about + // to be displayed. 
The vsync inside this blanking interval then + // uses the new field's phase, which keeps the half-line + // alternation intact (intervals stay exactly 262.5 lines). + if(line_wrap && vcount == V_ACTIVE - 9'd1) begin + new_frame <= 1'b1; + field <= (mode == MODE_480I) ? ~field : 1'b0; + end next_hblank = hblank; if(hcount == H_ACTIVE - 10'd1) next_hblank = 1'b1; - else if(hcount == H_TOTAL - 10'd1) next_hblank = 1'b0; + else if(line_wrap) next_hblank = 1'b0; next_vblank = vblank; - if(hcount == H_TOTAL - 10'd1) begin + if(line_wrap) begin if(vcount == V_ACTIVE - 9'd1) next_vblank = 1'b1; else if(vcount == V_TOTAL - 9'd1) next_vblank = 1'b0; end diff --git a/rtl/native_video_top.sv b/rtl/native_video_top.sv index 36cbedb3..0de69a9b 100644 --- a/rtl/native_video_top.sv +++ b/rtl/native_video_top.sv @@ -1,4 +1,9 @@ // Zaparoo native video wrapper: timing + RGBX8888 DDR reader. +// +// Mode and centering trims arrive from the launcher through the DDR control +// block (parsed by the reader in the ddr_clk domain). They are quasi-static: +// two-flop synchronized here into the video clock, then latched by the +// timing module at the field wrap. module native_video_top ( @@ -28,12 +33,9 @@ module native_video_top output wire [8:0] vga_vcount, output wire vga_new_frame, - input wire enable, - output wire active, - - // OSD image centering: 0 = default porch split. 
- input wire signed [4:0] h_offset, - input wire signed [3:0] v_offset + output wire [1:0] vga_mode, // active timing mode; selects ce_pix divider + output wire vga_field, // 480i field number (VGA_F1) + output wire active ); wire tim_hs; @@ -41,26 +43,44 @@ wire tim_vs; wire tim_hblank; wire tim_vblank; wire tim_de; +wire [9:0] tim_hcount; wire [8:0] tim_vcount; wire tim_new_frame; wire tim_new_line; +wire tim_field; + +wire [1:0] rd_mode; +wire signed [7:0] rd_h_offset; +wire signed [3:0] rd_v_offset; + +reg [1:0] mode_sync [1:0]; +reg [7:0] h_offset_sync [1:0]; +reg [3:0] v_offset_sync [1:0]; +always @(posedge clk_vid) begin + mode_sync[0] <= rd_mode; mode_sync[1] <= mode_sync[0]; + h_offset_sync[0] <= rd_h_offset; h_offset_sync[1] <= h_offset_sync[0]; + v_offset_sync[0] <= rd_v_offset; v_offset_sync[1] <= v_offset_sync[0]; +end native_video_timing timing ( - .clk (clk_vid), - .ce_pix (ce_pix), - .reset (reset), - .h_offset (h_offset), - .v_offset (v_offset), - .hsync (tim_hs), - .vsync (tim_vs), - .hblank (tim_hblank), - .vblank (tim_vblank), - .de (tim_de), - .hcount (), - .vcount (tim_vcount), - .new_frame (tim_new_frame), - .new_line (tim_new_line) + .clk (clk_vid), + .ce_pix (ce_pix), + .reset (reset), + .mode_in (mode_sync[1]), + .h_offset_in ($signed(h_offset_sync[1])), + .v_offset_in ($signed(v_offset_sync[1])), + .mode (vga_mode), + .field (tim_field), + .hsync (tim_hs), + .vsync (tim_vs), + .hblank (tim_hblank), + .vblank (tim_vblank), + .de (tim_de), + .hcount (tim_hcount), + .vcount (tim_vcount), + .new_frame (tim_new_frame), + .new_line (tim_new_line) ); wire frame_ready; @@ -85,12 +105,16 @@ native_video_reader reader .vblank (tim_vblank), .new_frame (tim_new_frame), .new_line (tim_new_line), - .vcount (tim_vcount), + .field (tim_field), + .hcount (tim_hcount), + + .mode_out (rd_mode), + .h_offset_out (rd_h_offset), + .v_offset_out (rd_v_offset), .r_out (vga_r), .g_out (vga_g), .b_out (vga_b), - .enable (enable), .frame_ready (frame_ready) ); @@ 
-101,6 +125,7 @@ assign vga_hblank = tim_hblank; assign vga_vblank = tim_vblank; assign vga_vcount = tim_vcount; assign vga_new_frame = tim_new_frame; -assign active = enable & frame_ready; +assign vga_field = tim_field; +assign active = frame_ready; endmodule diff --git a/rtl/pll/pll_0002.v b/rtl/pll/pll_0002.v index 4c7ed140..833cc2b9 100644 --- a/rtl/pll/pll_0002.v +++ b/rtl/pll/pll_0002.v @@ -25,7 +25,7 @@ module pll_0002( .output_clock_frequency0("100.000000 MHz"), .phase_shift0("0 ps"), .duty_cycle0(50), - .output_clock_frequency1("27027027 Hz"), + .output_clock_frequency1("27.000000 MHz"), .phase_shift1("0 ps"), .duty_cycle1(50), .output_clock_frequency2("0 MHz"), diff --git a/tb/dcfifo_sim.sv b/tb/dcfifo_sim.sv new file mode 100644 index 00000000..47b29b95 --- /dev/null +++ b/tb/dcfifo_sim.sv @@ -0,0 +1,70 @@ +// Behavioral stand-in for the Altera dcfifo megafunction (showahead mode), +// simulation only. No real CDC modeling — pointers are plain integers. + +module dcfifo #( + parameter intended_device_family = "", + parameter lpm_numwords = 1024, + parameter lpm_showahead = "ON", + parameter lpm_type = "dcfifo", + parameter lpm_width = 64, + parameter lpm_widthu = 10, + parameter overflow_checking = "ON", + parameter rdsync_delaypipe = 4, + parameter underflow_checking = "ON", + parameter use_eab = "ON", + parameter wrsync_delaypipe = 4 +)( + input wire aclr, + input wire [lpm_width-1:0] data, + input wire rdclk, + input wire rdreq, + input wire wrclk, + input wire wrreq, + output wire [lpm_width-1:0] q, + output wire rdempty, + output wire wrfull, + output wire [1:0] eccstatus, + output wire rdfull, + output wire [lpm_widthu-1:0] rdusedw, + output wire wrempty, + output wire [lpm_widthu-1:0] wrusedw +); + +reg [lpm_width-1:0] mem [0:lpm_numwords-1]; +integer wptr = 0, rptr = 0; +integer peak_used = 0; // TB-visible; may be reset hierarchically +integer overflow_count = 0; // writes dropped (overflow_checking semantics) +integer underflow_count = 0; + 
+wire [31:0] used = wptr - rptr; + +assign q = mem[rptr % lpm_numwords]; +assign rdempty = (used == 0); +assign wrfull = (used >= lpm_numwords); +assign eccstatus = 2'b00; +assign rdfull = wrfull; +assign wrempty = rdempty; +assign rdusedw = used[lpm_widthu-1:0]; +assign wrusedw = used[lpm_widthu-1:0]; + +always @(posedge wrclk or posedge aclr) begin + if (aclr) wptr <= 0; + else if (wrreq) begin + if (wrfull) overflow_count = overflow_count + 1; + else begin + mem[wptr % lpm_numwords] <= data; + wptr <= wptr + 1; + if (wptr + 1 - rptr > peak_used) peak_used = wptr + 1 - rptr; + end + end +end + +always @(posedge rdclk or posedge aclr) begin + if (aclr) rptr <= 0; + else if (rdreq) begin + if (rdempty) underflow_count = underflow_count + 1; + else rptr <= rptr + 1; + end +end + +endmodule diff --git a/tb/native_video_reader_tb.sv b/tb/native_video_reader_tb.sv new file mode 100644 index 00000000..b51faeb4 --- /dev/null +++ b/tb/native_video_reader_tb.sv @@ -0,0 +1,291 @@ +// System-level testbench for native_video_top (timing + reader) against a +// behavioral DDR model and tb/dcfifo_sim.sv. 
Verifies the v2 DDR contract: +// +// - no writer (word0 == 0): core stays inactive, only control polls issued +// - v2 frames: correct buffer base, 176-word line bursts, mode + h/v +// offsets parsed from word1 and latched by the timing module +// - double buffering: counter change switches buffer base +// - writer stop: word0 -> 0 drops active (frame_ready) again +// - legacy contract (no magic): 160-word lines from the legacy buffers, +// picture centered with 16-px black side bars +// - PAL (mode 2): 288 line fetches +// - 480i (mode 1): two 180-beat bursts per line, source line = 2*line+field, +// alternating per field; FIFO never overflows +// - DDR timeout: unresponsive bus drops active instead of latching stale +// +// Run: tb/run.sh + +`timescale 1ns/1ps + +module native_video_reader_tb; + +reg clk_sys = 0; always #5 clk_sys = ~clk_sys; // 100 MHz DDR-side +reg clk_vid = 0; always #18.5185 clk_vid = ~clk_vid; // 27 MHz +reg reset = 1; + +wire [1:0] vmode; +wire vfield; + +// ce_pix divider mirrors menu.sv. +reg [1:0] ce_div = 0; +reg ce_pix = 0; +always @(posedge clk_vid) begin + if (reset) ce_div <= 2'd0; + else ce_div <= ce_div + 2'd1; + ce_pix <= (vmode == 2'd1) ? 
ce_div[0] : (ce_div == 2'd0); +end + +// ---- DDR model ------------------------------------------------------------- +localparam [28:0] CTRL_ADDR = 29'h07400000; +localparam [28:0] BUF0_LEGACY = 29'h07400020; +localparam [28:0] BUF1_LEGACY = 29'h07409620; +localparam [28:0] BUF0_V2 = 29'h07400200; +localparam [28:0] BUF1_V2 = 29'h07430000; +localparam [63:0] PIX_DATA = 64'h00FFAA55_00FFAA55; // B,G,R,X = 55,AA,FF,00 + +wire ddr_rd; +wire [28:0] ddr_addr; +wire [7:0] ddr_burstcnt; +reg [63:0] ddr_dout = 0; +reg ddr_dout_ready = 0; + +reg [63:0] ctrl_q = 64'd0; // {word1, word0} as published by the "writer" +reg respond_en = 1; + +integer req_n = 0; +reg [28:0] req_addr [0:199999]; +reg [7:0] req_cnt [0:199999]; + +reg [28:0] cur_addr = 0; +integer cur_left = 0; +integer lat = 0; + +always @(posedge clk_sys) begin + ddr_dout_ready <= 0; + if (ddr_rd) begin + req_addr[req_n] = ddr_addr; + req_cnt[req_n] = ddr_burstcnt; + req_n = req_n + 1; + if (respond_en) begin + cur_addr <= ddr_addr; + cur_left <= ddr_burstcnt; + lat <= 3; + end + end + else if (cur_left != 0) begin + if (lat != 0) lat <= lat - 1; + else begin + ddr_dout <= (cur_addr == CTRL_ADDR) ? 
ctrl_q : PIX_DATA; + ddr_dout_ready <= 1; + cur_addr <= cur_addr + 29'd1; + cur_left <= cur_left - 1; + end + end +end + +// ---- DUT -------------------------------------------------------------------- +wire [7:0] vga_r, vga_g, vga_b; +wire new_frame, active; + +native_video_top dut +( + .clk_sys (clk_sys), + .clk_vid (clk_vid), + .ce_pix (ce_pix), + .reset (reset), + + .ddr_busy (1'b0), + .ddr_burstcnt (ddr_burstcnt), + .ddr_addr (ddr_addr), + .ddr_dout (ddr_dout), + .ddr_dout_ready (ddr_dout_ready), + .ddr_rd (ddr_rd), + .ddr_din (), + .ddr_be (), + .ddr_we (), + + .vga_r (vga_r), + .vga_g (vga_g), + .vga_b (vga_b), + .vga_hs (), + .vga_vs (), + .vga_de (), + .vga_hblank (), + .vga_vblank (), + .vga_vcount (), + .vga_new_frame (new_frame), + .vga_mode (vmode), + .vga_field (vfield), + .active (active) +); + +// ---- helpers ----------------------------------------------------------------- +integer errors = 0; + +task check(input string name, input integer got, input integer exp); + begin + if (got !== exp) begin + errors = errors + 1; + $display("FAIL %-36s got %0d (0x%0h), expected %0d (0x%0h)", name, got, got, exp, exp); + end + else $display("pass %-36s %0d", name, got); + end +endtask + +task wait_frames(input integer n); + repeat (n) @(posedge new_frame); +endtask + +// Publish a control block: word0 = (counter << 2) | buffer. +task publish(input bit magic, input [3:0] pmode, input signed [7:0] hoff, + input signed [3:0] voff, input integer counter, input bit buffer); + begin + ctrl_q = {magic ? 16'h5A50 : 16'h0000, hoff, voff, pmode, + counter[29:0], 1'b0, buffer}; + end +endtask + +// Verify one whole frame's DDR request sequence: a control poll followed by +// nlines line fetches of bpl bursts of blen beats each, line l fetched from +// base + src*stride + b*blen, where src = l (progressive) or 2*l + field +// (intl set). Also asserts the FIFO never overflowed during the frame. 
+task check_frame_fetch(input string tag, input [28:0] base, input integer stride,
+                       input integer nlines, input integer blen, input integer bpl,
+                       input bit intl, output reg fld);
+    integer s, l, b, idx, src, bad;  // bad: set once any line fetch mismatches
+    begin
+        @(posedge new_frame);            // start of the frame under test
+        @(negedge clk_vid);              // step off the clk_vid rising edge before sampling vfield
+        fld = vfield;
+        s = req_n; bad = 0;              // s: log index the frame's first request will get
+        dut.reader.line_fifo.peak_used = 0;
+        dut.reader.line_fifo.overflow_count = 0;
+        @(posedge new_frame);            // everything logged from s onward is checked below
+        check({tag, " fifo overflow-free"}, dut.reader.line_fifo.overflow_count, 0);
+        $display("info %s fifo peak occupancy: %0d / 1024 words", tag, dut.reader.line_fifo.peak_used);
+        check({tag, " ctrl poll addr"}, req_addr[s], CTRL_ADDR);
+        check({tag, " ctrl poll burst"}, req_cnt[s], 1);
+        check({tag, " requests/frame"}, req_n >= s + 1 + nlines*bpl, 1);
+        for (l = 0; l < nlines; l = l + 1) begin
+            src = intl ? (2*l + fld) : l;    // interlaced: source line = 2*line + field
+            for (b = 0; b < bpl; b = b + 1) begin
+                idx = s + 1 + l*bpl + b;     // +1 skips the control poll at index s
+                if (req_addr[idx] !== base + src*stride + b*blen || req_cnt[idx] !== blen[7:0]) begin
+                    errors = errors + 1; bad = 1;
+                    $display("FAIL %s line %0d burst %0d: got addr 0x%0h cnt %0d, expected addr 0x%0h cnt %0d",
+                             tag, l, b, req_addr[idx], req_cnt[idx], base + src*stride + b*blen, blen);
+                    l = nlines; b = bpl; // bail after first mismatch
+                end
+            end
+        end
+        if (!bad) $display("pass %s frame fetch sequence (%0d lines x %0d bursts, field %0d)", tag, nlines, bpl, fld);  // no "pass" after a FAIL
+    end
+endtask
+
+// Sample vga_r at a given (hcount, vcount) pixel tick.
+task sample_r(input integer hx, input integer vx, output [7:0] r);
+    begin
+        @(posedge clk_vid);
+        while (!(ce_pix && dut.timing.hcount == hx[9:0] && dut.timing.vcount == vx[8:0] && dut.timing.de))
+            @(posedge clk_vid);
+        r = vga_r;
+    end
+endtask
+
+reg [7:0] rs;
+reg fld_a, fld_b;
+
+// ---- test sequence ------------------------------------------------------------
+initial begin
+    repeat (20) @(posedge clk_sys);
+    reset = 0;
+
+    // Phase 0: no writer.
+ $display("--- phase 0: no writer ---"); + wait_frames(3); + check("idle: active low", {31'd0, active}, 0); + check("idle: only ctrl polls", req_cnt[req_n-1], 1); + check("idle: poll addr", req_addr[req_n-1], CTRL_ADDR); + + // Phase 1: v2 writer, mode 0, offsets +5/-3, buffer 0. + $display("--- phase 1: v2 mode 0 ---"); + publish(1, 4'd0, 8'sd5, -4'sd3, 1, 0); + wait (active === 1'b1); + wait_frames(2); + check("v2: mode latched", vmode, 0); + check("v2: h_offset latched", dut.timing.h_offset, 5); + check("v2: v_offset latched", dut.timing.v_offset, -3); + check_frame_fetch("v2-buf0", BUF0_V2, 176, 240, 176, 1, 0, fld_a); + sample_r(100, 100, rs); check("v2: interior pixel R", rs, 8'hFF); + sample_r(6, 100, rs); check("v2: no left bar", rs, 8'hFF); + + // Phase 2: counter advances with buffer 1. + $display("--- phase 2: double buffer ---"); + publish(1, 4'd0, 8'sd5, -4'sd3, 2, 1); + wait_frames(2); + check_frame_fetch("v2-buf1", BUF1_V2, 176, 240, 176, 1, 0, fld_a); + + // Phase 3: writer stops. + $display("--- phase 3: writer stop ---"); + ctrl_q = 64'd0; + wait_frames(3); + check("stop: active drops", {31'd0, active}, 0); + check("stop: mode reverts", vmode, 0); + + // Phase 4: legacy writer (no magic). + $display("--- phase 4: legacy contract ---"); + publish(0, 4'd0, 8'sd0, 4'sd0, 3, 0); + wait (active === 1'b1); + wait_frames(2); + check("legacy: offsets zero", dut.timing.h_offset, 0); + check_frame_fetch("legacy", BUF0_LEGACY, 160, 240, 160, 1, 0, fld_a); + sample_r(6, 100, rs); check("legacy: left bar black", rs, 8'h00); + sample_r(345, 100, rs); check("legacy: right bar black", rs, 8'h00); + sample_r(100, 100, rs); check("legacy: interior pixel", rs, 8'hFF); + + // Phase 5: PAL. + $display("--- phase 5: v2 mode 2 (PAL) ---"); + publish(1, 4'd2, 8'sd0, 4'sd0, 4, 0); + wait (vmode === 2'd2); + wait_frames(2); + check_frame_fetch("pal", BUF0_V2, 176, 288, 176, 1, 0, fld_a); + + // Phase 6: 480i. 
+ $display("--- phase 6: v2 mode 1 (480i) ---"); + publish(1, 4'd1, 8'sd0, 4'sd0, 5, 0); + wait (vmode === 2'd1); + wait_frames(2); + check_frame_fetch("480i-a", BUF0_V2, 360, 240, 180, 2, 1, fld_a); + wait_frames(1); // realign so the next measured field has opposite parity + check_frame_fetch("480i-b", BUF0_V2, 360, 240, 180, 2, 1, fld_b); + check("480i: both field parities seen", {31'd0, fld_a ^ fld_b}, 1); + + // Phase 7: DDR stops responding mid-session. + $display("--- phase 7: DDR timeout ---"); + publish(1, 4'd0, 8'sd0, 4'sd0, 6, 0); + wait (vmode === 2'd0); + wait_frames(3); + check("pre-timeout: active", {31'd0, active}, 1); + respond_en = 0; + wait (active === 1'b0); + $display("pass timeout: active dropped"); + respond_en = 1; + publish(1, 4'd0, 8'sd0, 4'sd0, 7, 0); + wait (active === 1'b1); + $display("pass timeout: recovered after writer republish"); + + if (errors == 0) $display("ALL CHECKS PASSED"); + else begin + $display("%0d CHECK(S) FAILED", errors); + $fatal(1); + end + $finish; +end + +initial begin + #3_000_000_000; + $display("TIMEOUT"); + $fatal(1); +end + +endmodule diff --git a/tb/native_video_timing_tb.sv b/tb/native_video_timing_tb.sv new file mode 100644 index 00000000..dd174ff6 --- /dev/null +++ b/tb/native_video_timing_tb.sv @@ -0,0 +1,229 @@ +// Self-checking testbench for native_video_timing (docs/native-video-plan.md §8). +// +// Measures, in exact ce_pix ticks, per mode: line period, hsync width, +// sync→active delay, active width, field period, vsync width, active lines +// per field — and for 480i: field alternation and the half-line vsync offset +// (both vsync intervals must be exactly 262.5 lines = 225225 ticks, which is +// impossible without the offset). Also exercises the h/v offset trims and +// their clamping. 
+// +// Run: tb/run.sh + +`timescale 1ns/1ps + +module native_video_timing_tb; + +reg clk = 0; +always #18.5185 clk = ~clk; // 27 MHz + +reg reset = 1; + +reg [1:0] mode_in = 2'd0; +reg signed [7:0] h_offset_in = 8'sd0; +reg signed [3:0] v_offset_in = 4'sd0; + +wire [1:0] mode; +wire field, hsync, vsync, hblank, vblank, de, new_frame, new_line; +wire [9:0] hcount; +wire [8:0] vcount; + +// ce_pix divider mirrors menu.sv: /4 (6.75 MHz) progressive, /2 (13.5 MHz) 480i. +reg [1:0] ce_div = 0; +reg ce_pix = 0; +always @(posedge clk) begin + if (reset) ce_div <= 2'd0; + else ce_div <= ce_div + 2'd1; + ce_pix <= (mode == 2'd1) ? ce_div[0] : (ce_div == 2'd0); +end + +native_video_timing dut +( + .clk (clk), + .ce_pix (ce_pix), + .reset (reset), + .mode_in (mode_in), + .h_offset_in (h_offset_in), + .v_offset_in (v_offset_in), + .mode (mode), + .field (field), + .hsync (hsync), + .vsync (vsync), + .hblank (hblank), + .vblank (vblank), + .de (de), + .hcount (hcount), + .vcount (vcount), + .new_frame (new_frame), + .new_line (new_line) +); + +// ---- monitor: everything in ce_pix ticks --------------------------------- +// DUT outputs are sampled one tick late, uniformly, so intervals are exact. 
+integer tick = 0;                      // count of ce_pix ticks; time base for every measurement below
+reg hs_d = 0, vs_d = 0, de_d = 0;      // previous-tick copies of hsync/vsync/de for edge detection
+reg await_de = 0;                      // set at a vsync rise, cleared at the first de rise after it
+reg vs_field = 0, vs_field_d = 0;      // `field` as captured at the last two vsync rises
+
+integer hs_rise_tick = 0, vs_rise_tick = 0, de_rise_tick = 0;  // tick of the most recent rising edge
+integer hs_period = 0, hs_width = 0;
+integer de_width = 0, hs_to_de = 0, vs_to_de = 0;
+integer vs_period = 0, vs_period_d = 0, vs_width = 0;          // vs_period_d: the interval before vs_period
+integer de_lines = 0, de_lines_last = 0;                       // de rises since last vsync rise / in the previous interval
+integer vs_count = 0;                                          // vsync rising edges seen so far
+
+always @(posedge clk) begin
+    if (ce_pix) begin
+        if (hsync & ~hs_d) begin        // hsync rising edge
+            hs_period <= tick - hs_rise_tick;
+            hs_rise_tick <= tick;
+        end
+        if (~hsync & hs_d) hs_width <= tick - hs_rise_tick;   // hsync falling edge
+
+        if (de & ~de_d) begin           // de rising edge: start of an active line
+            de_rise_tick <= tick;
+            hs_to_de <= tick - hs_rise_tick;
+            de_lines <= de_lines + 1;
+            if (await_de) begin         // first active line after a vsync rise
+                vs_to_de <= tick - vs_rise_tick;
+                await_de <= 0;
+            end
+        end
+        if (~de & de_d) de_width <= tick - de_rise_tick;      // de falling edge
+
+        if (vsync & ~vs_d) begin        // vsync rising edge: close out the previous interval
+            vs_period_d <= vs_period;
+            vs_period <= tick - vs_rise_tick;
+            vs_rise_tick <= tick;
+            de_lines_last <= de_lines;
+            de_lines <= 0;              // placed after the de-rise increment above, so this one wins on a shared tick
+            vs_field_d <= vs_field;
+            vs_field <= field;
+            await_de <= 1;
+            vs_count <= vs_count + 1;
+        end
+        if (~vsync & vs_d) vs_width <= tick - vs_rise_tick;   // vsync falling edge
+
+        hs_d <= hsync;
+        vs_d <= vsync;
+        de_d <= de;
+        tick <= tick + 1;
+    end
+end
+
+// ---- helpers ---------------------------------------------------------------
+integer errors = 0;                    // failed-check count; tested at the end of the test sequence
+
+task check(input string name, input integer got, input integer exp);  // log pass/FAIL for got vs exp
+    begin
+        if (got !== exp) begin          // case inequality: an X or Z in `got` counts as a failure
+            errors = errors + 1;
+            $display("FAIL %-32s got %0d, expected %0d", name, got, exp);
+        end
+        else $display("pass %-32s %0d", name, got);
+    end
+endtask
+
+// Wait n vsync rising edges (plenty for the field-wrap latch + a full
+// measurable frame after any control change).
+task settle(input integer n); + integer target; + begin + target = vs_count + n; + wait (vs_count >= target); + @(posedge clk); + end +endtask + +// ---- test sequence --------------------------------------------------------- +initial begin + repeat (8) @(posedge clk); + reset = 0; + + // ---- mode 0: 352x240p60 NTSC -------------------------------------- + settle(3); + $display("--- mode 0: 352x240p60 (line 63.556us, 15734.27 Hz, 60.05 Hz) ---"); + check("ntsc line period (px)", hs_period, 429); + check("ntsc hsync width (px)", hs_width, 32); + check("ntsc sync->active (px)", hs_to_de, 65); // 32 sync + 33 BP + check("ntsc active width (px)", de_width, 352); + check("ntsc field period (px)", vs_period, 429*262); + check("ntsc vsync width (px)", vs_width, 429*3); + check("ntsc active lines", de_lines_last, 240); + check("ntsc vsync->active (px)", vs_to_de, 429*19); // 3 sync + 16 BP + check("ntsc field flat", {31'd0, vs_field}, 0); + + // ---- mode 2: 352x288p50 PAL ---------------------------------------- + mode_in = 2'd2; + settle(4); + $display("--- mode 2: 352x288p50 (line 64.000us, 15625.00 Hz, 50.08 Hz) ---"); + check("pal line period (px)", hs_period, 432); + check("pal hsync width (px)", hs_width, 32); + check("pal sync->active (px)", hs_to_de, 69); // 32 sync + 37 BP + check("pal active width (px)", de_width, 352); + check("pal field period (px)", vs_period, 432*312); + check("pal vsync width (px)", vs_width, 432*3); + check("pal active lines", de_lines_last, 288); + + // ---- mode 1: 720x480i60 --------------------------------------------- + mode_in = 2'd1; + settle(5); + $display("--- mode 1: 720x480i60 (line 63.556us, 15734.27 Hz, 59.94 Hz) ---"); + check("480i line period (px)", hs_period, 858); + check("480i hsync width (px)", hs_width, 62); + check("480i sync->active (px)", hs_to_de, 119); // 62 sync + 57 BP + check("480i active width (px)", de_width, 720); + // Both vsync intervals = 262.5 lines exactly: proves the half-line + // offset (integer 
field lengths would give 262*858 / 263*858). + check("480i field period A (px)", vs_period, 225225); + check("480i field period B (px)", vs_period_d, 225225); + check("480i vsync width (px)", vs_width, 858*3); + check("480i active lines/field", de_lines_last, 240); + check("480i fields alternate", {31'd0, vs_field ^ vs_field_d}, 1); + + // ---- offset trims + clamping (mode 0) ------------------------------- + mode_in = 2'd0; + settle(4); + $display("--- offset trims (mode 0) ---"); + + h_offset_in = 8'sd8; settle(4); + check("h=+8 sync->active", hs_to_de, 73); + check("h=+8 active width", de_width, 352); + check("h=+8 line period", hs_period, 429); + h_offset_in = -8'sd8; settle(4); + check("h=-8 sync->active", hs_to_de, 57); + h_offset_in = 8'sd100; settle(4); + check("h=+100 clamps to +8", hs_to_de, 73); + h_offset_in = -8'sd100; settle(4); + check("h=-100 clamps to -8", hs_to_de, 57); + h_offset_in = 8'sd0; + + v_offset_in = 4'sd2; settle(4); + check("v=+2 vsync->active", vs_to_de, 429*21); + check("v=+2 field period", vs_period, 429*262); + v_offset_in = -4'sd8; settle(4); + check("v=-8 vsync->active", vs_to_de, 429*11); + v_offset_in = 4'sd7; settle(4); + check("v=+7 clamps to +2", vs_to_de, 429*21); + v_offset_in = 4'sd0; + + // ---- mode change sanity: back to NTSC after everything -------------- + settle(4); + check("ntsc restore line period", hs_period, 429); + check("ntsc restore sync->active", hs_to_de, 65); + check("ntsc restore field flat", {31'd0, vs_field}, 0); + + if (errors == 0) $display("ALL CHECKS PASSED"); + else begin + $display("%0d CHECK(S) FAILED", errors); + $fatal(1); + end + $finish; +end + +initial begin + #2_000_000_000; // 2 s simulated-time guard + $display("TIMEOUT"); + $fatal(1); +end + +endmodule diff --git a/tb/run.sh b/tb/run.sh new file mode 100755 index 00000000..8de90d0b --- /dev/null +++ b/tb/run.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# Simulate the native video testbenches with Icarus Verilog. 
+set -e +cd "$(dirname "$0")" + +iverilog -g2012 -o native_video_timing_tb.vvp \ + ../rtl/native_video_timing.sv native_video_timing_tb.sv +vvp native_video_timing_tb.vvp + +iverilog -g2012 -o native_video_reader_tb.vvp \ + ../rtl/native_video_timing.sv ../rtl/native_video_reader.sv \ + ../rtl/native_video_top.sv dcfifo_sim.sv native_video_reader_tb.sv +vvp native_video_reader_tb.vvp From 23bcb88f3b031f82a47f06c0bf7b297d237cecb2 Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Thu, 11 Jun 2026 11:40:20 +0800 Subject: [PATCH 4/6] docs: add frontend implementation brief for native video v2 - Self-contained handoff document for the zaparoo-launcher team: the v2 DDR contract (word layout, buffer addresses, publish/stop protocol, write ordering), the 352x240 writer changes, safe-area and 480i flicker rules, the calibration screen spec, and the hardware verification checklist. --- docs/native-video-frontend-brief.md | 174 ++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 docs/native-video-frontend-brief.md diff --git a/docs/native-video-frontend-brief.md b/docs/native-video-frontend-brief.md new file mode 100644 index 00000000..2c0bd3c3 --- /dev/null +++ b/docs/native-video-frontend-brief.md @@ -0,0 +1,174 @@ +# Frontend implementation brief: native CRT video v2 (zaparoo-launcher) + +**Audience:** the zaparoo-launcher team / an implementation agent with no prior +context. This document is self-contained; `docs/native-video-plan.md` (same +repo) has the full background and rationale if you want it. +**Counterpart:** Menu_MiSTer fork, branch `fix/native-video-centering` — the +FPGA side of everything below is implemented, simulated, and pushed. The +launcher work in this brief is the only remaining piece. +**Existing code this modifies:** `src/app/native_video_writer.cpp` and the +`--crt` startup path in zaparoo-launcher (see also its `docs/native-core-poc.md`). + +--- + +## 1. 
What changed and why you're doing this + +The menu core no longer outputs a 320x240 picture with hand-tuned porches, and +it no longer has any OSD video options. It now generates broadcast-standard +15 kHz timing in three modes, and **everything the launcher used to rely on +the OSD for (CRT mode on/off, H/V centering) now travels through the DDR +control block you already write**. Key consequences for the app: + +- The framebuffer is now **352x240** (not 320x240). 352 px fills a standard + NTSC/PAL active line edge-to-edge; the old 320 was ~10% too narrow on every + correctly calibrated CRT. +- The picture now *overscans* like broadcast TV: the outer few percent of the + framebuffer is cropped on most sets. The UI must adopt safe-area rules + (section 5) — this is as much a part of the fix as the FPGA work. +- There is no "CRT mode" toggle anywhere. **Publishing frames IS the mode + switch**: the core shows its noise pattern until your control word goes + live and reverts when you zero it. +- Two new modes exist when you're ready for them: **720x480i60** (mode 1) and + **352x288p50 PAL** (mode 2). The core side is done; you opt in per-frame + via the mode field. + +Backward compatibility is handled on the core side: an old launcher writing +the legacy 320x240 layout still displays (centered with 16-px black side +bars), and your existing fb-geometry validation already self-disables the +writer against an old core. Ship order doesn't matter. + +## 2. DDR contract v2 (normative) + +Physical base `0x3A000000`, mmap **0x300000** (3 MB, up from 640 KB). + +| Offset | Contents | +|---|---| +| `+0x0` | **word0**: `(frame_counter << 2) \| active_buffer`. Bit 1 reserved, write 0. `0` means "writer stopped". 
| +| `+0x4` | **word1**: `[31:16]` magic `0x5A50` ("ZP"); `[15:8]` h_offset, signed int8, pixels, + = right; `[7:4]` v_offset, signed 4-bit, lines, + = down; `[3:0]` mode | +| `+0x1000` | buffer 0 | +| `+0x180000` | buffer 1 | + +Modes: `0` = 352x240 @ 60p (NTSC, default), `1` = 720x480 @ 60i, +`2` = 352x288 @ 50p (PAL). Stride is always tight (`width * 4` bytes). +Pixel format is unchanged: memcpy linuxfb BGRX rows as-is; the core swaps +bytes in RTL. + +Per-mode framebuffer numbers: + +| Mode | fb size | stride | frame bytes | +|---|---|---|---| +| 0 | 352x240 | 1408 | 0x52800 (337 920) | +| 2 | 352x288 | 1408 | 0x63000 (405 504) | +| 1 | 720x480 | 2880 | 0x151800 (1 382 400) | + +Protocol rules: + +1. **Init:** write word1 (magic + mode + saved offsets) **before** the first + word0 publish. The core reads both words in one atomic 64-bit beat once + per vblank, so word1-then-word0 ordering guarantees the first frame is + interpreted correctly. +2. **Publish:** render into the inactive buffer, then write word0 once with + the incremented counter and that buffer's index (single 32-bit store — + this is the atomic commit). Counter is 30 bits, start at 1. +3. **Mode/offset change at runtime:** update word1 first, then bump word0. + The core latches mode and offsets at the field boundary; modes 0↔1 keep + the same line rate (instant re-lock), 0/1↔2 is a 50↔60 Hz retune (the CRT + takes a moment, like real hardware). +4. **Stop:** zero word0 (zero word1 too for tidiness). The core reverts to + its noise pattern within one frame. This is also your crash-recovery + story — if the launcher dies and the words go stale, the core keeps + scanning the last frame; only a zeroed word0 releases it, so keep the + existing stop-handler behavior. +5. **Offsets:** the core honors **−8…+8 px** horizontal, **−8…+2 lines** + vertical, and clamps anything outside (a garbage word1 degrades to a + saturated shift, never broken sync). 
Don't rely on the clamp — keep the + calibration UI within those ranges. +6. **480i is rendered progressive:** publish one normal 720x480 frame; the + core extracts fields itself (reads source line `2*line + field`). No + field splitting, no half-frame timing on the ARM side. + +## 3. Task 1 — Phase A (required): 352x240 writer + safe-area UI + +This is the must-ship piece; modes 1 and 2 are follow-ups. + +1. `--crt` startup sets fb0 to **352x240 32bpp** (the `vmode -r 352 240 + rgb32` equivalent of the current 320x240 setup). Update the fb-geometry + validation to expect 352x240. +2. Update writer constants: width 352, stride 1408, frame size 0x52800, + buffers at `+0x1000` / `+0x180000`, mmap 0x300000. +3. Write word1 on init: magic `0x5A50`, mode 0, offsets from launcher config + (default 0/0). Clear both words on stop. +4. UI safe-area pass (section 5). +5. Calibration screen (section 6). + +Acceptance: on hardware with the new core, the launcher UI fills a CRT +edge-to-edge; killing the launcher returns the noise pattern; a capture +device reports 15.734 kHz / 240p. + +## 4. Tasks 2 & 3 — PAL and 480i (when ready) + +**PAL (mode 2):** add a "video standard: NTSC / PAL" user setting. PAL +renders **352x288** and publishes mode 2. Note most PAL sets accept 60 Hz +RGB over SCART ("PAL-60"), so mode 0 remains a fine default in PAL regions; +mode 2 is for strict-50 Hz sets and correct-speed feel. + +**480i (mode 1):** add a 720x480 rendering path and (optionally) per-screen +mode selection — e.g. main UI in 240p, text-heavy screens in 480i. +Flicker discipline is mandatory (section 5, rule 4). + +## 5. UI rendering rules (apply to every mode) + +These are not suggestions; geometry alone doesn't fix "every CRT crops +differently": + +1. **Render full-bleed.** Background art/color must reach all four edges. + The outer few percent will be cropped on most sets and visible on a few — + both must look intentional. +2. 
**Safe areas** (SMPTE SD practice): + - *Action safe* (all interactive/meaningful content): central **90%** — + ~317x216 of 352x240, ~317x259 of 352x288, ~648x432 of 720x480. + - *Title safe* (text that must be readable): central **80%** — + ~282x192 / ~282x230 / ~576x384. +3. **Pixel aspect ratio is 10:11** (pixels ~9% narrower than square) in all + three modes. Ignorable for boxes-and-text; correct for logos/art that + must not look squished (a true circle needs ~10% more width in pixels). +4. **480i flicker discipline:** every scanline repaints 30x/second, so 1-px + horizontal lines and fine text shimmer. Use ≥2 px horizontal strokes, + avoid hard 1-px horizontal edges, or apply a mild vertical blur (the + standard console-era 480i dashboard trick). Existing CRT typography rules + in `native-core-poc.md` (integer snapping, bitmap fonts) stay in force. + +## 6. Calibration screen + +The launcher now owns centering (the OSD options are gone): + +- Draw a border test pattern (240p-test-suite style: 1-px frame at the + extreme edge, rectangles at the 90% and 80% safe areas, cross-hatch). +- Arrow keys nudge h_offset (−8…+8, 1-px steps) and v_offset (−8…+2), + publishing word1 live so the user sees the picture move in real time. +- Persist the values in launcher config; load them at init. Defaults are + zero — the standard timing is the centering mechanism, trims only + compensate for miscentered sets. + +## 7. Verification checklist (frontend-visible items) + +- Fill/centering on **2–3 different CRTs** plus a capture device (should + report 15.734 kHz exactly; 480i should be detected as 480i, not 240p). +- Writer-stop: kill the launcher → noise pattern returns. +- Trim screen: live nudge both axes; values survive a restart; out-of-range + values (if forced) shift-and-saturate without disturbing sync. 
+- Compat matrix: old launcher + new core → centered 320x240 with side bars; + new launcher + old core → writer self-disables via fb-geometry validation, + core shows noise (obvious, not subtle, breakage). +- 480i: fine horizontal lines should shimmer, not stack (no line pairing). +- HDMI output still locks in every mode (the core's ascal path handles it; + just confirm). + +## 8. Reference + +- FPGA-side spec and rationale: `docs/native-video-plan.md` (Menu_MiSTer). +- RTL that consumes this contract: `rtl/native_video_reader.sv` (the word1 + parse and buffer addresses are the source of truth, with simulation + coverage in `tb/native_video_reader_tb.sv`). +- Current writer: `src/app/native_video_writer.cpp` (zaparoo-launcher). +- Why the scaler is bypassed: `docs/native-core-poc.md` (zaparoo-launcher). From 9f1c2d6ced9b0f6446430506c09bb55ff8381d11 Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Thu, 11 Jun 2026 12:39:28 +0800 Subject: [PATCH 5/6] fix: generate 27.000 MHz from a dedicated video PLL The fitter rejected 27.000000 MHz on the shared PLL: all outputs of one PLL divide a common VCO, and lcm(100 MHz clk_sys, 27 MHz) = 2700 MHz is beyond the Cyclone V's 600-1600 MHz VCO range. 27.027027 MHz (1000/37) is exactly the closest sharable frequency, which is why stock MiSTer uses it. - Revert rtl/pll/pll_0002.v to stock; its 27.027 MHz output is now unconnected. - Add rtl/pll_video.v (+ pll_video_0002.v, qip): dedicated PLL whose sole output is exact 27.000000 MHz (VCO 1350 MHz = 50 x 27, C = 50); CLK_VIDEO comes from it, and the native video path holds in reset until it locks. - Add menu.sdc declaring the video clock asynchronous to all other clocks. It was absent from sys_top.sdc's exclusive clock groups, so TimeQuest analyzed the two-flop synchronizer and line-FIFO crossings as related paths (worst slack -10.4 ns, TNS -1529). All crossings are designed CDC structures. 
With the constraint, the full Quartus 17.0.2 compile meets timing on every domain (worst setup slack +0.53 ns, TNS 0), which the previous 27.027 MHz baseline did not (-4.8 ns, TNS -426). - Document the shared-VCO constraint in docs/native-video-plan.md. --- docs/native-video-plan.md | 23 ++++++--- files.qip | 2 + menu.sdc | 12 +++++ menu.sv | 17 +++++-- rtl/pll/pll_0002.v | 2 +- rtl/pll_video.qip | 6 +++ rtl/pll_video.v | 25 ++++++++++ rtl/pll_video/pll_video_0002.v | 86 ++++++++++++++++++++++++++++++++++ 8 files changed, 163 insertions(+), 10 deletions(-) create mode 100644 menu.sdc create mode 100644 rtl/pll_video.qip create mode 100644 rtl/pll_video.v create mode 100644 rtl/pll_video/pll_video_0002.v diff --git a/docs/native-video-plan.md b/docs/native-video-plan.md index 8894b4fd..bba293e0 100644 --- a/docs/native-video-plan.md +++ b/docs/native-video-plan.md @@ -145,9 +145,17 @@ geometry (section 4) plus safe-area UI rules (section 6). ## 4. Target timings -Everything derives from **one PLL change**: output 1 of `rtl/pll/pll_0002.v` -goes from 27.027027 MHz to **27.000000 MHz** — the universal SD video clock -(it is exactly 1716 × NTSC line rate and 1728 × PAL line rate). +Everything derives from one clock change: CLK_VIDEO goes from 27.027027 MHz +to **27.000000 MHz** — the universal SD video clock (it is exactly 1716 × +NTSC line rate and 1728 × PAL line rate). + +> **Implementation note (found at fit time):** 27.000 MHz cannot come from +> the existing PLL. All outputs of one PLL divide a shared VCO, and +> lcm(100 MHz clk_sys, 27 MHz) = 2700 MHz exceeds the Cyclone V's +> 600–1600 MHz VCO range — 27.027027 (1000 MHz / 37) is precisely the +> closest sharable frequency, which is why stock MiSTer uses it. The fix is +> a dedicated video PLL (`rtl/pll_video.v`, VCO 1350 MHz = 50 × 27, C = 50) +> whose sole output drives CLK_VIDEO; `pll_0002.v` stays stock. 
| Mode | ce_pix | H total | H active / FP / sync / BP (px) | V total | V active / FP / sync / BP (lines) | Line rate | Refresh | |---|---|---|---|---|---|---|---| @@ -348,9 +356,12 @@ These are as much a part of the fix as the RTL — geometry alone doesn't solve FPGA (this repo): -1. **`rtl/pll/pll_0002.v`**: `output_clock_frequency1` 27.027027 MHz → - `27.000000 MHz`. (Same single-line style as the earlier 20→27.027 change; - no other PLL params move.) +1. ~~`rtl/pll/pll_0002.v`: `output_clock_frequency1` 27.027027 MHz → + `27.000000 MHz`.~~ Superseded: the shared PLL cannot fit 27.000 MHz (see + the implementation note in §4). Instead `pll_0002.v` stays stock and a + new dedicated `rtl/pll_video.v` (+ `rtl/pll_video/pll_video_0002.v`, + `rtl/pll_video.qip`) generates CLK_VIDEO = 27.000000 MHz; menu.sv holds + the native video path in reset until it locks. 2. **`rtl/native_video_timing.sv`**: mode-0 constants — H 352/12/32/33, V 240/3/3/16. Structure the constants as per-mode parameter sets selected by a `mode` input (tied to 0 until Phases B/C) so later modes are additive. diff --git a/files.qip b/files.qip index 93247c16..b1b07c40 100644 --- a/files.qip +++ b/files.qip @@ -1,3 +1,5 @@ +set_global_assignment -name QIP_FILE rtl/pll_video.qip +set_global_assignment -name SDC_FILE menu.sdc set_global_assignment -name SYSTEMVERILOG_FILE rtl/sdram.sv set_global_assignment -name VERILOG_FILE rtl/lfsr.v set_global_assignment -name SYSTEMVERILOG_FILE rtl/cos.sv diff --git a/menu.sdc b/menu.sdc new file mode 100644 index 00000000..0a01b314 --- /dev/null +++ b/menu.sdc @@ -0,0 +1,12 @@ +# Core-level timing constraints (processed after sys/sys_top.sdc). + +# CLK_VIDEO comes from a dedicated PLL (rtl/pll_video.v) and is asynchronous +# to every other clock in the design: all crossings into and out of the video +# domain go through two-flop synchronizers or the line FIFO's dual-clock +# logic (rtl/native_video_reader.sv, rtl/native_video_top.sv). 
Without this +# group, derive_pll_clocks leaves the 27 MHz output related to the other +# clocks (shared 50 MHz reference, and absent from sys_top.sdc's exclusive +# groups), and the fitter tries to close those CDC paths against a ~1 ns +# edge relationship. +set_clock_groups -asynchronous \ + -group [get_clocks { *|pll_video|pll_video_inst|altera_pll_i|*[*].*|divclk}] diff --git a/menu.sv b/menu.sv index f6b84ea2..6aab28c3 100644 --- a/menu.sv +++ b/menu.sv @@ -237,10 +237,21 @@ pll pll .refclk(CLK_50M), .rst(0), .outclk_0(clk_sys), - .outclk_1(CLK_VIDEO), + .outclk_1(), // stock 27.027 MHz output, unused (see pll_video) .locked(locked) ); +// Exact 27.000000 MHz video clock from its own PLL: 27 MHz can't share a +// VCO with the 100 MHz clk_sys (lcm = 2700 MHz, above the VCO ceiling). +wire vid_locked; +pll_video pll_video +( + .refclk(CLK_50M), + .rst(0), + .outclk_0(CLK_VIDEO), + .locked(vid_locked) +); + ///////////////////// SDRAM /////////////////// // @@ -472,7 +483,7 @@ wire [1:0] native_mode; reg [1:0] ce_div; reg ce_pix; always @(posedge CLK_VIDEO) begin - if (RESET) ce_div <= 2'd0; + if (RESET | ~vid_locked) ce_div <= 2'd0; else ce_div <= ce_div + 2'd1; ce_pix <= (native_mode == 2'd1) ? 
ce_div[0] : (ce_div == 2'd0); end @@ -498,7 +509,7 @@ native_video_top native_video .clk_sys (clk_sys), .clk_vid (CLK_VIDEO), .ce_pix (ce_pix), - .reset (RESET), + .reset (RESET | ~vid_locked), .ddr_busy (DDRAM_BUSY), .ddr_burstcnt (DDRAM_BURSTCNT), diff --git a/rtl/pll/pll_0002.v b/rtl/pll/pll_0002.v index 833cc2b9..4c7ed140 100644 --- a/rtl/pll/pll_0002.v +++ b/rtl/pll/pll_0002.v @@ -25,7 +25,7 @@ module pll_0002( .output_clock_frequency0("100.000000 MHz"), .phase_shift0("0 ps"), .duty_cycle0(50), - .output_clock_frequency1("27.000000 MHz"), + .output_clock_frequency1("27027027 Hz"), .phase_shift1("0 ps"), .duty_cycle1(50), .output_clock_frequency2("0 MHz"), diff --git a/rtl/pll_video.qip b/rtl/pll_video.qip new file mode 100644 index 00000000..e8d4b6d1 --- /dev/null +++ b/rtl/pll_video.qip @@ -0,0 +1,6 @@ +set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "pll_video.v"] +set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "pll_video/pll_video_0002.v"] + +set_instance_assignment -name PLL_COMPENSATION_MODE DIRECT -to "*pll_video_0002*|altera_pll:altera_pll_i*|*" +set_instance_assignment -name PLL_AUTO_RESET ON -to "*pll_video_0002*|altera_pll:altera_pll_i*|*" +set_instance_assignment -name PLL_BANDWIDTH_PRESET AUTO -to "*pll_video_0002*|altera_pll:altera_pll_i*|*" diff --git a/rtl/pll_video.v b/rtl/pll_video.v new file mode 100644 index 00000000..1fbb347d --- /dev/null +++ b/rtl/pll_video.v @@ -0,0 +1,25 @@ +// Dedicated video PLL: exact 27.000000 MHz for SD CRT timing. +// +// This cannot come from the main PLL: every output of a PLL divides the +// same VCO, and the smallest common multiple of 100 MHz (clk_sys) and +// 27 MHz is 2700 MHz — outside the Cyclone V's 600-1600 MHz VCO range. +// (That constraint is why stock MiSTer uses 27.027027 MHz: 1000 MHz / 37 +// is the closest to 27 MHz a VCO shared with 100 MHz can reach.) +// Standalone, 27 MHz is exact: VCO = 50 MHz x 27 = 1350 MHz, C = 50. 
+ +`timescale 1 ps / 1 ps +module pll_video ( + input wire refclk, // refclk.clk + input wire rst, // reset.reset + output wire outclk_0, // outclk0.clk + output wire locked // locked.export + ); + + pll_video_0002 pll_video_inst ( + .refclk (refclk), // refclk.clk + .rst (rst), // reset.reset + .outclk_0 (outclk_0), // outclk0.clk + .locked (locked) // locked.export + ); + +endmodule diff --git a/rtl/pll_video/pll_video_0002.v b/rtl/pll_video/pll_video_0002.v new file mode 100644 index 00000000..0980b88b --- /dev/null +++ b/rtl/pll_video/pll_video_0002.v @@ -0,0 +1,86 @@ +`timescale 1ns/10ps +module pll_video_0002( + + // interface 'refclk' + input wire refclk, + + // interface 'reset' + input wire rst, + + // interface 'outclk0' + output wire outclk_0, + + // interface 'locked' + output wire locked +); + + altera_pll #( + .fractional_vco_multiplier("false"), + .reference_clock_frequency("50.0 MHz"), + .operation_mode("direct"), + .number_of_clocks(1), + .output_clock_frequency0("27.000000 MHz"), + .phase_shift0("0 ps"), + .duty_cycle0(50), + .output_clock_frequency1("0 MHz"), + .phase_shift1("0 ps"), + .duty_cycle1(50), + .output_clock_frequency2("0 MHz"), + .phase_shift2("0 ps"), + .duty_cycle2(50), + .output_clock_frequency3("0 MHz"), + .phase_shift3("0 ps"), + .duty_cycle3(50), + .output_clock_frequency4("0 MHz"), + .phase_shift4("0 ps"), + .duty_cycle4(50), + .output_clock_frequency5("0 MHz"), + .phase_shift5("0 ps"), + .duty_cycle5(50), + .output_clock_frequency6("0 MHz"), + .phase_shift6("0 ps"), + .duty_cycle6(50), + .output_clock_frequency7("0 MHz"), + .phase_shift7("0 ps"), + .duty_cycle7(50), + .output_clock_frequency8("0 MHz"), + .phase_shift8("0 ps"), + .duty_cycle8(50), + .output_clock_frequency9("0 MHz"), + .phase_shift9("0 ps"), + .duty_cycle9(50), + .output_clock_frequency10("0 MHz"), + .phase_shift10("0 ps"), + .duty_cycle10(50), + .output_clock_frequency11("0 MHz"), + .phase_shift11("0 ps"), + .duty_cycle11(50), + .output_clock_frequency12("0 
MHz"), + .phase_shift12("0 ps"), + .duty_cycle12(50), + .output_clock_frequency13("0 MHz"), + .phase_shift13("0 ps"), + .duty_cycle13(50), + .output_clock_frequency14("0 MHz"), + .phase_shift14("0 ps"), + .duty_cycle14(50), + .output_clock_frequency15("0 MHz"), + .phase_shift15("0 ps"), + .duty_cycle15(50), + .output_clock_frequency16("0 MHz"), + .phase_shift16("0 ps"), + .duty_cycle16(50), + .output_clock_frequency17("0 MHz"), + .phase_shift17("0 ps"), + .duty_cycle17(50), + .pll_type("General"), + .pll_subtype("General") + ) altera_pll_i ( + .rst (rst), + .outclk (outclk_0), + .locked (locked), + .fboutclk ( ), + .fbclk (1'b0), + .refclk (refclk) + ); +endmodule From b0fe65a4de6ba3d0f5390bf6af1dcb9edb405928 Mon Sep 17 00:00:00 2001 From: Callan Barrett Date: Thu, 11 Jun 2026 12:55:15 +0800 Subject: [PATCH 6/6] docs: clarify CRT mode coordination in the frontend brief - The app-level CRT mode (--crt: pixel fonts, CRT layout, DDR writer) stays; only the core-side status[9] enable and offset status bits are gone. The old wording ("no CRT mode toggle anywhere") read as if the concept itself was removed. - Add section 3 documenting the existing Main_MiSTer coordination (config/zaparoo_launcher_crt.bin read at menu-core load, OSD toggle respawning only the frontend) and the required Main-fork edits: drop the dead status writes, move offset ownership to the launcher, update fb mode to 352x240/1408, and widen the DDR blank to 0x300000. - Sketch a launcher-side toggle option via a reserved exit code so neither Main nor the system needs a restart. --- docs/native-video-frontend-brief.md | 82 +++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/native-video-frontend-brief.md b/docs/native-video-frontend-brief.md index 2c0bd3c3..2ea4604e 100644 --- a/docs/native-video-frontend-brief.md +++ b/docs/native-video-frontend-brief.md @@ -7,7 +7,9 @@ repo) has the full background and rationale if you want it. 
FPGA side of everything below is implemented, simulated, and pushed. The launcher work in this brief is the only remaining piece. **Existing code this modifies:** `src/app/native_video_writer.cpp` and the -`--crt` startup path in zaparoo-launcher (see also its `docs/native-core-poc.md`). +`--crt` startup path in zaparoo-launcher (see also its `docs/native-core-poc.md`), +plus `support/zaparoo/alt_launcher.cpp` / `launcher_pages.cpp` in the +Main_MiSTer fork (section 3). --- @@ -24,10 +26,13 @@ control block you already write**. Key consequences for the app: correctly calibrated CRT. - The picture now *overscans* like broadcast TV: the outer few percent of the framebuffer is cropped on most sets. The UI must adopt safe-area rules - (section 5) — this is as much a part of the fix as the FPGA work. -- There is no "CRT mode" toggle anywhere. **Publishing frames IS the mode - switch**: the core shows its noise pattern until your control word goes - live and reverts when you zero it. + (section 6) — this is as much a part of the fix as the FPGA work. +- The *core-side* CRT enable is gone: the new core has no `status[9]` bit + and no OSD video options. **Publishing frames IS the core's mode switch**: + it shows its noise pattern until your control word goes live and reverts + when you zero it. The *app-level* CRT mode (the `--crt` startup path: + pixel fonts, CRT layout, DDR writer) is unchanged and very much stays — + see section 3 for how it's coordinated now. - Two new modes exist when you're ready for them: **720x480i60** (mode 1) and **352x288p50 PAL** (mode 2). The core side is done; you opt in per-frame via the mode field. @@ -87,7 +92,53 @@ Protocol rules: core extracts fields itself (reads source line `2*line + field`). No field splitting, no half-frame timing on the ARM side. -## 3. Task 1 — Phase A (required): 352x240 writer + safe-area UI +## 3. 
ARM-side coordination: who turns CRT mode on + +"CRT mode" remains a real mode of the *app*: it decides whether the launcher +renders pixel fonts and CRT layout into the DDR writer (`--crt`) or runs the +normal HDMI/scaler path. The Main_MiSTer fork already owns that decision and +the mechanism survives v2 almost unchanged: + +- **Persisted state:** `config/zaparoo_launcher_crt.bin` (1-byte bool, + written via `FileSaveConfig`). Main reads it when the menu core loads + (`zaparoo_alt_launcher_init_for_menu()` in `support/zaparoo/alt_launcher.cpp`) + and spawns the frontend with or without `--crt`. +- **Toggling:** the OSD "Zaparoo Frontend → Video" page calls + `alt_launcher_toggle_crt()`, which persists the new value, SIGTERMs the + frontend, and respawns it with the new flag. **No Main restart is needed** + — only the frontend process bounces. Keep this; a full Main re-exec is + strictly worse (slower, drops core state) and buys nothing. + +What v2 changes in Main (these are required Main-fork edits, same effort +bucket as Task 1): + +1. `user_io_status_set("[9]", …)` everywhere in `alt_launcher.cpp` is now a + no-op — the new core has no CRT status bit. Delete the writes and the + 500 ms re-assert timer. The frontend publishing word0/word1 *is* the + enable; Main's job shrinks to fb-mode setup, blanking, and spawning. +2. The H/V offset status writes (`[13:10]`/`[17:14]`) are dead too. Offsets + move into DDR word1, which only the frontend writes. Remove the OSD + "H Offset"/"V Offset" entries in `launcher_pages.cpp` and the + `zaparoo_video_offsets.bin` handling; the launcher owns centering now + (section 7). Optional nicety: on first run, the launcher migrates the + two bytes from `config/zaparoo_video_offsets.bin` into its own config so + existing users keep their calibration. +3. `set_native_crt_fb_mode()`: 320x240 stride 1280 → **352x240 stride 1408**. +4. `blank_native_crt_fb()`: region size 0xA0000 → **0x300000**. 
Under v2, + zeroing the region isn't just ghost-clearing — a zeroed word0 means + "writer stopped", so the blank deterministically parks the core on its + noise pattern until the new frontend instance publishes. + +Open choice (pick during implementation): if the CRT toggle should also live +in the launcher's own settings UI, don't have the launcher restart Main. +Instead: launcher writes `zaparoo_launcher_crt.bin` itself and exits with a +reserved exit code (e.g. 42 = "re-read CRT config and respawn me"); Main's +`alt_launcher_poll()` exit handler treats that code as a respawn-with-reload +instead of `return_to_normal_mode()`. That's a ~10-line Main change and +reuses the existing respawn machinery. The OSD toggle can stay as a second +entry point — both paths converge on the same persisted bool + respawn. + +## 4. Task 1 — Phase A (required): 352x240 writer + safe-area UI This is the must-ship piece; modes 1 and 2 are follow-ups. @@ -98,14 +149,14 @@ This is the must-ship piece; modes 1 and 2 are follow-ups. buffers at `+0x1000` / `+0x180000`, mmap 0x300000. 3. Write word1 on init: magic `0x5A50`, mode 0, offsets from launcher config (default 0/0). Clear both words on stop. -4. UI safe-area pass (section 5). -5. Calibration screen (section 6). +4. UI safe-area pass (section 6). +5. Calibration screen (section 7). Acceptance: on hardware with the new core, the launcher UI fills a CRT edge-to-edge; killing the launcher returns the noise pattern; a capture device reports 15.734 kHz / 240p. -## 4. Tasks 2 & 3 — PAL and 480i (when ready) +## 5. Tasks 2 & 3 — PAL and 480i (when ready) **PAL (mode 2):** add a "video standard: NTSC / PAL" user setting. PAL renders **352x288** and publishes mode 2. Note most PAL sets accept 60 Hz @@ -114,9 +165,9 @@ mode 2 is for strict-50 Hz sets and correct-speed feel. **480i (mode 1):** add a 720x480 rendering path and (optionally) per-screen mode selection — e.g. main UI in 240p, text-heavy screens in 480i. 
-Flicker discipline is mandatory (section 5, rule 4). +Flicker discipline is mandatory (section 6, rule 4). -## 5. UI rendering rules (apply to every mode) +## 6. UI rendering rules (apply to every mode) These are not suggestions; geometry alone doesn't fix "every CRT crops differently": @@ -138,9 +189,10 @@ differently": standard console-era 480i dashboard trick). Existing CRT typography rules in `native-core-poc.md` (integer snapping, bitmap fonts) stay in force. -## 6. Calibration screen +## 7. Calibration screen -The launcher now owns centering (the OSD options are gone): +The launcher now owns centering (the core's status bits are gone and Main's +OSD offset entries go with them — see section 3, item 2): - Draw a border test pattern (240p-test-suite style: 1-px frame at the extreme edge, rectangles at the 90% and 80% safe areas, cross-hatch). @@ -150,7 +202,7 @@ The launcher now owns centering (the OSD options are gone): zero — the standard timing is the centering mechanism, trims only compensate for miscentered sets. -## 7. Verification checklist (frontend-visible items) +## 8. Verification checklist (frontend-visible items) - Fill/centering on **2–3 different CRTs** plus a capture device (should report 15.734 kHz exactly; 480i should be detected as 480i, not 240p). @@ -164,7 +216,7 @@ The launcher now owns centering (the OSD options are gone): - HDMI output still locks in every mode (the core's ascal path handles it; just confirm). -## 8. Reference +## 9. Reference - FPGA-side spec and rationale: `docs/native-video-plan.md` (Menu_MiSTer). - RTL that consumes this contract: `rtl/native_video_reader.sv` (the word1