From d4b5cec7792d7acabba9c379c1edcf93668b8dcb Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Fri, 1 May 2026 20:13:15 -0400 Subject: [PATCH 01/14] feat(v2): NLU benchmark + minimal mazegen for smoke tests Add nlu_pipeline (env, runner, agents, observation, examples) with sample mazes and smoke test scripts. Include mazegen models/generators/solver and render_dataset for PNG rendering and solver checks. Omit automatic dataset generation (orchestrator, validator, mechanisms, generate_dataset) and bulk generated_mazes for a follow-up PR. Ignore smoke_tests/results and terminal_output.txt. Co-authored-by: Cursor --- .gitignore | 10 + src/v2/automatic_maze_generation/__init__.py | 0 .../mazegen/__init__.py | 0 .../mazegen/generators.py | 485 ++++++++++++++++++ .../mazegen/models.py | 202 ++++++++ .../mazegen/solver.py | 261 ++++++++++ .../render_dataset.py | 381 ++++++++++++++ src/v2/nlu_pipeline/__init__.py | 0 src/v2/nlu_pipeline/nlu_benchmark/__init__.py | 1 + src/v2/nlu_pipeline/nlu_benchmark/agents.py | 128 +++++ src/v2/nlu_pipeline/nlu_benchmark/config.py | 38 ++ src/v2/nlu_pipeline/nlu_benchmark/env.py | 216 ++++++++ .../nlu_benchmark/examples/run_llm.py | 17 + .../nlu_benchmark/examples/run_local_llm.py | 15 + .../nlu_benchmark/examples/run_random.py | 9 + src/v2/nlu_pipeline/nlu_benchmark/feedback.py | 39 ++ src/v2/nlu_pipeline/nlu_benchmark/loader.py | 23 + .../nlu_pipeline/nlu_benchmark/observation.py | 92 ++++ src/v2/nlu_pipeline/nlu_benchmark/parser.py | 88 ++++ .../nlu_benchmark/prompt_strategies.py | 196 +++++++ src/v2/nlu_pipeline/nlu_benchmark/querying.py | 92 ++++ src/v2/nlu_pipeline/nlu_benchmark/renderer.py | 130 +++++ src/v2/nlu_pipeline/nlu_benchmark/runner.py | 219 ++++++++ .../sample mazes/V01_empty_room.json | 52 ++ .../sample mazes/V02_winding_corridor.json | 258 ++++++++++ .../sample mazes/V03_multi_path.json | 255 +++++++++ .../sample mazes/V04_single_key.json | 96 ++++ .../sample mazes/V05_single_switch.json | 99 ++++ .../sample 
mazes/V06_chain_ks.json | 124 +++++ .../sample mazes/V07_chain_sk.json | 124 +++++ .../sample mazes/V08_chain_kk.json | 119 +++++ .../sample mazes/V09_distractor_simple.json | 126 +++++ .../sample mazes/V10_distractor_chain.json | 122 +++++ .../sample mazes/pngs/V01_empty_room.png | Bin 0 -> 9831 bytes .../pngs/V02_winding_corridor.png | Bin 0 -> 17475 bytes .../sample mazes/pngs/V03_multi_path.png | Bin 0 -> 13946 bytes .../sample mazes/pngs/V04_single_key.png | Bin 0 -> 18676 bytes .../sample mazes/pngs/V05_single_switch.png | Bin 0 -> 17708 bytes .../sample mazes/pngs/V06_chain_ks.png | Bin 0 -> 19823 bytes .../sample mazes/pngs/V07_chain_sk.png | Bin 0 -> 19640 bytes .../sample mazes/pngs/V08_chain_kk.png | Bin 0 -> 21257 bytes .../pngs/V09_distractor_simple.png | Bin 0 -> 26139 bytes .../pngs/V10_distractor_chain.png | Bin 0 -> 22997 bytes .../smoke_tests/analyze_smoke_runner_logs.py | 99 ++++ .../smoke_prompting_observation_querying.py | 397 ++++++++++++++ .../smoke_tests/smoke_smart_manual.py | 164 ++++++ 46 files changed, 4677 insertions(+) create mode 100644 .gitignore create mode 100644 src/v2/automatic_maze_generation/__init__.py create mode 100644 src/v2/automatic_maze_generation/mazegen/__init__.py create mode 100644 src/v2/automatic_maze_generation/mazegen/generators.py create mode 100644 src/v2/automatic_maze_generation/mazegen/models.py create mode 100644 src/v2/automatic_maze_generation/mazegen/solver.py create mode 100644 src/v2/automatic_maze_generation/render_dataset.py create mode 100644 src/v2/nlu_pipeline/__init__.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/__init__.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/agents.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/config.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/env.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py create mode 100644 
src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/feedback.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/loader.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/observation.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/parser.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/prompt_strategies.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/querying.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/renderer.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/runner.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V01_empty_room.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V02_winding_corridor.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V01_empty_room.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V02_winding_corridor.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V03_multi_path.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V04_single_key.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample 
mazes/pngs/V06_chain_ks.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V08_chain_kk.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V09_distractor_simple.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..72676a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +build/ +__pycache__ +*.Zone.Identifier +.venv/ +token.txt +.pytest_cache/ +.env + +src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/results/ +src/v2/nlu_pipeline/nlu_benchmark/terminal_output.txt diff --git a/src/v2/automatic_maze_generation/__init__.py b/src/v2/automatic_maze_generation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/automatic_maze_generation/mazegen/__init__.py b/src/v2/automatic_maze_generation/mazegen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/automatic_maze_generation/mazegen/generators.py b/src/v2/automatic_maze_generation/mazegen/generators.py new file mode 100644 index 0000000..3057d86 --- /dev/null +++ b/src/v2/automatic_maze_generation/mazegen/generators.py @@ -0,0 +1,485 @@ +from __future__ import annotations + +from typing import Iterable, List, Set, Tuple +from collections import deque + +from .models import ( + Backbone, + Coord, + MazeGenSpec, + MazeLayout, + DenseMazeParams, + MultiRouteParams, + SequentialChainParams, + SideVaultParams, + WindingCorridorParams, +) + + +def in_bounds(c: Coord, width: int, height: int) -> bool: + x, y = c + return 0 <= x < width and 0 <= y < 
height + + +def neighbors4(c: Coord) -> List[Coord]: + x, y = c + return [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)] + + +def manhattan(a: Coord, b: Coord) -> int: + return abs(a[0] - b[0]) + abs(a[1] - b[1]) + + +def carve_cells(cells: Iterable[Coord], open_cells: Set[Coord], width: int, height: int, corridor_width: int = 1) -> None: + for x, y in cells: + for dx in range(corridor_width): + for dy in range(corridor_width): + cc = (x + dx, y + dy) + if in_bounds(cc, width, height): + open_cells.add(cc) + + +def build_walls_from_open(width: int, height: int, open_cells: Set[Coord]) -> Set[Coord]: + return {(x, y) for x in range(width) for y in range(height) if (x, y) not in open_cells} + + +def path_from_points(points: List[Coord]) -> List[Coord]: + out: List[Coord] = [] + for i in range(len(points) - 1): + x1, y1 = points[i] + x2, y2 = points[i + 1] + out.append((x1, y1)) + if x1 == x2: + step = 1 if y2 >= y1 else -1 + for y in range(y1 + step, y2 + step, step): + out.append((x1, y)) + elif y1 == y2: + step = 1 if x2 >= x1 else -1 + for x in range(x1 + step, x2 + step, step): + out.append((x, y1)) + else: + raise ValueError("Consecutive points must align horizontally or vertically") + if points: + out.append(points[-1]) + dedup: List[Coord] = [] + seen: Set[Coord] = set() + for p in out: + if not dedup or dedup[-1] != p: + dedup.append(p) + seen.add(p) + return dedup + +def generate_winding_corridor(spec: MazeGenSpec) -> MazeLayout: + assert spec.backbone == Backbone.WINDING_CORRIDOR + rng = spec.rng() + p: WindingCorridorParams = spec.backbone_params + width, height = spec.grid_width, spec.grid_height + + x_min, x_max = 1, max(1, width - 2) + y_min, y_max = 1, max(1, height - 2) + + current = (x_min, y_min) + points = [current] + horizontal = True + + for i in range(p.turn_count + 1): + seg_len = rng.randint(p.segment_min_length, p.segment_max_length) + x, y = current + + if horizontal: + target_x = min(x_max, x + seg_len) if i % 2 == 0 else max(x_min, x - 
seg_len) + if target_x == x: + target_x = min(x_max, x + seg_len) + current = (target_x, y) + else: + target_y = min(y_max, y + seg_len) if (i // 2) % 2 == 0 else max(y_min, y - seg_len) + if target_y == y: + target_y = min(y_max, y + seg_len) + current = (x, target_y) + + points.append(current) + horizontal = not horizontal + + path = path_from_points(points) + open_cells: Set[Coord] = set() + carve_cells(path, open_cells, width, height, corridor_width=p.corridor_width) + + if p.allow_side_stubs: + candidates = path[1:-1] + rng.shuffle(candidates) + stubs_added = 0 + for cell in candidates: + if stubs_added >= p.side_stub_count: + break + dirs = neighbors4(cell) + rng.shuffle(dirs) + for nb in dirs: + if in_bounds(nb, width, height) and nb not in open_cells: + open_cells.add(nb) + stubs_added += 1 + break + + start = path[0] + goal = path[-1] + walls = build_walls_from_open(width, height, open_cells) + + # --- new: expose mechanism slots on the forced path --- + pickup_idx = max(1, len(path) // 3) + blocker_idx = min(len(path) - 2, (2 * len(path)) // 3) + + # keep them away from start/goal and distinct + if blocker_idx <= pickup_idx: + blocker_idx = min(len(path) - 2, pickup_idx + 2) + + pickup_cell = path[pickup_idx] + blocker_cell = path[blocker_idx] + + return MazeLayout( + width=width, + height=height, + walls=walls, + start=start, + goal=goal, + slots={ + "pickup_1_candidates": [pickup_cell], + "blocker_1_candidates": [blocker_cell], + "distractor_branch_candidates": [], + }, + route_cells=[set(path)], + metadata={ + "backbone": spec.backbone.value, + "logic_chain": spec.logic_chain.value, + "turn_count": p.turn_count, + }, + ) + +def _route_template_cells(width: int, height: int, num_routes: int) -> Tuple[Coord, Coord, List[List[Coord]]]: + start = (1, height // 2) + goal = (width - 2, height // 2) + rows: List[int] = [] + if num_routes == 2: + rows = [1, height - 2] + elif num_routes == 3: + rows = [1, height // 2, height - 2] + else: + rows = [1 + i * 
max(1, (height - 3) // max(1, num_routes - 1)) for i in range(num_routes)] + rows = [max(1, min(height - 2, r)) for r in rows] + + routes: List[List[Coord]] = [] + for r in rows[:num_routes]: + points = [start, (2, start[1]), (2, r), (width - 3, r), (width - 3, goal[1]), goal] + routes.append(path_from_points(points)) + return start, goal, routes + + +def generate_multi_route(spec: MazeGenSpec) -> MazeLayout: + assert spec.backbone == Backbone.MULTI_ROUTE + p: MultiRouteParams = spec.backbone_params + width, height = spec.grid_width, spec.grid_height + start, goal, routes = _route_template_cells(width, height, p.num_routes) + open_cells: Set[Coord] = set() + route_sets: List[Set[Coord]] = [] + for route in routes: + carve_cells(route, open_cells, width, height, corridor_width=p.main_corridor_width) + route_sets.append(set(route)) + + walls = build_walls_from_open(width, height, open_cells) + return MazeLayout( + width=width, + height=height, + walls=walls, + start=start, + goal=goal, + route_cells=route_sets, + slots={ + "pickup_1_candidates": [c for c in routes[0][2:-2]] if routes else [], + "blocker_1_candidates": [goal], + "distractor_branch_candidates": [], + }, + metadata={ + "backbone": spec.backbone.value, + "logic_chain": spec.logic_chain.value, + "num_routes": len(routes), + }, + ) + + +def generate_side_vault(spec: MazeGenSpec) -> MazeLayout: + assert spec.backbone == Backbone.SIDE_VAULT + p: SideVaultParams = spec.backbone_params + width, height = spec.grid_width, spec.grid_height + + open_cells: Set[Coord] = set() + main_y = height // 2 + start = (1, main_y) + goal = (width - 2, main_y) + main_path = path_from_points([start, (width - 2, main_y)]) + carve_cells(main_path, open_cells, width, height) + + foyer_x = min(width - 4, max(3, width // 3)) + branch_dir = -1 if p.vault_position_mode in {"upper"} else 1 + if p.vault_position_mode == "random": + branch_dir = -1 if spec.rng().random() < 0.5 else 1 + branch_end_y = max(1, min(height - 2, main_y + 
branch_dir * p.vault_branch_depth)) + vault_path = path_from_points([(foyer_x, main_y), (foyer_x, branch_end_y), (min(width - 3, foyer_x + 2), branch_end_y)]) + carve_cells(vault_path, open_cells, width, height) + + blocker_x = min(width - 3, max(foyer_x + 2, width - 3 - p.blocker_distance_from_goal)) + walls = build_walls_from_open(width, height, open_cells) + return MazeLayout( + width=width, + height=height, + walls=walls, + start=start, + goal=goal, + slots={ + "pickup_1_candidates": [vault_path[-1]], + "blocker_1_candidates": [(blocker_x, main_y)], + "distractor_branch_candidates": [], + }, + route_cells=[set(main_path), set(vault_path)], + metadata={"backbone": spec.backbone.value, "logic_chain": spec.logic_chain.value}, + ) + + +def generate_sequential_chain(spec: MazeGenSpec) -> MazeLayout: + assert spec.backbone == Backbone.SEQUENTIAL_CHAIN + p: SequentialChainParams = spec.backbone_params + width, height = spec.grid_width, spec.grid_height + open_cells: Set[Coord] = set() + + start = (1, height // 3) + choke1 = (max(3, width // 3), height // 3) + zone2_entry = (max(4, width // 3 + 1), 2 * height // 3) + choke2 = (max(6, 2 * width // 3), 2 * height // 3) + goal = (width - 2, 2 * height // 3) + + main_points = [start, choke1, (choke1[0], zone2_entry[1]), zone2_entry, choke2, goal] + main_path = path_from_points(main_points) + carve_cells(main_path, open_cells, width, height) + + pickup1 = (max(1, choke1[0] - 1), max(1, start[1] - p.pickup1_branch_depth)) + pickup1_path = path_from_points([(choke1[0] - 1, start[1]), (choke1[0] - 1, pickup1[1])]) + carve_cells(pickup1_path, open_cells, width, height) + + pickup2 = (min(width - 2, zone2_entry[0] + p.pickup2_branch_depth), max(1, zone2_entry[1] - 1)) + pickup2_path = path_from_points([zone2_entry, (pickup2[0], zone2_entry[1]), pickup2]) + carve_cells(pickup2_path, open_cells, width, height) + + walls = build_walls_from_open(width, height, open_cells) + return MazeLayout( + width=width, + height=height, + 
walls=walls, + start=start, + goal=goal, + slots={ + "pickup_1_candidates": [pickup1_path[-1]], + "blocker_1_candidates": [choke1], + "pickup_2_candidates": [pickup2_path[-1]], + "blocker_2_candidates": [choke2], + "distractor_branch_candidates": [], + }, + route_cells=[set(main_path), set(pickup1_path), set(pickup2_path)], + metadata={"backbone": spec.backbone.value, "logic_chain": spec.logic_chain.value}, + ) + + + +def _carve_dense_maze_grid(cell_w: int, cell_h: int, rng) -> tuple[set[Coord], int, int]: + """ + Return open cells for a classic carved maze on a tile grid of size: + width = 2*cell_w + 1, height = 2*cell_h + 1 + """ + width = 2 * cell_w + 1 + height = 2 * cell_h + 1 + + open_cells: set[Coord] = set() + + # Mark all logical cells as open + for cx in range(cell_w): + for cy in range(cell_h): + open_cells.add((2 * cx + 1, 2 * cy + 1)) + + visited = set() + stack = [(0, 0)] + visited.add((0, 0)) + + while stack: + cx, cy = stack[-1] + neighbors = [] + for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]: + nx, ny = cx + dx, cy + dy + if 0 <= nx < cell_w and 0 <= ny < cell_h and (nx, ny) not in visited: + neighbors.append((nx, ny, dx, dy)) + + if not neighbors: + stack.pop() + continue + + nx, ny, dx, dy = rng.choice(neighbors) + # open wall between current cell and next cell + wall_x = 2 * cx + 1 + dx + wall_y = 2 * cy + 1 + dy + open_cells.add((wall_x, wall_y)) + + visited.add((nx, ny)) + stack.append((nx, ny)) + + return open_cells, width, height + + +def _add_dense_maze_loops(open_cells: set[Coord], width: int, height: int, rng, loop_count: int) -> None: + candidates = [] + for x in range(1, width - 1): + for y in range(1, height - 1): + if (x, y) in open_cells: + continue + # candidate interior wall between two open cells + horiz = (x - 1, y) in open_cells and (x + 1, y) in open_cells + vert = (x, y - 1) in open_cells and (x, y + 1) in open_cells + if horiz or vert: + candidates.append((x, y)) + + rng.shuffle(candidates) + for c in 
candidates[:loop_count]: + open_cells.add(c) + + +def _shortest_path_on_open_cells(start: Coord, goal: Coord, open_cells: set[Coord], width: int, height: int) -> list[Coord]: + q = deque([start]) + parent = {start: None} + + while q: + cur = q.popleft() + if cur == goal: + break + for nb in neighbors4(cur): + if not in_bounds(nb, width, height): + continue + if nb not in open_cells or nb in parent: + continue + parent[nb] = cur + q.append(nb) + + if goal not in parent: + return [] + + path = [] + cur = goal + while cur is not None: + path.append(cur) + cur = parent[cur] + path.reverse() + return path + + + +def _pick_path_cell_by_progress(path: list[Coord], lo: float, hi: float, rng) -> Coord: + if len(path) < 3: + raise ValueError("Path too short to sample progress-based slot") + + start_idx = max(1, int(lo * (len(path) - 1))) + end_idx = min(len(path) - 2, int(hi * (len(path) - 1))) + if end_idx < start_idx: + end_idx = start_idx + idx = rng.randint(start_idx, end_idx) + return path[idx] + + + +def generate_dense_maze(spec: MazeGenSpec) -> MazeLayout: + assert spec.backbone == Backbone.DENSE_MAZE + rng = spec.rng() + p: DenseMazeParams = spec.backbone_params + + open_cells, width, height = _carve_dense_maze_grid( + p.maze_width_cells, + p.maze_height_cells, + rng, + ) + + if p.add_loops and p.loop_count > 0: + _add_dense_maze_loops(open_cells, width, height, rng, p.loop_count) + + # pick start/goal from open odd cells, far apart + candidates = sorted(open_cells) + best_pair = None + best_dist = -1 + for a in candidates: + for b in candidates: + d = manhattan(a, b) + if d > best_dist: + best_dist = d + best_pair = (a, b) + + if best_pair is None: + raise ValueError("Could not find start/goal in dense maze") + + start, goal = best_pair + path = _shortest_path_on_open_cells(start, goal, open_cells, width, height) + if not path: + raise ValueError("Dense maze path generation failed") + + pickup1 = _pick_path_cell_by_progress(path, p.pickup1_progress_min, 
p.pickup1_progress_max, rng) + blocker1 = _pick_path_cell_by_progress(path, p.blocker1_progress_min, p.blocker1_progress_max, rng) + pickup2 = _pick_path_cell_by_progress(path, p.pickup2_progress_min, p.pickup2_progress_max, rng) + blocker2 = _pick_path_cell_by_progress(path, p.blocker2_progress_min, p.blocker2_progress_max, rng) + + # enforce monotonic order along the path + idx = {cell: i for i, cell in enumerate(path)} + ordered = sorted([pickup1, blocker1, pickup2, blocker2], key=lambda c: idx[c]) + pickup1, blocker1, pickup2, blocker2 = ordered + + # ensure all 4 are distinct and separated + dedup = [] + for cell in [pickup1, blocker1, pickup2, blocker2]: + if cell not in dedup: + dedup.append(cell) + + if len(dedup) < 4: + # simple fallback using spaced path indices + n = len(path) + pickup1 = path[max(1, n // 5)] + blocker1 = path[max(2, (2 * n) // 5)] + pickup2 = path[max(3, (3 * n) // 5)] + blocker2 = path[max(4, (4 * n) // 5)] + + walls = build_walls_from_open(width, height, open_cells) + + return MazeLayout( + width=width, + height=height, + walls=walls, + start=start, + goal=goal, + slots={ + "pickup_1_candidates": [pickup1], + "blocker_1_candidates": [blocker1], + "pickup_2_candidates": [pickup2], + "blocker_2_candidates": [blocker2], + "distractor_branch_candidates": [], + }, + route_cells=[set(path)], + metadata={ + "backbone": spec.backbone.value, + "logic_chain": spec.logic_chain.value, + "dense_maze_cells": [p.maze_width_cells, p.maze_height_cells], + "solution_path_length": len(path) - 1, + }, + ) + + +def generate_from_spec(spec: MazeGenSpec) -> MazeLayout: + if spec.backbone == Backbone.WINDING_CORRIDOR: + return generate_winding_corridor(spec) + if spec.backbone == Backbone.MULTI_ROUTE: + return generate_multi_route(spec) + if spec.backbone == Backbone.SIDE_VAULT: + return generate_side_vault(spec) + if spec.backbone == Backbone.SEQUENTIAL_CHAIN: + return generate_sequential_chain(spec) + if spec.backbone == Backbone.DENSE_MAZE: + return 
generate_dense_maze(spec) + raise ValueError(f"Unsupported backbone: {spec.backbone}") + diff --git a/src/v2/automatic_maze_generation/mazegen/models.py b/src/v2/automatic_maze_generation/mazegen/models.py new file mode 100644 index 0000000..f708ae2 --- /dev/null +++ b/src/v2/automatic_maze_generation/mazegen/models.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Dict, List, Optional, Set, Tuple + +Coord = Tuple[int, int] + + +class Backbone(str, Enum): + WINDING_CORRIDOR = "winding_corridor" + MULTI_ROUTE = "multi_route" + SIDE_VAULT = "side_vault" + SEQUENTIAL_CHAIN = "sequential_chain" + DENSE_MAZE = "dense_maze" + + +class LogicChain(str, Enum): + NONE = "none" + KD = "kd" + SG = "sg" + KS = "ks" + SK = "sk" + KK = "kk" + + +class DistractorMode(str, Enum): + NONE = "none" + WRONG_KEYS = "wrong_keys" + WRONG_SWITCHES = "wrong_switches" + DEAD_END_ROOMS = "dead_end_rooms" + DISTRACTOR_CHAIN = "distractor_chain" + + +@dataclass +class WindingCorridorParams: + corridor_length: int = 20 + turn_count: int = 4 + segment_min_length: int = 2 + segment_max_length: int = 5 + corridor_width: int = 1 + allow_side_stubs: bool = False + side_stub_count: int = 0 + start_goal_at_ends: bool = True + self_proximity_budget: int = 0 + + +@dataclass +class MultiRouteParams: + num_routes: int = 3 + min_route_length: int = 8 + max_route_length: int = 18 + allow_route_rejoin: bool = True + route_overlap_budget: int = 1 + route_asymmetry: float = 0.5 + dead_end_branch_count: int = 0 + main_corridor_width: int = 1 + + +@dataclass +class SideVaultParams: + foyer_size: str = "medium" + vault_branch_depth: int = 4 + vault_branch_turns: int = 1 + main_route_length_before_blocker: int = 8 + blocker_distance_from_goal: int = 2 + vault_position_mode: str = "random" + mainline_shape: str = "linear" + allow_small_dead_ends: bool = False + + +@dataclass +class SequentialChainParams: + zone1_size: str = 
"medium" + zone2_size: str = "medium" + choke1_orientation: str = "random" + choke2_orientation: str = "random" + pickup1_branch_depth: int = 1 + pickup2_branch_depth: int = 2 + zone2_internal_branches: int = 0 + main_progress_shape: str = "linear" + allow_local_dead_ends: bool = False + + + +@dataclass +class DenseMazeParams: + maze_width_cells: int = 7 + maze_height_cells: int = 7 + add_loops: bool = False + loop_count: int = 0 + pickup1_progress_min: float = 0.20 + pickup1_progress_max: float = 0.40 + blocker1_progress_min: float = 0.45 + blocker1_progress_max: float = 0.65 + pickup2_progress_min: float = 0.60 + pickup2_progress_max: float = 0.80 + blocker2_progress_min: float = 0.80 + blocker2_progress_max: float = 0.92 + + +@dataclass +class ValidationParams: + require_solvable: bool = True + require_no_bypass: bool = True + require_chain_order: bool = True + require_prerequisite_before_blocker: bool = True + require_single_main_path: bool = False + require_unique_shortest_path: bool = False + min_distinct_solution_routes: int = 1 + + +@dataclass +class MazeGenSpec: + backbone: Backbone + logic_chain: LogicChain + difficulty_tier: int + grid_width: int + grid_height: int + seed: int + distractor_mode: DistractorMode = DistractorMode.NONE + max_distractors: int = 0 + backbone_params: object = None + validation_params: ValidationParams = field(default_factory=ValidationParams) + + def rng(self): + import random + return random.Random(self.seed) + + +@dataclass +class Key: + id: str + position: Coord + color: str + + +@dataclass +class Door: + id: str + position: Coord + requires_key: str + initial_state: str = "locked" + + +@dataclass +class Switch: + id: str + position: Coord + controls: List[str] + switch_type: str = "toggle" + initial_state: str = "off" + + +@dataclass +class Gate: + id: str + position: Coord + initial_state: str = "closed" + + +@dataclass +class MazeLayout: + width: int + height: int + walls: Set[Coord] + start: Coord + goal: Coord + slots: 
Dict[str, List[Coord]] = field(default_factory=dict) + route_cells: List[Set[Coord]] = field(default_factory=list) + metadata: Dict[str, object] = field(default_factory=dict) + + +@dataclass +class MazeInstance: + width: int + height: int + walls: Set[Coord] + start: Coord + goal: Coord + keys: List[Key] = field(default_factory=list) + doors: List[Door] = field(default_factory=list) + switches: List[Switch] = field(default_factory=list) + gates: List[Gate] = field(default_factory=list) + metadata: Dict[str, object] = field(default_factory=dict) + + def to_json_like(self) -> dict: + return { + "maze": { + "dimensions": [self.width, self.height], + "walls": sorted([list(w) for w in self.walls]), + "start": list(self.start), + "goal": list(self.goal), + }, + "mechanisms": { + "keys": [k.__dict__ | {"position": list(k.position)} for k in self.keys], + "doors": [d.__dict__ | {"position": list(d.position)} for d in self.doors], + "switches": [s.__dict__ | {"position": list(s.position)} for s in self.switches], + "gates": [g.__dict__ | {"position": list(g.position)} for g in self.gates], + }, + "metadata": self.metadata, + } \ No newline at end of file diff --git a/src/v2/automatic_maze_generation/mazegen/solver.py b/src/v2/automatic_maze_generation/mazegen/solver.py new file mode 100644 index 0000000..c28a413 --- /dev/null +++ b/src/v2/automatic_maze_generation/mazegen/solver.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +from collections import deque +from heapq import heappop, heappush +from typing import Dict, List, Optional, Tuple + +from .models import Coord, MazeInstance, MazeLayout +from .generators import in_bounds, neighbors4 + + +def solve_navigation_only(layout: MazeLayout) -> dict: + start, goal = layout.start, layout.goal + blocked = layout.walls + pq: List[Tuple[int, Coord]] = [(0, start)] + parent: Dict[Coord, Optional[Coord]] = {start: None} + dist: Dict[Coord, int] = {start: 0} + + while pq: + d, node = heappop(pq) + if node == goal: + break 
+ if d != dist[node]: + continue + for nb in neighbors4(node): + if not in_bounds(nb, layout.width, layout.height) or nb in blocked: + continue + nd = d + 1 + if nb not in dist or nd < dist[nb]: + dist[nb] = nd + parent[nb] = node + heappush(pq, (nd, nb)) + + if goal not in dist: + return {"is_solvable": False, "optimal_cost": None, "path": []} + + path: List[Coord] = [] + cur: Optional[Coord] = goal + while cur is not None: + path.append(cur) + cur = parent[cur] + path.reverse() + return {"is_solvable": True, "optimal_cost": len(path) - 1, "path": path} + + +def count_shortest_paths(layout: MazeLayout, max_count: int = 3) -> int: + start, goal = layout.start, layout.goal + blocked = layout.walls + dist: Dict[Coord, int] = {start: 0} + count: Dict[Coord, int] = {start: 1} + pq: List[Tuple[int, Coord]] = [(0, start)] + + while pq: + d, node = heappop(pq) + if d != dist[node]: + continue + for nb in neighbors4(node): + if not in_bounds(nb, layout.width, layout.height) or nb in blocked: + continue + nd = d + 1 + if nb not in dist: + dist[nb] = nd + count[nb] = count[node] + heappush(pq, (nd, nb)) + elif nd == dist[nb]: + count[nb] = min(max_count, count[nb] + count[node]) + + return count.get(goal, 0) + +def _maze_lookup_tables(maze: MazeInstance) -> dict: + return { + "key_at": {k.position: k for k in maze.keys}, + "door_at": {d.position: d for d in maze.doors}, + "switch_at": {s.position: s for s in maze.switches}, + "gate_at": {g.position: g for g in maze.gates}, + "gate_to_switches": { + g.id: [s.id for s in maze.switches if g.id in s.controls] + for g in maze.gates + }, + } + + + +def _normalize_state( + pos: Coord, + inventory: frozenset[str], + opened_doors: frozenset[str], + switch_states: frozenset[str], +) -> Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]]: + return (pos, inventory, opened_doors, switch_states) + + + +def _apply_cell_effects( + maze: MazeInstance, + pos: Coord, + inventory: frozenset[str], + opened_doors: frozenset[str], + 
switch_states: frozenset[str], + lookups: dict, +) -> Tuple[frozenset[str], frozenset[str], frozenset[str], List[str]]: + inventory_set = set(inventory) + opened_set = set(opened_doors) + switch_set = set(switch_states) + interactions: List[str] = [] + + key = lookups["key_at"].get(pos) + if key is not None and key.color not in inventory_set: + inventory_set.add(key.color) + interactions.append(f"pickup:{key.id}") + + sw = lookups["switch_at"].get(pos) + if sw is not None and sw.id not in switch_set: + # V1 behavior: activate once and keep on. + switch_set.add(sw.id) + interactions.append(f"toggle:{sw.id}") + + return frozenset(inventory_set), frozenset(opened_set), frozenset(switch_set), interactions + + + +def _can_enter_cell( + maze: MazeInstance, + pos: Coord, + inventory: frozenset[str], + opened_doors: frozenset[str], + switch_states: frozenset[str], + lookups: dict, +) -> Tuple[bool, frozenset[str], frozenset[str], List[str]]: + inventory_set = set(inventory) + opened_set = set(opened_doors) + interactions: List[str] = [] + + door = lookups["door_at"].get(pos) + if door is not None and door.id not in opened_set: + if door.requires_key not in inventory_set: + return False, inventory, opened_doors, [] + inventory_set.remove(door.requires_key) + opened_set.add(door.id) + interactions.append(f"open:{door.id}") + + gate = lookups["gate_at"].get(pos) + if gate is not None: + controllers = lookups["gate_to_switches"].get(gate.id, []) + is_open = any(sw_id in switch_states for sw_id in controllers) + if not is_open: + return False, inventory, opened_doors, [] + interactions.append(f"cross:{gate.id}") + + return True, frozenset(inventory_set), frozenset(opened_set), interactions + + + +def solve_maze(maze: MazeInstance) -> dict: + """ + Solve a maze using shortest-path search over full agent state. 
+ + This solver supports movement plus the current mechanism semantics: + - keys are picked up on entry to their cell + - doors require a matching key color and consume that key on first use + - switches activate on first visit and remain on + - gates are traversable when any controlling switch is on + """ + lookups = _maze_lookup_tables(maze) + + start_inventory, start_opened, start_switches, start_interactions = _apply_cell_effects( + maze, + maze.start, + frozenset(), + frozenset(), + frozenset(), + lookups, + ) + start_state = _normalize_state(maze.start, start_inventory, start_opened, start_switches) + + queue = deque([start_state]) + parent: Dict[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]], Optional[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]]]] = { + start_state: None + } + action_taken: Dict[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]], Tuple[str, List[str]]] = { + start_state: ("START", start_interactions) + } + dist: Dict[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]], int] = {start_state: 0} + + goal_state: Optional[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]]] = None + + while queue: + state = queue.popleft() + pos, inventory, opened_doors, switch_states = state + if pos == maze.goal: + goal_state = state + break + + for nb in neighbors4(pos): + if not in_bounds(nb, maze.width, maze.height) or nb in maze.walls: + continue + + allowed, inventory_after_entry, opened_after_entry, entry_interactions = _can_enter_cell( + maze, nb, inventory, opened_doors, switch_states, lookups + ) + if not allowed: + continue + + final_inventory, final_opened, final_switches, cell_interactions = _apply_cell_effects( + maze, + nb, + inventory_after_entry, + opened_after_entry, + switch_states, + lookups, + ) + next_state = _normalize_state(nb, final_inventory, final_opened, final_switches) + if next_state in dist: + continue + + dist[next_state] = dist[state] + 1 + parent[next_state] = state + 
action_taken[next_state] = ( + f"MOVE_TO:{nb[0]},{nb[1]}", + entry_interactions + cell_interactions, + ) + queue.append(next_state) + + if goal_state is None: + return { + "is_solvable": False, + "optimal_cost": None, + "path": [], + "action_sequence": [], + "interactions": [], + "final_inventory": [], + "final_opened_doors": [], + "active_switches": [], + } + + states_path: List[Tuple[Coord, frozenset[str], frozenset[str], frozenset[str]]] = [] + cur = goal_state + while cur is not None: + states_path.append(cur) + cur = parent[cur] + states_path.reverse() + + path = [s[0] for s in states_path] + action_sequence: List[str] = [] + interactions: List[str] = [] + for st in states_path[1:]: + move_action, side_effects = action_taken[st] + action_sequence.append(move_action) + interactions.extend(side_effects) + + _, final_inventory, final_opened, final_switches = goal_state + return { + "is_solvable": True, + "optimal_cost": len(path) - 1, + "path": path, + "action_sequence": action_sequence, + "interactions": interactions, + "final_inventory": sorted(final_inventory), + "final_opened_doors": sorted(final_opened), + "active_switches": sorted(final_switches), + } + diff --git a/src/v2/automatic_maze_generation/render_dataset.py b/src/v2/automatic_maze_generation/render_dataset.py new file mode 100644 index 0000000..83c820f --- /dev/null +++ b/src/v2/automatic_maze_generation/render_dataset.py @@ -0,0 +1,381 @@ +# render_dataset.py +from __future__ import annotations + +import json +from copy import deepcopy +from io import BytesIO +from pathlib import Path +from typing import Any, Optional, Tuple + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.pyplot as plt +from matplotlib.patches import Rectangle, Circle + + +CELL = 40 # pixels-ish via figure scale + + +def _extract_payload_fields(payload: dict): + maze = payload["maze"] + mechs = payload.get("mechanisms", {}) + + width, height = maze["dimensions"] + walls = {tuple(w) for w in maze["walls"]} + start 
= tuple(maze["start"]) + goal = tuple(maze["goal"]) + + keys = mechs.get("keys", []) + doors = mechs.get("doors", []) + switches = mechs.get("switches", []) + gates = mechs.get("gates", []) + + return width, height, walls, start, goal, keys, doors, switches, gates + + +def _row_col_payload_to_xy_payload(payload: dict) -> dict: + """Convert a row/col payload to renderer-space (x/y) without mutating input.""" + out = deepcopy(payload) + maze = out.get("maze", {}) + mechs = out.get("mechanisms", {}) + + def rc_to_xy(pos): + r, c = pos + return [c, r] + + dims = maze.get("dimensions") + if dims and len(dims) == 2: + rows, cols = dims + maze["dimensions"] = [cols, rows] + + maze["walls"] = [rc_to_xy(w) for w in maze.get("walls", [])] + if "start" in maze: + maze["start"] = rc_to_xy(maze["start"]) + if "goal" in maze: + maze["goal"] = rc_to_xy(maze["goal"]) + + for k in mechs.get("keys", []): + if "position" in k: + k["position"] = rc_to_xy(k["position"]) + for d in mechs.get("doors", []): + if "position" in d: + d["position"] = rc_to_xy(d["position"]) + for s in mechs.get("switches", []): + if "position" in s: + s["position"] = rc_to_xy(s["position"]) + for g in mechs.get("gates", []): + if "position" in g: + g["position"] = rc_to_xy(g["position"]) + + validation = out.get("validation", {}) + if "optimal_path" in validation: + validation["optimal_path"] = [rc_to_xy(p) for p in validation.get("optimal_path", [])] + return out + + + +def _color_to_facecolor(name: str) -> str: + mapping = { + "red": "#e74c3c", + "blue": "#3498db", + "green": "#2ecc71", + "yellow": "#f1c40f", + "purple": "#9b59b6", + "orange": "#e67e22", + } + return mapping.get(name.lower(), "#95a5a6") + + +def _draw_centered_text(ax, x: int, y: int, height: int, text: str, fontsize: int = 10, color: str = "black"): + ax.text( + x + 0.5, + height - 1 - y + 0.5, + text, + ha="center", + va="center", + fontsize=fontsize, + color=color, + fontweight="bold", + ) + + +def _draw_key(ax, x: int, y: int, height: 
int, color_name: str): + face = _color_to_facecolor(color_name) + cy = height - 1 - y + 0.5 + + # colored circle badge + ax.add_patch(Circle((x + 0.5, cy), 0.28, facecolor=face, edgecolor="black", linewidth=1.0)) + # key icon / fallback letter + ax.text( + x + 0.5, + cy, + "⚷", # if this glyph looks odd in your env, replace with "K" + ha="center", + va="center", + fontsize=11, + color="white", + fontweight="bold", + ) + + +def _draw_door(ax, x: int, y: int, height: int, color_name: str): + face = _color_to_facecolor(color_name) + by = height - 1 - y + + # colored inner door rectangle + ax.add_patch( + Rectangle( + (x + 0.18, by + 0.12), + 0.64, + 0.76, + facecolor=face, + edgecolor="black", + linewidth=1.0, + ) + ) + # small doorknob + ax.add_patch(Circle((x + 0.68, by + 0.5), 0.04, facecolor="white", edgecolor="white")) + + +def _draw_switch(ax, x: int, y: int, height: int, label: str): + by = height - 1 - y + + ax.add_patch( + Rectangle( + (x + 0.15, by + 0.2), + 0.7, + 0.6, + facecolor="#dfe6e9", + edgecolor="black", + linewidth=1.0, + ) + ) + ax.text( + x + 0.5, + by + 0.5, + label, + ha="center", + va="center", + fontsize=9, + color="black", + fontweight="bold", + ) + + +def _draw_gate(ax, x: int, y: int, height: int, label: str): + by = height - 1 - y + + # gate bars + for dx in [0.22, 0.38, 0.54, 0.70]: + ax.plot([x + dx, x + dx], [by + 0.15, by + 0.85], color="black", linewidth=1.4) + ax.plot([x + 0.18, x + 0.74], [by + 0.18, by + 0.18], color="black", linewidth=1.4) + ax.plot([x + 0.18, x + 0.74], [by + 0.82, by + 0.82], color="black", linewidth=1.4) + + ax.text( + x + 0.5, + by + 0.5, + label, + ha="center", + va="center", + fontsize=8, + color="black", + fontweight="bold", + bbox=dict(boxstyle="round,pad=0.08", facecolor="white", edgecolor="none", alpha=0.8), + ) + + +_AGENT_FACING_DELTA = { + "NORTH": (-1, 0), + "EAST": (0, 1), + "SOUTH": (1, 0), + "WEST": (0, -1), +} + + +def _draw_agent(ax, ar: int, ac: int, height: int, facing: str) -> None: + 
"""Overlay current agent (row, col) and facing; same cell coords as ``_draw_centered_text``.""" + # GridState uses (row, col). Rendering uses x=col, y=row (inverted vertical axis). + cx = ac + 0.5 + cy = height - 1 - ar + 0.5 + ax.plot( + cx, + cy, + "o", + color="black", + markersize=10, + zorder=6, + markeredgecolor="black", + ) + dr, dc = _AGENT_FACING_DELTA.get(facing, (0, 0)) + if dr == 0 and dc == 0: + return + nr, nc = ar + dr, ac + dc + tip_x = nc + 0.5 + tip_y = height - 1 - nr + 0.5 + ax.annotate( + "", + xy=(tip_x, tip_y), + xytext=(cx, cy), + arrowprops=dict(arrowstyle="->", color="black", lw=1.5), + zorder=7, + ) + + +def _extract_optimal_path(payload: dict): + validation = payload.get("validation", {}) + return [tuple(p) for p in validation.get("optimal_path", [])] + + + + + + +def _draw_optimal_path(ax, path, height: int): + if not path: + return + + xs = [x + 0.5 for x, y in path] + ys = [height - 1 - y + 0.5 for x, y in path] + + ax.plot( + xs, + ys, + linewidth=3.0, + alpha=0.45, + zorder=2, + ) + + # mark start of path a little more clearly + ax.scatter( + [xs[0]], + [ys[0]], + s=35, + alpha=0.7, + zorder=3, + ) + + + +def _figure_from_maze_payload(payload: dict, title: str) -> Tuple[Any, Any, int]: + """Build figure/axes for a maze JSON payload; caller savesfig and closes.""" + payload = _row_col_payload_to_xy_payload(payload) + width, height, walls, start, goal, keys, doors, switches, gates = _extract_payload_fields(payload) + optimal_path = _extract_optimal_path(payload) + + fig_w = max(6, width * 0.55) + fig_h = max(4, height * 0.55) + fig, ax = plt.subplots(figsize=(fig_w, fig_h)) + + # base grid + for x in range(width): + for y in range(height): + is_wall = (x, y) in walls + facecolor = "black" if is_wall else "white" + ax.add_patch( + Rectangle( + (x, height - 1 - y), + 1, + 1, + facecolor=facecolor, + edgecolor="lightgray", + linewidth=0.8, + zorder=0, + ) + ) + + # path overlay first, so icons remain visible above it + 
_draw_optimal_path(ax, optimal_path, height) + + # start / goal + sx, sy = start + gx, gy = goal + ax.add_patch(Rectangle((sx, height - 1 - sy), 1, 1, facecolor="#c8f7c5", edgecolor="black", linewidth=1.2, zorder=4)) + ax.add_patch(Rectangle((gx, height - 1 - gy), 1, 1, facecolor="#f7d6c5", edgecolor="black", linewidth=1.2, zorder=4)) + _draw_centered_text(ax, sx, sy, height, "S", fontsize=11) + _draw_centered_text(ax, gx, gy, height, "G", fontsize=11) + + # keys + for key in keys: + x, y = key["position"] + color_name = key.get("color", "gray") + _draw_key(ax, x, y, height, color_name) + + # doors + for door in doors: + x, y = door["position"] + color_name = door.get("requires_key", "gray") + _draw_door(ax, x, y, height, color_name) + + # switches + for sw in switches: + x, y = sw["position"] + _draw_switch(ax, x, y, height, "S") + + # gates + for gate in gates: + x, y = gate["position"] + _draw_gate(ax, x, y, height, "G") + + ax.set_title(title) + ax.set_xlim(0, width) + ax.set_ylim(0, height) + ax.set_aspect("equal") + ax.axis("off") + + return fig, ax, height + + +def render_maze_payload(payload: dict, output_path: Path) -> None: + title = payload.get("task_id", output_path.stem) + fig, _ax, _height = _figure_from_maze_payload(payload, title) + plt.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + plt.close(fig) + + +def render_maze_payload_bytes( + payload: dict, + *, + dpi: int = 150, + agent_pos: Optional[Tuple[int, int]] = None, + facing: str = "NORTH", +) -> bytes: + """Same layout as ``render_maze_payload``, PNG bytes (e.g. 
NLU live observations).""" + title = str(payload.get("task_id", "maze")) + fig, ax, height = _figure_from_maze_payload(payload, title) + if agent_pos is not None: + _draw_agent(ax, agent_pos[0], agent_pos[1], height, facing) + plt.tight_layout() + buf = BytesIO() + fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight") + plt.close(fig) + return buf.getvalue() + + + + +def main() -> None: + input_dir = Path("generated_mazes") + # input_dir = Path("../nlu_pipeline/nlu_benchmark/sample mazes") + output_dir = input_dir / "pngs" + output_dir.mkdir(parents=True, exist_ok=True) + + json_files = sorted(p for p in input_dir.glob("*.json") if p.name != "manifest.json") + if not json_files: + print("No maze JSON files found in generated_mazes/") + return + + for jf in json_files: + with open(jf, "r", encoding="utf-8") as f: + payload = json.load(f) + + out_path = output_dir / f"{jf.stem}.png" + render_maze_payload(payload, out_path) + print(f"[OK] rendered {out_path.name}") + + print(f"\nRendered {len(json_files)} PNGs to: {output_dir.resolve()}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/v2/nlu_pipeline/__init__.py b/src/v2/nlu_pipeline/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/__init__.py b/src/v2/nlu_pipeline/nlu_benchmark/__init__.py new file mode 100644 index 0000000..0b845b9 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/__init__.py @@ -0,0 +1 @@ +"""NLU maze benchmark package.""" diff --git a/src/v2/nlu_pipeline/nlu_benchmark/agents.py b/src/v2/nlu_pipeline/nlu_benchmark/agents.py new file mode 100644 index 0000000..6567ff3 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/agents.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import os +import random +from dataclasses import dataclass, field +from typing import Dict, List, Optional + +from huggingface_hub import InferenceClient, get_token +from transformers import AutoModelForCausalLM, 
AutoTokenizer + +from nlu_benchmark.parser import ACTION_ORDER + +# More stable defaults for local model downloads on Windows. +os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0") +os.environ.setdefault("HF_HUB_DISABLE_XET", "1") + +# Keep empty in source. Prefer env var `HF_TOKEN` or `huggingface-cli login`. +_LOCAL_HF_TOKEN = "" +if _LOCAL_HF_TOKEN.strip() and not os.environ.get("HF_TOKEN"): + os.environ["HF_TOKEN"] = _LOCAL_HF_TOKEN.strip() + + +class RandomAgent: + def __call__(self, messages: list[dict]) -> str: + return f"FINAL_OUTPUT: {random.choice(ACTION_ORDER)}" + + +DEFAULT_ROUTER_MODEL = "meta-llama/Llama-3.1-8B-Instruct:cerebras" +DEFAULT_LOCAL_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct" + + +@dataclass +class HFLLMConfig: + model: str = DEFAULT_ROUTER_MODEL + temperature: float = 0.0 + max_tokens: int = 64 + timeout: Optional[float] = 30.0 + + +@dataclass +class HuggingFaceLLMAgent: + """Remote HF Router-backed chat-completions agent.""" + + config: HFLLMConfig = field(default_factory=HFLLMConfig) + client: Optional[InferenceClient] = None + + def __post_init__(self) -> None: + if self.client is None: + token = os.environ.get("HF_TOKEN") or get_token() + if not token: + raise ValueError( + "No Hugging Face token found. Set HF_TOKEN or run `huggingface-cli login`." 
+ ) + + self.client = InferenceClient( + api_key=token, + timeout=self.config.timeout, + ) + + def __call__(self, messages: List[Dict[str, str]]) -> str: + response = self.client.chat.completions.create( + model=self.config.model, + messages=messages, + temperature=self.config.temperature, + max_tokens=self.config.max_tokens, + ) + return response.choices[0].message.content.strip() + + +@dataclass +class LocalLLMConfig: + # Open-source/open-weight local models (examples): + # - Qwen/Qwen2.5-0.5B-Instruct + # - google/gemma-2-2b-it + model: str = DEFAULT_LOCAL_MODEL + temperature: float = 0.0 + max_new_tokens: int = 64 + device_map: str = "auto" + + +@dataclass +class LocalTransformersAgent: + """Local agent using Hugging Face Transformers (no inference credits).""" + + config: LocalLLMConfig = field(default_factory=LocalLLMConfig) + tokenizer: Optional[AutoTokenizer] = None + model: Optional[AutoModelForCausalLM] = None + + def __post_init__(self) -> None: + if self.tokenizer is None: + self.tokenizer = AutoTokenizer.from_pretrained(self.config.model) + if self.model is None: + self.model = AutoModelForCausalLM.from_pretrained( + self.config.model, + device_map=self.config.device_map, + ) + + def __call__(self, messages: List[Dict[str, str]]) -> str: + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + inputs = self.tokenizer(prompt, return_tensors="pt") + inputs = {k: v.to(self.model.device) for k, v in inputs.items()} + + generated = self.model.generate( + **inputs, + max_new_tokens=self.config.max_new_tokens, + temperature=self.config.temperature, + do_sample=self.config.temperature > 0, + ) + + prompt_len = inputs["input_ids"].shape[1] + new_tokens = generated[0][prompt_len:] + return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip() + + +if __name__ == "__main__": + agent = LocalTransformersAgent(config=LocalLLMConfig()) + out = agent( + [ + {"role": "system", "content": "Reply 
with one short sentence."}, + {"role": "user", "content": "What is 2+2?"}, + ] + ) + print(out) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py new file mode 100644 index 0000000..d0f2783 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Literal + + +@dataclass +class ExperimentConfig: + """Selects one implementation along each experimental axis. + + prompting + minimal – goal + action list only (system prompt) + standard – adds ``MECHANISM_LIST`` to the system prompt + verbose – standard + ``MECHANISM_RULES`` + extra user fields (neighbours, hints). + Maze **layout** text is in the system / user split from ``observation``, not from prompting. + + observation + text_only – initial NL maze in system; current situation text per user turn; last3 history + image_text – same as text_only + live PNG each turn; last3 = full feedback + screenshot_only – live PNG only (no NL map); last3 = action-only lines + + context_window + current – only the current observation (no prior steps in the prompt) + last3 – last 3 steps as structured lines prepended to the prompt + + querying + step_by_step – one LLM call per env step (only the first action in FINAL_OUTPUT is used) + subgoal – SUB_GOAL + ACTIONS list; re-queries when queue empty, stuck, or mid-budget + full_trajectory – same format as subgoal, but exactly one LLM call per episode (no re-query) + """ + + prompting: Literal["minimal", "standard", "verbose"] = "minimal" + observation: Literal["text_only", "image_text", "screenshot_only"] = "text_only" + context_window: Literal["current", "last3"] = "current" + querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" + + def to_dict(self) -> dict: + return asdict(self) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/env.py b/src/v2/nlu_pipeline/nlu_benchmark/env.py new file 
mode 100644 index 0000000..9e01473 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/env.py @@ -0,0 +1,216 @@ +from dataclasses import dataclass, field +from typing import Any, Dict, List, Set, Tuple, Optional + +Pos = Tuple[int, int] + +FACING_ORDER = ["NORTH", "EAST", "SOUTH", "WEST"] + +FACING_TO_DELTA: Dict[str, Tuple[int, int]] = { + "NORTH": (-1, 0), + "EAST": ( 0, 1), + "SOUTH": ( 1, 0), + "WEST": ( 0, -1), +} + + +@dataclass +class GridState: + rows: int + cols: int + walls: Set[Pos] + start: Pos + goal: Pos + agent_pos: Pos + facing: str = "NORTH" + step_count: int = 0 + max_steps: int = 50 + inventory: List[str] = field(default_factory=list) # collected key colors + keys: List[Dict[str, Any]] = field(default_factory=list) + doors: List[Dict[str, Any]] = field(default_factory=list) + switches: List[Dict[str, Any]] = field(default_factory=list) + gates: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class StepEvent: + type: str # TURNED, MOVED, BLOCKED, DONE, PICKUP, TOGGLED, NOTHING, WRONG_DONE, INVALID + message: str + + +class GridWorldEnv: + + def __init__( + self, + rows: int, + cols: int, + walls: Set[Pos], + start: Pos, + goal: Pos, + max_steps: int = 50, + mechanisms: Optional[Dict[str, Any]] = None, + ): + mechs = mechanisms or {} + self.initial = GridState( + rows=rows, + cols=cols, + walls=walls, + start=start, + goal=goal, + agent_pos=start, + max_steps=max_steps, + keys=mechs.get("keys", []), + doors=mechs.get("doors", []), + switches=mechs.get("switches", []), + gates=mechs.get("gates", []), + ) + self.state: Optional[GridState] = None + + def reset(self) -> GridState: + s = self.initial + self.state = GridState( + rows=s.rows, + cols=s.cols, + walls=set(s.walls), + start=s.start, + goal=s.goal, + agent_pos=s.start, + facing="NORTH", + step_count=0, + max_steps=s.max_steps, + inventory=[], + keys=[dict(k) for k in s.keys], + doors=[dict(d) for d in s.doors], + switches=[{**dict(sw), "on": bool(sw.get("on", False))} for 
sw in s.switches], + gates=[GridWorldEnv._gate_state_from_switches(dict(g), s.switches) for g in s.gates], + ) + return self.state + + @staticmethod + def _gate_state_from_switches(gate: Dict, switches: List[Dict]) -> Dict: + """Gates are open if any linked switch is on, else use initial/embedded state.""" + g = dict(gate) + gid = g.get("id") + if gid: + if any( + bool(sw.get("on")) and gid in sw.get("controls", []) + for sw in switches + ): + g["state"] = "open" + else: + g["state"] = g.get("state", g.get("initial_state", "closed")) + return g + + def step(self, action: str) -> tuple[GridState, StepEvent]: + assert self.state is not None, "Call reset() first." + + verb = action.strip().upper() + + # --- Turns --- + if verb in ("TURN_LEFT", "TURN_RIGHT"): + idx = FACING_ORDER.index(self.state.facing) + self.state.facing = FACING_ORDER[(idx + (-1 if verb == "TURN_LEFT" else 1)) % 4] + self.state.step_count += 1 + return self.state, StepEvent("TURNED", f"Now facing {self.state.facing}.") + + # --- Move one step forward --- + if verb == "MOVE_FORWARD": + dr, dc = FACING_TO_DELTA[self.state.facing] + r, c = self.state.agent_pos + nr, nc = r + dr, c + dc + reason = self._blocked(nr, nc) + if reason: + return self.state, StepEvent("BLOCKED", f"MOVE_FORWARD blocked by {reason}.") + self.state.agent_pos = (nr, nc) + # With matching key in inventory, moving onto a door tile opens it (no TOGGLE on doors) + door = self._door_at((nr, nc)) + if door and door["requires_key"] in self.state.inventory: + self.state.doors = [ + d for d in self.state.doors if tuple(d["position"]) != (nr, nc) + ] + self.state.step_count += 1 + if self.state.agent_pos == self.state.goal: + return self.state, StepEvent("DONE", f"Reached goal at {self.state.goal}.") + return self.state, StepEvent("MOVED", f"Moved to {self.state.agent_pos}.") + + # --- Pick up object at current position --- + if verb == "PICKUP": + pos = self.state.agent_pos + key = self._key_at(pos) + if key: + 
self.state.inventory.append(key["color"]) + self.state.keys = [k for k in self.state.keys if tuple(k["position"]) != pos] + self.state.step_count += 1 + return self.state, StepEvent("PICKUP", f"Picked up {key['color']} key.") + self.state.step_count += 1 + return self.state, StepEvent("NOTHING", f"Nothing to pick up at {pos}.") + + # --- Toggle facing switch only (opens/closes linked gates; doors and gates are not toggled directly) --- + if verb == "TOGGLE": + dr, dc = FACING_TO_DELTA[self.state.facing] + r, c = self.state.agent_pos + target = (r + dr, c + dc) + sw = self._switch_at(target) + if sw: + self._toggle_switch(sw) + self.state.step_count += 1 + st = "on" if sw.get("on") else "off" + return self.state, StepEvent("TOGGLED", f"Switch at {target} is {st}.") + self.state.step_count += 1 + if self._door_at(target) or self._gate_at(target): + return self.state, StepEvent("NOTHING", "Use PICKUP to collect keys. Doors open when you have the right key. Only switches can be TOGGLED (gates follow switch on/off).") + return self.state, StepEvent("NOTHING", f"No switch to toggle at {target}.") + + # --- Agent signals task complete --- + if verb == "DONE": + if self.state.agent_pos == self.state.goal: + return self.state, StepEvent("DONE", f"Task complete at {self.state.goal}.") + self.state.step_count += 1 + return self.state, StepEvent("WRONG_DONE", f"DONE called but not at goal {self.state.goal}.") + + return self.state, StepEvent("INVALID", f"Unknown action: {action}") + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _blocked(self, nr: int, nc: int) -> Optional[str]: + """Return a reason string if (nr, nc) is impassable, else None.""" + if nr < 1 or nr > self.state.rows or nc < 1 or nc > self.state.cols: + return "out of bounds" + if (nr, nc) in self.state.walls: + return "wall" + door = self._door_at((nr, nc)) + if door and door["requires_key"] not in 
self.state.inventory: + return f"locked {door['requires_key']} door" + gate = self._gate_at((nr, nc)) + if gate and gate.get("state", gate.get("initial_state", "closed")) == "closed": + return "closed gate" + return None + + def _key_at(self, pos: Pos): + return next((k for k in self.state.keys if tuple(k["position"]) == pos), None) + + def _door_at(self, pos: Pos): + return next((d for d in self.state.doors if tuple(d["position"]) == pos), None) + + def _switch_at(self, pos: Pos): + return next((s for s in self.state.switches if tuple(s["position"]) == pos), None) + + def _gate_at(self, pos: Pos): + return next((g for g in self.state.gates if tuple(g["position"]) == pos), None) + + def _recompute_gates_from_switches(self) -> None: + """A gate is open if any of its linked switches is on.""" + for gate in self.state.gates: + gid = gate.get("id") + if not gid: + continue + on = any( + bool(s.get("on")) and gid in s.get("controls", []) + for s in self.state.switches + ) + gate["state"] = "open" if on else "closed" + + def _toggle_switch(self, sw: Dict) -> None: + sw["on"] = not sw.get("on", False) + self._recompute_gates_from_switches() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py new file mode 100644 index 0000000..8d4ac4f --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py @@ -0,0 +1,17 @@ +import os + +# Optional: paste a token here for quick runs, or set HF_TOKEN in your shell / `huggingface-cli login`. +_HF_TOKEN_FOR_THIS_SCRIPT = "" +if _HF_TOKEN_FOR_THIS_SCRIPT: + os.environ["HF_TOKEN"] = _HF_TOKEN_FOR_THIS_SCRIPT + +from nlu_benchmark.runner import EpisodeRunner +from nlu_benchmark.agents import HuggingFaceLLMAgent, HFLLMConfig + +runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V02_winding_corridor.json") + +# Uses HFLLMConfig defaults (small Qwen on HF Router). Override model=... if needed. 
+agent = HuggingFaceLLMAgent(config=HFLLMConfig()) + +result = runner.run(agent) +print(result["success"]) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py new file mode 100644 index 0000000..02f578c --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py @@ -0,0 +1,15 @@ +from nlu_benchmark.runner import EpisodeRunner +from nlu_benchmark.agents import LocalTransformersAgent, LocalLLMConfig + +runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V02_winding_corridor.json") + +# Small local model (no HF inference credits required). +agent = LocalTransformersAgent( + config=LocalLLMConfig( + model="HuggingFaceTB/SmolLM2-360M-Instruct", + max_new_tokens=16, + ) +) + +result = runner.run(agent) +print(result["success"]) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py new file mode 100644 index 0000000..4e7a3ca --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py @@ -0,0 +1,9 @@ +from nlu_benchmark.runner import EpisodeRunner +from nlu_benchmark.agents import RandomAgent + +runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") + +agent = RandomAgent() +result = runner.run(agent) + +print("Success:", result["success"]) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/feedback.py b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py new file mode 100644 index 0000000..7bf58db --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py @@ -0,0 +1,39 @@ +"""Step feedback strings for the episode loop — independent of prompt strategy.""" + +from __future__ import annotations + +from typing import Any, Literal + +ObservationKind = Literal["text_only", "image_text", "screenshot_only"] + + +def action_feedback_for_prompt(observation: ObservationKind, text: str) -> str: + """Step outcomes for ``Last result:`` / history; cleared for 
screenshot-only. Parse failures are never masked (see runner).""" + if observation == "screenshot_only": + return "" + return text + + +def format_step_feedback( + action: str, event_type: str, event_message: str, prev_pos: Any +) -> str: + """Format env step for ``Last result:`` (branches match ``StepEvent.type`` in ``env``).""" + if event_type == "BLOCKED": + return f"BLOCKED — {action}: {event_message} You remain at {prev_pos}." + if event_type == "TURNED": + return f"TURNED — {action}: {event_message}" + if event_type == "MOVED": + return f"MOVED — {action}: {event_message}" + if event_type == "DONE": + return f"SUCCESS — {action}: {event_message}" + if event_type == "PICKUP": + return f"PICKUP — {action}: {event_message}" + if event_type == "NOTHING": + return f"NOTHING — {action}: {event_message} You remain at {prev_pos}." + if event_type == "TOGGLED": + return f"TOGGLED — {action}: {event_message}" + if event_type == "WRONG_DONE": + return f"WRONG DONE — {action}: {event_message} You remain at {prev_pos}." + if event_type == "INVALID": + return f"INVALID — {action}: {event_message} You remain at {prev_pos}." 
+ return f"{event_type} — {action}: {event_message}" diff --git a/src/v2/nlu_pipeline/nlu_benchmark/loader.py b/src/v2/nlu_pipeline/nlu_benchmark/loader.py new file mode 100644 index 0000000..c852f58 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/loader.py @@ -0,0 +1,23 @@ +import json +from pathlib import Path +from nlu_benchmark.env import GridWorldEnv + + +def load_maze(path) -> GridWorldEnv: + data = json.loads(Path(path).read_text(encoding="utf-8")) + maze = data["maze"] + rows, cols = maze["dimensions"] + walls = {tuple(w) for w in maze["walls"]} + start = tuple(maze["start"]) + goal = tuple(maze["goal"]) + max_steps = data.get("max_steps", 100) + mechanisms = data.get("mechanisms", {}) + return GridWorldEnv( + rows=rows, + cols=cols, + walls=walls, + start=start, + goal=goal, + max_steps=max_steps, + mechanisms=mechanisms, + ) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/observation.py b/src/v2/nlu_pipeline/nlu_benchmark/observation.py new file mode 100644 index 0000000..a4e3062 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/observation.py @@ -0,0 +1,92 @@ +"""Observation builder for the NLU benchmark. + +* **text_only** / **image_text** – The runner appends initial NL layout to the + system message once per episode. Each user turn: ``render_user_observation_text``, + last3 history, and live PNG when image is enabled. + +* **screenshot_only** – No initial NL block; user gets live PNG and action-only + history; step feedback as configured elsewhere. Fallback PNG on disk if live + render fails. 
+""" + +from __future__ import annotations + +import base64 +from pathlib import Path +from typing import List, Literal, Optional + +from nlu_benchmark.renderer import render_maze_image_png_bytes, render_user_observation_text + + +class _StepRecord: + __slots__ = ("position", "facing", "action", "feedback") + + def __init__(self, position, facing, action, feedback): + self.position = position + self.facing = facing + self.action = action + self.feedback = feedback + + +class ObservationBuilder: + """Builds what the model sees each step from config.observation + context_window.""" + + def __init__( + self, + observation: Literal["text_only", "image_text", "screenshot_only"], + context_window: Literal["current", "last3"], + ) -> None: + self._observation = observation + self._context_window = context_window + self._history: List[_StepRecord] = [] + + def reset(self) -> None: + self._history.clear() + + def record(self, position, facing: str, action: str, feedback: str) -> None: + self._history.append(_StepRecord(position, facing, action, feedback)) + + def history_text(self) -> str: + if self._context_window == "current" or not self._history: + return "" + recs = self._history[-3:] + if self._observation == "screenshot_only": + lines = ["Recent steps (oldest first, action only):"] + for rec in recs: + lines.append(f" {rec.action}") + return "\n".join(lines) + lines = ["Recent history (last 3 steps, oldest first):"] + for rec in recs: + lines.append( + f" {rec.position} facing {rec.facing} -> {rec.action} -> {rec.feedback}" + ) + return "\n".join(lines) + + def build_text(self, state) -> str: + if self._observation == "screenshot_only": + return "" + return render_user_observation_text(state) + + def build_image_blocks(self, state, maze_json_path: Optional[str]) -> List[dict]: + if self._observation == "text_only": + return [] + try: + raw = render_maze_image_png_bytes(state) + except Exception: + raw = b"" + if raw: + b64 = base64.b64encode(raw).decode("utf-8") + 
import re
from typing import List, Optional

# Canonical order for prompts / error messages (single source of truth).
ACTION_ORDER = (
    "TURN_LEFT",
    "TURN_RIGHT",
    "MOVE_FORWARD",
    "PICKUP",
    "TOGGLE",
    "DONE",
)
VALID_ACTIONS = set(ACTION_ORDER)
ACTIONS_HINT = ", ".join(ACTION_ORDER)

# Free-text phrases accepted by the fallback parser, mapped to canonical actions.
_SYNONYMS = {
    "turn left": "TURN_LEFT",
    "rotate left": "TURN_LEFT",
    "turn right": "TURN_RIGHT",
    "rotate right": "TURN_RIGHT",
    "move forward": "MOVE_FORWARD",
    "go forward": "MOVE_FORWARD",
    "forward": "MOVE_FORWARD",
    "pick up": "PICKUP",
    "pickup": "PICKUP",
    "toggle": "TOGGLE",
    "done": "DONE",
    "finished": "DONE",
}

_FINAL_OUTPUT_RE = re.compile(r"(?i)^FINAL_OUTPUT\s*:\s*(.*)\s*$")


def _synonym_pattern(phrase: str) -> "re.Pattern[str]":
    """Compile a lower-case matcher for one synonym phrase.

    Words may be separated by whitespace or underscores, so ``turn left`` and
    ``turn_left`` both match. The phrase must not start in the middle of a
    word, so ``done`` does not fire inside ``abandoned``.
    """
    words = [re.escape(word) for word in phrase.split()]
    return re.compile(r"(?<![a-z])" + r"[\s_]+".join(words))


# Compiled once at import time instead of on every fallback call.
_SYNONYM_PATTERNS = tuple(
    (_synonym_pattern(phrase), canonical) for phrase, canonical in _SYNONYMS.items()
)


def parse_final_output(
    text: str, allow_regex_fallback: bool = True
) -> Optional[List[str]]:
    """Parse model output into one or more validated action tokens, or None.

    The last 5 non-empty lines are scanned, newest first, for a line of the
    form ``FINAL_OUTPUT: ACTION`` or ``FINAL_OUTPUT: ACTION_1, ACTION_2, ...``
    (comma-separated). The first such line found is authoritative: if it is
    empty or contains any token that is not a valid action, the result is
    None and no fallback is attempted.

    If no ``FINAL_OUTPUT`` line exists and ``allow_regex_fallback`` is true,
    the action named by the last synonym occurring in the full text is
    returned as a single-element list.

    Args:
        text: Raw model reply.
        allow_regex_fallback: Whether to search the free text for synonyms
            when no ``FINAL_OUTPUT`` line is present.

    Returns:
        A non-empty list of canonical action strings, or None when nothing
        valid could be extracted.
    """
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

    for line in reversed(lines[-5:]):
        found = _FINAL_OUTPUT_RE.match(line)
        if not found:
            continue
        actions: List[str] = []
        for part in found.group(1).split(","):
            token = part.strip()
            if not token:
                continue
            action = normalize_action(token)
            if not action:
                # One bad token invalidates the whole declared sequence.
                return None
            actions.append(action)
        return actions or None

    if allow_regex_fallback:
        lowered = text.lower()
        last_start = -1
        last_action = None
        for pattern, canonical in _SYNONYM_PATTERNS:
            for hit in pattern.finditer(lowered):
                # ">=" keeps the later-declared synonym on ties, matching the
                # stable sort the fallback has always used.
                if hit.start() >= last_start:
                    last_start, last_action = hit.start(), canonical
        if last_action is not None:
            return [last_action]

    return None


def normalize_action(raw: str) -> str:
    """Normalize a raw token from a comma-separated list into a canonical action string.

    Surrounding whitespace is ignored, case is folded to upper, and any run of
    internal whitespace becomes a single underscore (``"move  forward"`` ->
    ``"MOVE_FORWARD"``).

    Returns "" for unrecognized tokens.
    """
    verb = "_".join(raw.upper().split())
    return verb if verb in VALID_ACTIONS else ""
+ +Initial maze NL is ``render_initial_maze_text`` in the system prompt; each user +turn includes ``render_user_observation_text`` (when text or image+text), not +here. +""" + +from __future__ import annotations + +from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA + +# Standard system prompt: high-level object types. Verbose reuses this and adds +# MECHANISM_RULES + per-step hints in the user turn. +MECHANISM_LIST = ( + "The environment may contain:\n" + "- Keys: pick them up to open doors of the matching color\n" + "- Doors: blocked passages that require a matching key\n" + "- Switches: toggle these to open or close linked gates\n" + "- Gates: blocked passages controlled by switches\n" +) + +# Verbose system prompt: operational rules (action semantics). Not in Standard. +MECHANISM_RULES = ( + "RULES (domain logic):\n" + " - PICKUP: take a key on your current cell and store it in your inventory.\n" + " - Doors: keys and doors are color-matched. With the matching key in your inventory, move onto\n" + " the door to open it\n" + " - Switches: face a switch and TOGGLE to flip it on or off. Only switches are toggled. Linked\n" + " gates are open if at least one linked switch is on, and closed if all are off.\n" + " - Gates: you cannot TOGGLE a gate. CLOSED gates block movement; OPEN gates do not.\n" + " - Closed gates and doors you lack a key for block movement like walls until resolved.\n" + " - Use DONE only when you are standing on the goal cell." +) + +# How models must terminate the reply (Minimal + Standard + Verbose base). +FINAL_OUTPUT_INSTRUCTION = ( + "On the last line, output exactly:\n" + "FINAL_OUTPUT: or FINAL_OUTPUT: , , ... 
" + "(comma-separated; one or more valid actions)" +) + + +class PromptStrategy: + """Base: shared action hint injection.""" + + def __init__(self, actions_hint: str) -> None: + self._actions_hint = actions_hint + + def build_system_prompt(self, querying_suffix: str = "") -> str: + raise NotImplementedError + + def build_user_prompt( + self, + obs_text: str, + history_text: str, + state, + last_feedback: str, + ) -> str: + raise NotImplementedError + + +# --------------------------------------------------------------------------- +# Minimal — goal + action list only +# --------------------------------------------------------------------------- + +class MinimalPromptStrategy(PromptStrategy): + def build_system_prompt(self, querying_suffix: str = "") -> str: + return ( + "Task: move to the goal cell in the grid.\n" + f"Valid actions: {self._actions_hint}.\n" + f"{FINAL_OUTPUT_INSTRUCTION}" + + (f"\n\n{querying_suffix}" if querying_suffix else "") + ) + + def build_user_prompt( + self, + obs_text: str, + history_text: str, + state, + last_feedback: str, + ) -> str: + history_block = f"{history_text}\n\n" if history_text else "" + obs_block = f"Observation:\n{obs_text}\n\n" if obs_text else "" + return ( + f"{history_block}" + f"{obs_block}" + f"Position: {state.agent_pos} | Facing: {state.facing} | Goal: {state.goal} | " + f"Step {state.step_count + 1}/{state.max_steps}\n" + f"Last result: {last_feedback}\n" + "What is your next action?" 
+ ) + + +# --------------------------------------------------------------------------- +# Standard — mechanism list only (user prompt same as Minimal) +# --------------------------------------------------------------------------- + +class StandardPromptStrategy(MinimalPromptStrategy): + def build_system_prompt(self, querying_suffix: str = "") -> str: + return ( + "Task: move to the goal cell in the grid.\n" + f"{MECHANISM_LIST}\n" + f"Valid actions: {self._actions_hint}.\n" + f"{FINAL_OUTPUT_INSTRUCTION}" + + (f"\n\n{querying_suffix}" if querying_suffix else "") + ) + + +# --------------------------------------------------------------------------- +# Verbose — mechanism list + rules (system); optional hint lines (user) +# --------------------------------------------------------------------------- + +class VerbosePromptStrategy(StandardPromptStrategy): + def build_system_prompt(self, querying_suffix: str = "") -> str: + std = StandardPromptStrategy.build_system_prompt(self, "").rstrip() + chunks = [std, MECHANISM_RULES] + if querying_suffix: + chunks.append(querying_suffix) + return "\n\n".join(chunks) + + def build_user_prompt( + self, + obs_text: str, + history_text: str, + state, + last_feedback: str, + ) -> str: + steps_left = state.max_steps - state.step_count + budget_warn = ( + f" WARNING: Only {steps_left} steps remaining!\n" + if steps_left <= max(5, state.max_steps // 5) + else "" + ) + r, c = state.agent_pos + gr, gc = state.goal + manhattan = abs(r - gr) + abs(c - gc) + + facing_idx = FACING_ORDER.index(state.facing) + rel_dirs = [ + ("AHEAD", FACING_ORDER[facing_idx % 4]), + ("RIGHT", FACING_ORDER[(facing_idx + 1) % 4]), + ("BEHIND", FACING_ORDER[(facing_idx + 2) % 4]), + ("LEFT", FACING_ORDER[(facing_idx + 3) % 4]), + ] + neighbour_lines = [] + for rel, cardinal in rel_dirs: + dr, dc = FACING_TO_DELTA[cardinal] + nr, nc = r + dr, c + dc + if nr < 1 or nr > state.rows or nc < 1 or nc > state.cols: + desc = "out of bounds" + elif (nr, nc) in state.walls: 
+ desc = "wall" + elif (nr, nc) == state.goal: + desc = f"GOAL ({nr},{nc})" + else: + desc = f"open ({nr},{nc})" + neighbour_lines.append(f" {rel}: {desc}") + neighbour_block = "From your perspective:\n" + "\n".join(neighbour_lines) + "\n" + + mechanism_block = _mechanism_hints_text(state) + + history_block = f"{history_text}\n\n" if history_text else "" + obs_block = f"Observation:\n{obs_text}\n\n" if obs_text else "" + inventory_str = ", ".join(state.inventory) if state.inventory else "none" + + return ( + f"{history_block}" + f"{obs_block}" + f"Position: {state.agent_pos} | Facing: {state.facing} | Goal: {state.goal} | " + f"Manhattan: {manhattan} | Step {state.step_count + 1}/{state.max_steps} ({steps_left} left)\n" + f"Inventory: {inventory_str}\n" + f"{budget_warn}" + f"{neighbour_block}" + f"{mechanism_block}" + f"Last result: {last_feedback}\n" + "What is your next action?" + ) + + +def _mechanism_hints_text(state) -> str: + """Short reminders when the map has interactive objects; observation still has details.""" + lines = [] + if state.keys or state.doors: + lines.append(" - PICKUP keys; with the right key, MOVE_FORWARD into a door to open it.") + if state.switches or state.gates: + lines.append(" - Face a switch and TOGGLE; gates follow linked switches (do not TOGGLE gates).") + if not lines: + return "" + return "Hints:\n" + "\n".join(lines) + "\n" diff --git a/src/v2/nlu_pipeline/nlu_benchmark/querying.py b/src/v2/nlu_pipeline/nlu_benchmark/querying.py new file mode 100644 index 0000000..fcf352c --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/querying.py @@ -0,0 +1,92 @@ +"""Querying modes for the NLU benchmark. + +A single `QueryingMode` class covers all three behaviours; only `should_query()` +and a few small details differ: + + step_by_step — one LLM call per env step: queue holds at most one action + (only the first action from FINAL_OUTPUT is used; then re-query). 
+ subgoal — same output format as full trajectory, but re-query when the + queue runs out, after failures, or mid-episode. + full_trajectory — one query per episode; same SUB_GOAL / ACTIONS format + (or FINAL_OUTPUT: … as fallback, like step_by_step). + +The episode loop lives in ExperimentRunner.run() (runner.py), not here. +""" + +from __future__ import annotations + +import re +from typing import List, Literal + +from nlu_benchmark.parser import normalize_action, parse_final_output + +QueryingKind = Literal["step_by_step", "subgoal", "full_trajectory"] + +_SUBGOAL_RE = re.compile(r"(?i)SUB_GOAL\s*:\s*(.+)") +_ACTIONS_RE = re.compile(r"(?i)ACTIONS\s*:\s*(.+)") + + +class QueryingMode: + """When to call the model and how to parse its reply.""" + + def __init__(self, kind: QueryingKind) -> None: + self.kind = kind + self.current_subgoal = "" + self._trajectory_loaded = False + + def reset(self) -> None: + self.current_subgoal = "" + self._trajectory_loaded = False + + def should_query(self, queue, failures) -> bool: + if self.kind == "step_by_step": + # With at most one queued action (see parse_actions), this is true after each step. 
+ return not queue + if self.kind == "subgoal": + return not queue or failures >= 3 + # full_trajectory + return not self._trajectory_loaded and not queue + + def parse_actions(self, model_text: str) -> List[str]: + if self.kind == "step_by_step": + out = parse_final_output(model_text) + return [out[0]] if out else [] + + m = _SUBGOAL_RE.search(model_text) + self.current_subgoal = m.group(1).strip() if m else "" + + m2 = _ACTIONS_RE.search(model_text) + if m2: + actions = [a for a in (normalize_action(t) for t in m2.group(1).split(",")) if a] + else: + out = parse_final_output(model_text) + actions = out if out else [] + + if self.kind == "full_trajectory" and actions: + self._trajectory_loaded = True + return actions + + def system_prompt_suffix(self) -> str: + if self.kind == "step_by_step": + return "" + if self.kind == "subgoal": + return ( + "For each turn output:\n" + " SUB_GOAL: \n" + " ACTIONS: " + ) + return ( + "Output your complete trajectory once as:\n" + " SUB_GOAL: \n" + " ACTIONS: \n" + "The last action in ACTIONS should be DONE (when you expect to be at the goal).\n" + "You will not be queried again — this is your only planning turn." 
+ ) + + def step_metadata(self) -> dict: + if self.kind == "step_by_step": + return {} + meta = {"subgoal": self.current_subgoal} + if self.kind == "full_trajectory": + meta["full_trajectory"] = True + return meta diff --git a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py new file mode 100644 index 0000000..ab00dff --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py @@ -0,0 +1,130 @@ +"""Maze text split: **initial** layout (system) vs **current** situation (user turn).""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +from nlu_benchmark.env import GridState + +_RENDER_DATASET_MOD = None + + +def _render_dataset_module(): + """Load ``automatic_maze_generation/render_dataset.py`` without requiring ``v2`` on ``PYTHONPATH``.""" + global _RENDER_DATASET_MOD + if _RENDER_DATASET_MOD is None: + path = Path(__file__).resolve().parents[2] / "automatic_maze_generation" / "render_dataset.py" + name = "_multinet_automatic_maze_generation_render_dataset" + spec = importlib.util.spec_from_file_location(name, path) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot load maze renderer from {path}") + mod = importlib.util.module_from_spec(spec) + sys.modules[name] = mod + spec.loader.exec_module(mod) + _RENDER_DATASET_MOD = mod + return _RENDER_DATASET_MOD + + +def _grid_state_to_maze_payload(state: GridState) -> dict: + """JSON-shaped maze dict for ``render_maze_payload`` / ``render_maze_payload_bytes``.""" + return { + "task_id": "nlu_live", + "maze": { + # Unified convention: payloads are always (row, col). 
def _static_layout_lines(state: GridState) -> list[str]:
    """Return the sentences describing the fixed grid: size, start, goal and walls."""
    wall_cells = [f"({row},{col})" for row, col in sorted(state.walls)]
    walls_text = ", ".join(wall_cells) if wall_cells else "none"

    layout: list[str] = []
    layout.append(f"The world is a {state.rows} by {state.cols} grid.")
    layout.append("Coordinates are given as (row, column).")
    layout.append("The top-left corner is (1,1).")
    layout.append(f"The start is at {state.start}.")
    layout.append(f"The goal is at {state.goal}.")
    layout.append(f"The following cells are walls: {walls_text}.")
    return layout


def _mechanism_lines(state: GridState) -> list[str]:
    """Return one sentence per key, door, switch and gate in ``state``, in that order."""

    def cell(item) -> str:
        # Every mechanism dict carries a two-element "position".
        row, col = item["position"]
        return f"({row},{col})"

    sentences = [
        f"There is a {key['color']} key at {cell(key)}." for key in state.keys
    ]

    sentences += [
        f"There is a locked {door['requires_key']} door at {cell(door)}."
        f" It requires the {door['requires_key']} key to open."
        for door in state.doors
    ]

    for switch in state.switches:
        kind = switch.get("switch_type", "toggle")
        status = "on" if switch.get("on") else "off"
        targets = ", ".join(switch.get("controls", []))
        sentences.append(
            f"There is a {kind} switch at {cell(switch)} (currently {status})."
            f" It controls: {targets}."
        )

    for gate in state.gates:
        initial = gate.get("initial_state", "closed")
        # A live "state" entry wins; otherwise fall back to the initial state.
        current = gate.get("state", initial)
        sentences.append(
            f"There is a gate ({gate['id']}) at {cell(gate)}."
            f" It is currently {current} (initially {initial})."
        )

    return sentences
def render_user_observation_text(state: GridState) -> str:
    """Describe the current step for the user turn (text and image+text modes).

    The text lists the goal, the agent pose, steps used, the inventory and
    then the sentences from ``_mechanism_lines`` (or a placeholder line when
    there are none), joined with newlines.
    """
    inventory_text = "empty"
    if state.inventory:
        inventory_text = ", ".join(state.inventory)

    report = [
        "Current situation (this step):",
        f"The goal is at {state.goal}.",
        f"You are at {state.agent_pos} facing {state.facing}.",
        f"Environment steps used so far: {state.step_count} (max {state.max_steps} before timeout).",
        f"Your inventory: {inventory_text}.",
        "",
        "Map contents as of this step (keys on the ground, doors, switches, gates):",
    ]

    mechanism_sentences = _mechanism_lines(state)
    if not mechanism_sentences:
        mechanism_sentences = [
            "(No keys on the ground, doors, switches, or gates in the current state description.)"
        ]
    report.extend(mechanism_sentences)
    return "\n".join(report)


def render_maze_image_png_bytes(state: GridState) -> bytes:
    """Render ``state`` to PNG bytes via the dataset renderer's ``render_maze_payload_bytes``."""
    renderer = _render_dataset_module()
    maze_payload = _grid_state_to_maze_payload(state)
    # Unpacking insists on a two-element position and hands over a plain tuple.
    agent_row, agent_col = state.agent_pos
    return renderer.render_maze_payload_bytes(
        maze_payload,
        dpi=150,
        agent_pos=(agent_row, agent_col),
        facing=state.facing,
    )
+ +Usage +----- + from nlu_benchmark.config import ExperimentConfig + from nlu_benchmark.runner import build_runner + + cfg = ExperimentConfig(prompting="verbose", querying="full_trajectory") + runner = build_runner(cfg, env, maze_json_path="path/to/maze.json") + result = runner.run(agent) # verbose=True: print progress + result = runner.run(agent, verbose=False) # quiet for batch runs + +Or from a JSON file directly: + + runner = ExperimentRunner.from_json("path/to/maze.json", config=cfg) + result = runner.run(agent) +""" + +from __future__ import annotations + +from typing import Callable, List, Optional + +from nlu_benchmark.config import ExperimentConfig +from nlu_benchmark.feedback import action_feedback_for_prompt, format_step_feedback +from nlu_benchmark.observation import ObservationBuilder +from nlu_benchmark.prompt_strategies import ( + PromptStrategy, + MinimalPromptStrategy, + StandardPromptStrategy, + VerbosePromptStrategy, +) +from nlu_benchmark.parser import ACTIONS_HINT +from nlu_benchmark.querying import QueryingMode +from nlu_benchmark.renderer import render_initial_maze_text + + +# --------------------------------------------------------------------------- +# Factory +# --------------------------------------------------------------------------- + +def build_runner( + config: ExperimentConfig, + env, + maze_json_path: Optional[str] = None, +) -> ExperimentRunner: + """Assemble an ExperimentRunner from a config. + + This is the one place that maps config values to concrete implementations. 
class ExperimentRunner:
    """Runs a maze episode. Owns the full episode loop."""

    def __init__(
        self,
        env,
        config: ExperimentConfig,
        obs_builder: ObservationBuilder,
        prompt_strategy: PromptStrategy,
        querying_mode,
        maze_json_path: Optional[str] = None,
    ) -> None:
        self.env = env
        self.config = config
        self.obs = obs_builder
        self.prompt = prompt_strategy
        self.querying = querying_mode
        self.maze_json_path = maze_json_path

    @classmethod
    def from_json(
        cls,
        path: str,
        config: Optional[ExperimentConfig] = None,
    ) -> ExperimentRunner:
        """Load the maze at ``path`` and build a runner for it (default config if none given)."""
        from nlu_benchmark.loader import load_maze
        return build_runner(config or ExperimentConfig(), load_maze(path), path)

    # ------------------------------------------------------------------
    # Episode loop
    # ------------------------------------------------------------------

    def run(
        self,
        agent: Callable[[List[dict]], str],
        *,
        verbose: bool = True,
        max_unparsed_replies: Optional[int] = None,
    ) -> dict:
        """Run one full episode.

        Parameters
        ----------
        agent:
            Callable that receives the chat message list and returns the
            model's reply text.
        verbose:
            If True, print per-step progress to stdout. Use False for batch evaluation.
        max_unparsed_replies:
            Maximum number of consecutive replies that yield no valid action
            before the episode is abandoned as a failure. Unparseable replies
            do not advance the environment step counter, so without this
            bound a model that never answers in the expected format would be
            queried forever. Defaults to the maze's ``max_steps``.

        Returns
        -------
        dict:
            success      – bool
            steps_used   – int
            final_state  – GridState
            transcript   – list[dict] with one record per executed action
            config       – dict, serialised ExperimentConfig for this run
        """
        state = self.env.reset()
        self.obs.reset()
        self.querying.reset()

        system_prompt = self.prompt.build_system_prompt(self.querying.system_prompt_suffix())
        if self.config.observation in ("text_only", "image_text"):
            system_prompt = (
                f"{system_prompt}\n\nInitial maze (fixed for this episode):\n"
                f"{render_initial_maze_text(state)}"
            )
        messages: List[dict] = [{"role": "system", "content": system_prompt}]

        action_queue: List[str] = []
        last_feedback = "Episode start."
        consecutive_failures = 0
        transcript: List[dict] = []
        max_steps = self.env.initial.max_steps

        unparsed_limit = max_steps if max_unparsed_replies is None else max_unparsed_replies
        unparsed_replies = 0

        while state.step_count < max_steps:

            # --- Query model if needed ---
            if self.querying.should_query(action_queue, consecutive_failures):
                consecutive_failures = 0
                messages.append(self._build_message(state, last_feedback))
                model_text = agent(messages)
                messages.append({"role": "assistant", "content": model_text})
                action_queue = self.querying.parse_actions(model_text)

                if not action_queue:
                    unparsed_replies += 1
                    if unparsed_replies >= unparsed_limit:
                        # step_count never moves on a parse failure, so this is
                        # the only thing that ends the loop for such a model.
                        if verbose:
                            print(f" Giving up after {unparsed_replies} unparseable replies")
                        break
                    last_feedback = (
                        f"Could not parse FINAL_OUTPUT (one or more valid actions). "
                        f"Use only: {ACTIONS_HINT}."
                    )
                    continue
                unparsed_replies = 0

            if not action_queue:
                # e.g. full trajectory finished executing (no re-query)
                break

            # --- Execute next queued action ---
            action = action_queue.pop(0)
            position_before = state.agent_pos

            state, event = self.env.step(action)
            step_detail = format_step_feedback(action, event.type, event.message, position_before)
            last_feedback = action_feedback_for_prompt(self.config.observation, step_detail)
            event_type = event.type

            if event_type in {"BLOCKED", "WRONG_DONE", "INVALID"}:
                consecutive_failures += 1
                action_queue.clear()  # abandon the rest of the planned sequence
            else:
                consecutive_failures = 0

            transcript.append({
                "step": state.step_count,
                "position_before": position_before,
                "position_after": state.agent_pos,
                "action": action,
                "event_type": event_type,
                "feedback": step_detail,
                **self.querying.step_metadata(),
            })

            # NOTE(review): this records the pose *after* the action, while the
            # history text renders it as "pose -> action -> feedback". Confirm
            # whether the pre-action pose was intended.
            self.obs.record(state.agent_pos, state.facing, action, last_feedback)

            if event_type == "DONE":
                if verbose:
                    print(f" Success at step {state.step_count}")
                return self._result(True, state, transcript)

            if verbose:
                print(f" Step {state.step_count}/{max_steps}: {action} -> {event_type}")

        return self._result(False, state, transcript)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _build_message(self, state, last_feedback: str) -> dict:
        """Build the next user message: image blocks (if any) followed by the prompt text."""
        obs_text = self.obs.build_text(state)
        history_text = self.obs.history_text()
        prompt_text = self.prompt.build_user_prompt(obs_text, history_text, state, last_feedback)
        images = self.obs.build_image_blocks(state, self.maze_json_path)
        if images:
            return {"role": "user", "content": images + [{"type": "text", "text": prompt_text}]}
        return {"role": "user", "content": prompt_text}

    def _result(self, success: bool, state, transcript: List[dict]) -> dict:
        """Package the episode outcome in the dict shape documented on ``run``."""
        return {
            "success": success,
            "steps_used": state.step_count,
            "final_state": state,
            "transcript": transcript,
            "config": self.config.to_dict(),
        }
+ 7, + 2 + ], + [ + 7, + 4 + ], + [ + 7, + 6 + ], + [ + 8, + 2 + ], + [ + 8, + 4 + ], + [ + 8, + 6 + ], + [ + 9, + 2 + ], + [ + 9, + 4 + ], + [ + 9, + 6 + ], + [ + 10, + 2 + ], + [ + 10, + 4 + ], + [ + 10, + 6 + ], + [ + 11, + 2 + ], + [ + 11, + 4 + ], + [ + 11, + 6 + ], + [ + 12, + 2 + ], + [ + 12, + 4 + ], + [ + 12, + 6 + ], + [ + 13, + 2 + ], + [ + 13, + 4 + ], + [ + 13, + 6 + ], + [ + 14, + 2 + ], + [ + 14, + 4 + ], + [ + 14, + 6 + ], + [ + 15, + 2 + ], + [ + 15, + 4 + ], + [ + 15, + 6 + ], + [ + 16, + 2 + ], + [ + 16, + 4 + ], + [ + 16, + 6 + ], + [ + 17, + 2 + ], + [ + 17, + 4 + ], + [ + 17, + 6 + ], + [ + 18, + 4 + ] + ], + "start": [ + 1, + 1 + ], + "goal": [ + 18, + 6 + ] + }, + "mechanisms": { + "keys": [], + "doors": [], + "switches": [], + "gates": [], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 18, + 6 + ], + "auxiliary_conditions": [] + }, + "metadata": { + "chain_pattern": "none", + "tiling": "square", + "wall_topology": "serpentine_corridor", + "turn_count": 5 + }, + "max_steps": 220 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json new file mode 100644 index 0000000..70f6119 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json @@ -0,0 +1,255 @@ +{ + "task_id": "validation_10_v03_multi_path", + "version": "2.0", + "seed": 103, + "difficulty_tier": 1, + "description": "Three structurally distinct routes connect the start room to the goal room.", + "maze": { + "dimensions": [ + 12, + 12 + ], + "walls": [ + [ + 1, + 1 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 1, + 9 + ], + [ + 1, + 10 + ], + [ + 2, + 1 + ], + [ + 2, + 2 + ], + [ + 2, + 3 + ], + [ + 2, + 9 + ], + [ + 2, + 10 + ], + [ + 3, + 5 + ], + [ + 3, 
+ 7 + ], + [ + 3, + 9 + ], + [ + 3, + 10 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 4, + 7 + ], + [ + 5, + 2 + ], + [ + 5, + 3 + ], + [ + 5, + 4 + ], + [ + 5, + 7 + ], + [ + 5, + 8 + ], + [ + 5, + 9 + ], + [ + 6, + 2 + ], + [ + 6, + 3 + ], + [ + 6, + 4 + ], + [ + 6, + 6 + ], + [ + 6, + 7 + ], + [ + 6, + 8 + ], + [ + 6, + 9 + ], + [ + 7, + 2 + ], + [ + 7, + 3 + ], + [ + 7, + 4 + ], + [ + 7, + 6 + ], + [ + 7, + 7 + ], + [ + 7, + 8 + ], + [ + 7, + 9 + ], + [ + 9, + 1 + ], + [ + 9, + 2 + ], + [ + 9, + 3 + ], + [ + 9, + 9 + ], + [ + 9, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 2 + ], + [ + 10, + 3 + ], + [ + 10, + 9 + ], + [ + 10, + 10 + ] + ], + "start": [ + 1, + 6 + ], + "goal": [ + 10, + 6 + ] + }, + "mechanisms": { + "keys": [], + "doors": [], + "switches": [], + "gates": [], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 10, + 6 + ], + "auxiliary_conditions": [] + }, + "metadata": { + "chain_pattern": "none", + "tiling": "square", + "wall_topology": "triple_route_maze", + "path_count": 3, + "path_lengths": [ + 11, + 15, + 19 + ] + }, + "max_steps": 140 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json new file mode 100644 index 0000000..de290aa --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json @@ -0,0 +1,96 @@ +{ + "task_id": "validation_10_v04_single_key", + "version": "2.0", + "seed": 104, + "difficulty_tier": 2, + "description": "Retrieve the red key from the lower vault, return through the foyer, and open the red door guarding the goal room.", + "maze": { + "dimensions": [ + 14, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 
6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], + [5, 4], [5, 5], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 10], + [8, 1], [8, 3], [8, 4], [8, 5], [8, 10], + [9, 4], [9, 5], [9, 6], [9, 7], [9, 8], [9, 9], [9, 10], + [10, 4], [10, 5], [10, 6], [10, 7], [10, 8], [10, 9], [10, 10], + [11, 4], [11, 5], [11, 6], [11, 7], [11, 8], [11, 9], [11, 10], + [12, 1], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 12, + 2 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 5, + 8 + ], + "color": "red" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 9, + 2 + ], + "requires_key": "red", + "initial_state": "locked" + } + ], + "switches": [], + "gates": [], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 12, + 2 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 1, + "sequence": [ + { + "step": 1, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + } + ], + "notation": "kR -> DR -> G" + }, + "metadata": { + "chain_pattern": "key_door", + "tiling": "square", + "wall_topology": "room_chain_with_key_branch" + }, + "max_steps": 140 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json new file mode 100644 index 0000000..b520383 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json @@ -0,0 +1,99 @@ +{ + "task_id": "validation_10_v05_single_switch", + "version": "2.0", + "seed": 105, + "difficulty_tier": 2, + "description": "Trigger the switch in the lower vault to open the gate guarding the goal room.", + "maze": { + 
"dimensions": [ + 14, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], + [5, 4], [5, 5], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 10], + [8, 1], [8, 3], [8, 4], [8, 5], [8, 10], + [9, 4], [9, 5], [9, 6], [9, 7], [9, 8], [9, 9], [9, 10], + [10, 4], [10, 5], [10, 6], [10, 7], [10, 8], [10, 9], [10, 10], + [11, 4], [11, 5], [11, 6], [11, 7], [11, 8], [11, 9], [11, 10], + [12, 1], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 12, + 2 + ] + }, + "mechanisms": { + "keys": [], + "doors": [], + "switches": [ + { + "id": "s1", + "position": [ + 5, + 8 + ], + "controls": [ + "g1" + ], + "switch_type": "toggle", + "initial_state": "off" + } + ], + "gates": [ + { + "id": "g1", + "position": [ + 9, + 2 + ], + "initial_state": "closed" + } + ], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 12, + 2 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 1, + "sequence": [ + { + "step": 1, + "type": "switch-gate", + "element": "s1", + "unlocks": "g1" + } + ], + "notation": "s1 -> g1 -> G" + }, + "metadata": { + "chain_pattern": "switch_gate", + "tiling": "square", + "wall_topology": "room_chain_with_switch_branch" + }, + "max_steps": 140 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json new file mode 100644 index 0000000..3bb1fab --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json @@ -0,0 +1,124 @@ +{ + "task_id": "validation_10_v06_chain_ks", + "version": "2.0", + 
"seed": 106, + "difficulty_tier": 3, + "description": "The red key opens the upper choke; the switch in the lower crypt opens the final gate to the goal chamber.", + "maze": { + "dimensions": [ + 14, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], + [5, 4], [5, 5], [5, 6], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 6], [7, 10], + [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], + [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], + [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], + [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], + [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 12, + 8 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 2, + 3 + ], + "color": "red" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 5, + 2 + ], + "requires_key": "red", + "initial_state": "locked" + } + ], + "switches": [ + { + "id": "s1", + "position": [ + 6, + 8 + ], + "controls": [ + "g1" + ], + "switch_type": "toggle", + "initial_state": "off" + } + ], + "gates": [ + { + "id": "g1", + "position": [ + 11, + 8 + ], + "initial_state": "closed" + } + ], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 12, + 8 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 2, + "sequence": [ + { + "step": 1, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + }, + { + "step": 2, + "type": "switch-gate", + "element": "s1", + "unlocks": "g1" + } + ], + "notation": "kR -> DR -> s1 -> g1 -> G" + }, + "metadata": { + 
"chain_pattern": "ks", + "tiling": "square", + "wall_topology": "shared_room_chain_layout" + }, + "max_steps": 180 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json new file mode 100644 index 0000000..0ad9095 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json @@ -0,0 +1,124 @@ +{ + "task_id": "validation_10_v07_chain_sk", + "version": "2.0", + "seed": 107, + "difficulty_tier": 3, + "description": "The switch opens the upper choke; the red key waits in the lower crypt behind that first mechanism, and the final door guards the goal chamber.", + "maze": { + "dimensions": [ + 14, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], + [5, 4], [5, 5], [5, 6], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 6], [7, 10], + [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], + [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], + [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], + [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], + [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 12, + 8 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 6, + 8 + ], + "color": "red" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 11, + 8 + ], + "requires_key": "red", + "initial_state": "locked" + } + ], + "switches": [ + { + "id": "s1", + "position": [ + 2, + 3 + ], + "controls": [ + "g1" + ], + "switch_type": "toggle", + "initial_state": "off" + } + ], + "gates": [ + { + "id": "g1", + "position": [ + 5, + 2 + ], + "initial_state": "closed" + } + ], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + 
"key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 12, + 8 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 2, + "sequence": [ + { + "step": 1, + "type": "switch-gate", + "element": "s1", + "unlocks": "g1" + }, + { + "step": 2, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + } + ], + "notation": "s1 -> g1 -> kR -> DR -> G" + }, + "metadata": { + "chain_pattern": "sk", + "tiling": "square", + "wall_topology": "shared_room_chain_layout" + }, + "max_steps": 180 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json new file mode 100644 index 0000000..09ae2a1 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json @@ -0,0 +1,119 @@ +{ + "task_id": "validation_10_v08_chain_kk", + "version": "2.0", + "seed": 108, + "difficulty_tier": 3, + "description": "Two key-door pairs occupy the same dungeon layout: red for the upper choke, blue for the final gate room choke.", + "maze": { + "dimensions": [ + 14, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], + [5, 4], [5, 5], [5, 6], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 6], [7, 10], + [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], + [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], + [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], + [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], + [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 12, + 8 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 2, 
+ 3 + ], + "color": "red" + }, + { + "id": "kB", + "position": [ + 6, + 8 + ], + "color": "blue" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 5, + 2 + ], + "requires_key": "red", + "initial_state": "locked" + }, + { + "id": "DB", + "position": [ + 11, + 8 + ], + "requires_key": "blue", + "initial_state": "locked" + } + ], + "switches": [], + "gates": [], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 12, + 8 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 2, + "sequence": [ + { + "step": 1, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + }, + { + "step": 2, + "type": "key-door", + "element": "kB", + "unlocks": "DB" + } + ], + "notation": "kR -> DR -> kB -> DB -> G" + }, + "metadata": { + "chain_pattern": "kk", + "tiling": "square", + "wall_topology": "shared_room_chain_layout" + }, + "max_steps": 180 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json new file mode 100644 index 0000000..b2e6fc8 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json @@ -0,0 +1,126 @@ +{ + "task_id": "validation_10_v09_distractor_simple", + "version": "2.0", + "seed": 109, + "difficulty_tier": 3, + "description": "The red key-door chain is critical, but two wrong-color keys sit in dead-end side rooms off the main dungeon route.", + "maze": { + "dimensions": [ + 16, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], + [5, 4], [5, 5], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 10], + [8, 1], 
[8, 3], [8, 4], [8, 5], + [9, 6], [9, 7], [9, 8], + [10, 6], [10, 7], [10, 8], + [11, 6], [11, 7], [11, 8], + [12, 1], [12, 3], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10], + [13, 1], [13, 3], [13, 4], [13, 5], [13, 6], [13, 7], [13, 8], [13, 9], [13, 10], + [14, 1], [14, 3], [14, 4], [14, 5], [14, 6], [14, 7], [14, 8], [14, 9], [14, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 14, + 2 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 5, + 8 + ], + "color": "red" + }, + { + "id": "kY", + "position": [ + 11, + 4 + ], + "color": "yellow" + }, + { + "id": "kB", + "position": [ + 10, + 10 + ], + "color": "blue" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 9, + 2 + ], + "requires_key": "red", + "initial_state": "locked" + } + ], + "switches": [], + "gates": [], + "blocks": [], + "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 14, + 2 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 1, + "sequence": [ + { + "step": 1, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + } + ], + "notation": "kR -> DR -> G" + }, + "distractors": [ + { + "type": "wrong_color_key", + "element_id": "kY", + "description": "Yellow key in an upper dead-end chamber." + }, + { + "type": "wrong_color_key", + "element_id": "kB", + "description": "Blue key in a lower dead-end chamber." 
+ } + ], + "metadata": { + "chain_pattern": "key_door_with_dead_end_distractors", + "tiling": "square", + "wall_topology": "room_chain_with_dead_end_branches" + }, + "max_steps": 220 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json new file mode 100644 index 0000000..88e274c --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json @@ -0,0 +1,122 @@ +{ + "task_id": "validation_10_v10_distractor_chain", + "version": "2.0", + "seed": 110, + "difficulty_tier": 3, + "description": "The red path reaches the goal, but a green key-door chain opens a dead-end upper spur that looks like progress.", + "maze": { + "dimensions": [ + 16, + 12 + ], + "walls": [ + [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], + [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], + [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], + [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], + [5, 4], [5, 5], [5, 10], + [6, 10], + [7, 4], [7, 5], [7, 10], + [8, 1], [8, 3], [8, 4], [8, 5], [8, 10], + [9, 6], [9, 7], [9, 8], [9, 9], [9, 10], + [10, 6], [10, 7], [10, 8], [10, 9], [10, 10], + [11, 6], [11, 7], [11, 8], [11, 9], [11, 10], + [12, 1], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10], + [13, 1], [13, 3], [13, 6], [13, 7], [13, 8], [13, 9], [13, 10], + [14, 1], [14, 3], [14, 4], [14, 5], [14, 6], [14, 7], [14, 8], [14, 9], [14, 10] + ], + "start": [ + 1, + 2 + ], + "goal": [ + 14, + 2 + ] + }, + "mechanisms": { + "keys": [ + { + "id": "kR", + "position": [ + 5, + 8 + ], + "color": "red" + }, + { + "id": "kG", + "position": [ + 11, + 4 + ], + "color": "green" + } + ], + "doors": [ + { + "id": "DR", + "position": [ + 9, + 2 + ], + "requires_key": "red", + "initial_state": "locked" + }, + { + "id": "DG", + "position": [ + 12, + 4 + ], + "requires_key": "green", + "initial_state": "locked" + } + ], + "switches": [], + "gates": [], + "blocks": [], 
+ "teleporters": [], + "hazards": [] + }, + "rules": { + "key_consumption": true, + "switch_type": "toggle", + "hidden_mechanisms": [], + "observability": "full", + "view_size": 7 + }, + "goal": { + "type": "reach_position", + "target": [ + 14, + 2 + ], + "auxiliary_conditions": [] + }, + "dependency_chain": { + "depth": 1, + "sequence": [ + { + "step": 1, + "type": "key-door", + "element": "kR", + "unlocks": "DR" + } + ], + "notation": "kR -> DR -> G" + }, + "distractors": [ + { + "type": "distractor_chain", + "element_id": "branch_G", + "description": "Green key and green door open an upper spur that dead-ends." + } + ], + "metadata": { + "chain_pattern": "key_door_with_distractor_chain", + "tiling": "square", + "wall_topology": "room_chain_with_chain_distractor" + }, + "max_steps": 220 +} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V01_empty_room.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V01_empty_room.png new file mode 100644 index 0000000000000000000000000000000000000000..c4325e866ead60db7a666e5aff1753d4b1966f00 GIT binary patch literal 9831 zcmeHt2T+q+yDsWhP(elMRb`7xZ_+|hsfsiq^p5lr={3X-LIC~M&_Rg`Bm|^{UK9u@ zp@2#C1&Q z7#kZK*9~L++iYwHJ%P^yjw8S?d0(d-fG^E(1G{i5h*(AiBKx!7!F%j0Q)_nW4~!9jag zQ-CVr8q{8!HBE%HChRpJZO{bZsJCx*$2hXFJy1Bp_RLt|5S#Ju@B?hWpYCL1JAD*5 z{nJN^Z1?or1lX>gcjsg~`~3g@nD<0}+_HUmc=$`*xu5lsu5m7-p?F#RrtWN}=j|!+ zdunp-@S{tWcsc1qHoNi2OpP$_kl8FBK6a72S1Z0q=I|Vst8)`nnO3}*Ku>Yy5odl) zOn#T)(qQLy|8O$c))~9CJE!Y98Aofz*@Z1(NBAa!YMm<O5Mi&mwzQ?WBmmgeZv%5E*`9sQ6l(WsZy@f#QOUOMv#L`nMa z-+Ov4@&N?4Gl4Jgyz%HmSf57GsAsx%vUxoDjBJsk-@8}ctMc_f|AvdMB_-_e!nWGQ z4Ygnx8)==*`hH%Yya=1^&9^sRBtPbNsf+%8pFLT8Z{7$OGnZ%U6i>(DjC59?d&nfQ z-CeBcLbMt^i(ShhIccqamcW_An1kfDI^sb)fJ z|2H;=PPex0B?!#1X~7hNVnKm*`Zt=)b@#&Qy^w?|v0$N{uLk%Z@~~~Z()=FPl_k}( z&nV}g%a03wK7$=`k#1oU$`P%QX2vk?Bw$%)SV*>#18&$AwpUnGRFu2Sq?W^o0lUBw zIq%BO_Z;X6aOmvJg0Xf;xcIeR+x)cp-Bo;Iu4*JgkvK86QNv^vUYVXC?MK$HIRGa} zdCccftaX?dG^&nW`Tp=^ANIsn$hhyIshTXG{hj9?0|xxEs^h-M)oR;ZKH-~-Tjm-w 
zFUiy5tmukPp@5fbW?4$_cAY|)CAGDi5o=CS#0ak#GLKhM0l2k#W=-r^9wVp-f+1I79NFUv5L{0(mGDoe)mYv|`BSDta1LOs9bm&L<1u z6)n&iY1hk+V(cj~8{+}xtD9S9N6G^#TnwVqo{7L(^HM8D~HZxy3)& z6|V?uGHaTGZW2dATRN94OW6nh);#Wyezn%M%8g;9vLCf#m^MY!|B)^#i0T~^r(fcbV#!=`p2eYUi#jYV%D~`|cB`d|q$Hyn==>&`x0h}Xx;iQDuQk#nQ49fCCl*oSgcyLCAuDl-{-=Xr5Ix~+iV0UXTlFy@O)?Op&- zp}^991KD#iFZ?!CBY9~O$Xzx3#_TWftGz&&gU1H##^HEvOZOP%upp+6wzf8w*T(LR zPEa63s5QfTqWI>OUX8h2D_H7Qix4$F^~!Lu_TKs+)-kyL-K4iI$z(e6E|Ang^LX^>OnI!5eUASv~vKTur5b^aD+5RX!HYI zWgEBfF2hh3o)^FRg~8Q%pIyu@ZkOJLXX&o>D0^AJm27^z9oq$B&8c3om0H%=UHRlv zTp*k7-h%mvnoBx>=pwJs0~YR!AKFi#M`kRIM8V~2fZI?T*m5Xlf9!EExj6SzHj#$3yC0$?@j+AqMxE-DNYiOy!VgJvLX?ktYqwX}8W>Bam}o3US6 zv>M?tH*qtbrPwQlE>*&xhzCeemeu;>iSUVSF%wy=wFdDTyXZ@?&NN^*tBeen*o9DS z;5ZlYETd=X3f0RER78I{K!^-R*h(c2wfp5|EB~T`gjlF{9ze)JKwgxOSe5#4hs!t2 z7p`KFQ|IqtYR3H;qMH`(GHu-m$6))D;uTZ4$cu>S=zV8e>gHIx(&C z`q}LFgOdRDF+x?2r-l_uSv^8Vle9M{t2O}eMf4kN)0#J@RyP5oV>x}mM18^Q@$4C{ z2{A!qUKz3lDa^S2?Kz*Abbueq623PA&_0sDK)VuJ8$ru;$QpX5@JQ_lebbHLlS+%o zix@5;0)aHcnvMgpcGvJLPC+1KFln=8^VLMl$|_!d#0)_5*u4CZPQf4of6SwbkV`fn zN9{Vh`GtRaZxEwMtJ}S*6M;3ZYN}=d<;{^Ar24shkd(Vjw*xj|v1TIJlvL~gBQx)- zinWRRV)fkHo~kpQ9&?tWP_d8>Zqqq{X7%uMo}T+a+N(lY(H&`TvivU!eUbse!6d@D zL=mN*;o#`B*W+n9!3`xYHI1~E#sw90c}whe{BK!?+;(IZ;6dUu8aJpeGdNK+(wNJ0 zAe#r&%W=CmvYU|B>@Qls{>{Wu&SWkxYKl6W>NYS+9xiYdacQi2=gse#Tm zQc!5hp#ImKK?Ob?`cw0lg9g%p%G{c$tD4#P{f!9-DYSudbv+0Y=NR)raTHk6hIGOj z6aQ*quxMn2rS2BEk!N@{&CkQ}iXFJc_Zs|A^(Qjhhiq>Qc38?$n>{5cK#R4%*$Vs2 zbX)x96DX3LaIj$a=S4gzC*t)ojV52NbUZ9H!fyRdQp+8%c+S8IAT-LN0ii&Fj%H`6 z203hJ8T#i0)d&)l)RGe{2R0?U`DX%WKTD*`bQLlCVf$7aM+3Y80FX%>@tyS)-?7_X z>jTkbD#3eZg}M_KsJV%yqDsL18GHT?viRD`P?L~;?M)SD#9V~^1n*u(A<4SblNLV* z$}u|weTzD0N4^gCE=>Pgq{Ao~+0$hTg7@^)NbzePRlTZlL>0%t3WNI66SYkSq!05| zDt7B8DboJ!hxwf>;QH%PC?=;4z2i)k)+P``8G1$5{LkR;z4wmkt=~seG%70oVISn`PCHfK|r(Bce<9{5-0od*wnU-2nj#BDk8CCW4cC*T&n$htj z@4~h9Bf^0S`1OalUCa?@=ivI8?gFqc6j^fJX7LfbYmepO>bg-0Id#E)4`<8?iz()C zZB4v&dvdi9O9p|p!QXbWzAX#EoTx)ptdqOPY^H$3;@WXSdqLHIscZaq0rphIx3`e$ 
znNNdk12eJ@_dQQ`6BMQ9JUuRinvrXpwU{Y8Xf6`{`2PL-v259SG1J%cZ1^oY4vXkP zi}ax71Qp;L9t0SJ*Qhz;-IGU{i4ha1!-UGoxlR72{ZhBGMNB^pbYqSptE zsQ3k-1{hwolK;s5-9ScPW<`!`|wOv?19upuK%6ma3f>-2-!zxf}?EU!!9$ z%B2F57)D&L?gQ!^Fl0M<_$`NONVt&jO97qpVu>Q5#@oAh+f93<6;D`m)^i)Z4DnYr zb2PXIm-vY4QT(uAC$$!BgZ+l%*udx;7e7%eeh)F?1f41(T=z8j0&ux~erNJIsb6}8 z`xTbHJ#Z-igvyH)Iki{by8JA?^Np=!w!j@Vvs&EMHo{#IdK_;j`h%)N9ZaHRE1e2DMU-~{_s}8LAmDcBx=~@ z)+FWG!R7g8(!H6Z8vVUWYSOH`MOV>t=!Gscs^5h3<@n9%umAvGiskSjIqx_oQrQI+ zRc)$TRKWHUA8fDreDHl--03TXsZ6uZeZV>WW5YxbQ%(q5lA(s6!U3qvd>2v56m;f$ zLakBA7SipqpvEwqs;}eJ@1IMsnfN-E6}7T+%|Jl7tMH@^Cv&uJ8kGRhUs;t3B(cum z7C{i3r7qMun;l??g@avD(KG>ziY$R!^kGLx5Yq`fJHIkXQE3O1!w!Px+N=D{rmC~W z4Wl__C~fIUs-%}vCQctHBCvfN@BlN~{f2ME|6WK?W~v*Cx`yC*83hmsZ=9PX=-I$|WcT<=d5SZJ_`_?dpFO#L;=!N8~y9H>5UxKL_` zVCzZ@ZSV)3nJ5&926fzbU-!%-#!N#U7|zqFrUJrqC{b{A63%!XeB8cD)NZ{BrP;1_ zruK!P|J(WJNuu8CH;(_(DT99GAwMSr!Y%Nu`n&N!331ztToIZ@6!YQ0p_MUiBO15A z=bRuCskVlJK+#%6Vc~g-FRW=ixN)gF;j-n(V39eDxMrD72udVZ*+4lbcu8TC?&mtp z>iy59AQCD5W8St$Iyq+E*xMr?Z*`n8&wZEfwNX-Uhae9HOs<7p8c=K^81JsNMYZk_ z-``h!U>9+&?YmW1SVI{u>NF;Yu0waQlt&UnV3GQ*;}rjnI{ARAme`73xHKAlh-e6i za1(87)d0&hS_MVyG^UB5$|VG-JEy=7)WjF-+KnVf;VS{NN6Yziq-Yi z(A78HPI=wQ(#A;h?%ZUv`d?}hnkWt$@b@{RJ6(l4($ui16HOZ*5cw;)Q_6dAVcTfUvZG@*z4{1zaSBydo_(?(5W&^K?*;U?#`B zVW|7re>=&_ui^zx{`oyg;%p7 zI%+rnLd!JAr;z97;2I9$ik8Q|#F8hm0!?MOSZ6Mba<2bYGKw_n8#P(r9HKj{p4qTD z6^D1@V}}t!PvLDI6&F9;I>COj-jbMosSOs+^>ir!>w=?pP;tDp0i&`8nsk%d##Ntc z`6J4KB8&8`>2!#uGkm^jdp4)GN!F6n*e>;y*u&Rp!yLPcLJ?Okxlb=WLRlgc8%1wN zq~v!#N1n6P`sIN7UIM?tWx7cue3X_ z@jt{KLIMUCH^7!7gtP7RB<@e1FX;n($fmB38L2Y?yJ=5^i?I&xg&|jX4@I1~A{8^; z)UIu)Hx$H4wkIMkiYamgPLki#5ytd7K>W@|+Y9+?ly!s-2c9B-s?|dozD=jK&}*(r zW2$d2wd^bhFvk_=eXlrAo+x%0X<_Zr)beze4F{sp?<0*SAwI+gpuq92KWsS}T-=a1 zez#UH`IC$1`kw!IW_g3QwN~N2T+0^u{hH07{+`+y>bwP&`kj_Wzkwh9?Q8T+CFbuP zTuC`kC0YfhFF8bc^t|5!)f`rAG36BIRud}_wiPwVA`e+pw! 
zi!Ah#G&^L3zbdkAXyUYPm0UoNn!Ku|qen=;bKa>W4(O;M3TicbMIv~Me)DweR`)t1 zdl)He_Tx(a!Q@YFo-u-ElaDl+)DE|j0S!%uT5@BW4MIIgTwpKpY)pvz z&C;+6CqwP7dnm;fUqJLQGrf|mIxU~5^NK>ZJwV(T-4H#BZ$&^vw;Cd=?mVbQe&Dt> zCIDmR+*#6#k|#AS*Xi0-w;3LfuZ{;#zs4!m12ryBmsnijSh66=R(U{5V6C%K;E&3` zFJWiNR>u}AbzNEX`R8n%Lm!0=*PG*kK9=LYPI=^UZoBj9Mk9~sBNfU)w065UUYz%M zLIU1!cnn-uEe9>P+nt9WQ0T{5s1Xixg-FRGr*+`GRVVZhvxQl5`f=OxcaD055ZouF ziV3DoKT@v?JXDm_G&?D#>ZU$G2g&{ZS6^pi8V<%1)ZiZ{h%alo#F@0+qx27 zg13#9DmhD#18y2Z3C4xkVF$GFQBQ8AhK7dmn5RcE$IukxJ-a8RJ~eut89`W0e_K~q zSI0Gw>CyjSANNnTd;foVOw%9h5cF1gbA6o$XbvGA3yf5wn`5j3#E7zE0A_|Q-8{_R##eV_#el2?3Va4H9AumKM@-FP8s zpXM-6qt)hf>4}yUMA|v*EUF$kK_$1#8#fI z>CM8&lm8PH|Ctf{uS5k%y|qa7{**)ly-^Uch;vkX$%(=r)S|}5MuCzq68UQ|P*PuCU!)6}>)gSvr4drTGh8v?+uIumBp-8U z2jkOjVMHl%Mx2WnDgz3tnpuc6+~5?I@{K&$Jyc?af34`%{W202(-sS~obk-DJ12X%jO>;0Ko1uOH%PNX{5 z5GcMXh&Rj2%iR6C^?7RP&j!ZE#`;Wd`R?uQ{ezoJ0uzUx&-gA23kdiI?ANr}{Z>&{ zR#tON;W=Yt;~(#OS1Eei2!X6G2mUbI48i}v2=>3iEdSo|@PFst3ey}gHo^-%z)-=! z$f#=`;d`(IWg}W54v=+Er0-%S#>NO)!3p&5(xdmkEH%YEg8v_l&i0l$5nT^tJPpd8h= zD4h@8f9wGaR5o@@*BK06Kj6NEWHN_GD`;*A*uf3Qr4YA}Jr)sQ^`j>234clB{@ZSR z{56hH{Ocl%p=Ox5x%t4rfMUai?5ijg_>ZKm5*2FPm<;Rp=!6U63R7g9O0}`ljq0A? 
z@|1UyeF|W2M%^BAC0F(9fPf!EVI(3^o4kFoc5RkhNfQK`_*G}Mwz~dva(`V+`S8ny zI{dAvbO@l#09SKza`G(s{-MM|c?NF&qGzE8Ou>B3zZY;^5Xw-A*JnVq-<)7*TQ&ou zeJ|0V$}f`tB1$?N{RZD_+!cFbp&Ru-O8w8@IQmoZ_rH|-#s(jtvVPTXZ(Cbi0Y(SJ z^ePw`1{h8;J|GeXO9E|rzoQTI9 zjyrC#=WzV#Q;7IeCyy*cACE(xM8Mr#)~$8fux43QeEeyox2r4cr+2s@;v!uiLs9L( zr_4IND;Q~EVe_f^@1w|qSHQWpk#}$35|EH57tK0->SUPdO{ZJ-qes^dFF4-v$)a0p z%d2NibNP7M5sThqi)SC3_vLT4&z!NP>di+RguS!~|^7aoe+9+?-rvs0$#gDfZeIu@ZP4~vW% z8iH(E%t>h8RpIcd5^vHA5}^@=?_LBoR5KMZ;i?>)$#9=6DOTOnKk<^Cnd4b_FyF9e zSGB9bAU37wF9hQ4KO4?Nf>k`cfyXX%Fuaun(&z_}&%X`-`sy`WgPO#4HTJuDfHX%z z+|d3{v!g0dq!@QX!qb&G9Gs?xJtj#}Vk6vqd_+@EAyeCtV`&lfbdB`sI<3cwRys^R zlZcISasQCsfK$Cp^?jJwV_efyG!oFYz!>-jtnL_(u&4r?Nf6=kofOoe%Y@srv3I-^qO z6Vt22X&G!w(8ObnjAI<-@V(+&p?%d}_T6b0a%e4uNo%@cWi1O^QsEU{^=FAE(pGIt z)YKG+;Iqm=+Pgta=>fSM-*(i#a}9MdV>M56;0x{PHqC~0SdpSHgOl6WGN z**fpOuA`;!ARp=m-*{w^WX|aN<+JopK_(T*6NyB!%OVU?$ru5jj682Ed|i@Fx~^*k zO*N|5mXT5@?$v^vv35*D&E%MaX-qPu;vjHJEsngnC@Drq!|Q1RmUsk=i4GsXnf-7q zNJ^6Pmw36mqwW+4-tHqL1u!xxPwLPgmCz{P8X71&G@x-tWw$%ysj8euX|tQD$0v{@fF({9-F2iy-IRYpEfE@MsngCtB4+< zL4iS3V3;yWYkvHz-E!ptL~!7G80M3$Oj4MX0|z}Op%;O2&cmV$9y9vX73|E4_@Wc1 zPL&&Kj0Ox|i}CYuLEu!m0vTO69J)IBvKAR*iqCU$k z-Zb=;%_L3^ItZP30qq;l^7t~fu3|~)BXxi`jPErPFA=U?7*NK#`#Z*&8aWsb!h;a}?g_aQ>I=lt=08fEpWF+Yla?+MD220Oi zCk_)aJww)*N;Y_tOEhyIv|WbdM-$`LSigpbhr?-sJY9{w$vxdYA7yKNJ=b_hKzi?> z?`g@g^m)65?OqXk4UHdwgM`B%({s8eBd%n3Wz* z=CJ_oV&IrsfZGVtYMJcioo$BJFh!iaKx7J=>bFs{CfSIFmtQ+fJhIdU(O+fivTL7y z&Mznh$;7rSX`~0CwX?wZJ}uPr>eYUeueOe(Q+Lbz67(_|KrRvZ#Kg(Uf$_I~prn+^ z)=>~-iFcB#B~_39afwyCUQ-{iCIH1V;M_VmR9ZnUe$$rn#GdC5F1CwYieo?W#Q@vGLXMam~_e>?!%l!GpB}J5j=Kob|m67M84b{2e=Nm z#FOn9>eTJ4kKq@`0SAH`OffsO{Wh#x(``=jeG?ErOAM^c%uEk!MSSr_N^9ukiy(yw zVC&bxIS2o;DFjKw>uwjSMgkkAL9&s`cG-Mgn+0+*4TVu)_-c)RQ4#8D3Nkwoh zsTL>a;YeYM{n_*YlT*axi_Nr{!p4h(^9v@Ps;iRuo%vK>AnIvodX{PD zzqPK(8D1slbQvN*NFYkqr0jcDCks>ZjV3n0WR2l13&;sgnqhmutF7Y&(fR0(fDZ3+ z7djj;Q1~Er{`~oJt@H}x$Xlpb$GzBK0^)FdYhv>H<-vFSWmj`ztgNhL+QUs)oAvHw 
zPyI7L-)iGIxrW9{G5Hth0`w;Jxe*i@8>YV{x2K)lb)S$H3DWn<#N>nr$G-_{uE|%I ztFQA*ua~zH^YTJjY*%b=D_rPUc`Tz^g9QQ9sgD3$m*ZdregDYP_jIdFPF+*vWPn@2X0$A$O-5}UZ;ddH>oP|C^T&U1 z&X!ZBb!Ci8oULqW_qp>M1K~-Xxl}RPk+U!L7$yQn={{Ac*j=;&yU1bcwIvTBKtmSs z>e^9!B}iQg5yi#4dH??XVe-IEPx?MR63`(&mH;Y}2UiM)q(%^+5_OFa9A7GC?6&b8 z0;2JLy=g&)7SIv`z_XnZbuX($8Sl*yJ-coEw2GC>;5BF{uH(#&fYZpe(Nnaf!@*G{_xB&hTc$^7ZV0zUi!=F%qOOk?BH!a1 z--whr47PG>uj2LhZk$au2I78HfE?T>xfYEt^d6_dBSh__O@PT@+RO)qXJR7Aq)>Tu zPz+?t!^{ZdPz2mG_0-X~V#a$9WfefK>8Ya(4<+KtCc+5kD>+)rnb@UuB^g~ zp2=6%cGYc>MNNV2Cc`aZD3&Quf0OIAGe-RA@G3%DNO7_fD~>UR>!aaAS0Ks#>G}Ql z+gn4T3q}E**z8C7PNaaONd094tnP+Z3_~E|5`L^O9HZ5A`##F;HeQF|#xruCnj?#b z!%sB~;}pX#5k**+=S1 zS9t=S>hfNd+dVFjug3ubzRpl%r8s}z>ZvzqNHU#FuPs0SW%RIKw3k3g1tPdy>@+HU zeU?}`cINW6J3R(wMVVC-sUgC$W!3Pp^63Yu*ijUcd}`C%`{2PoySJ~J&X5o!Z zF7%8x-odWj*7*4c6ga zH|YeKMecTt=y^(*M+>S)!?wl|>ew)NXT^5e6%p+l=v4Rwvo*(n`q z;awHGWKUleObt2;3U@g9G6cdIePEH~FdVN{L4B%V#raX+z&e&v6ll26o06%}r|OwA zXFb1q#d`s#ne@WlUX*oHaj~N8jRoFD{ZKtIqC>4Bb?P!F-p zqc!iHit&%L|@0GZ@DZqi%`McFezt{my3$4UO|?`21k;zR4E+ zREx8C_Ea{Ida0e$DGSJV%ll@M7i=M|k(IS?JeVH;O}=P9qy!amH)#PBG0cM!MOO+L zf>1}1Z$4ZV&h_!)IW{hJX^T$NHWxVTB=!wbX-t@2d(DxI1IVU383>Q3AOpw(z0i?P zvSs1#2}0%tGtgbo*Xk@7a|VyN4OtuyMM-yr!iFBuevl206cf>WUB+7pjg~s+xgnYc zwsMnpa+bMwc8d1FUFQ`VNZuM5x9_vRH8AUJBSylg-DH4r(SJTMI^qU(E=h)d+t zMw(o4u?*K%iKU`BNWAe5Hf|H&QO}MpXiTC&hY96^2*kw$2bZ=A>bvEjR|1JhtJOJ4 z;imh&y}dB}i2?2EGw3)Mo}Vk%>f$xm>(atSjQQ6PD5$JyCvC^E8vb|(?6J2HMuJdF zTXx8?LGyz3mWrobE<4(uH%r9WE1aNe0-??^rk8N|9KFZ*GLQCwMjtfceXZ}pXi*@Y zmP=5>WD4>tA}#*zEQayXa&9G(Bc)W~Ldv+? 
z?Y9*ic^|hg0UBd<b#$o_^j#Frseic&o7~FDVl(r|xI4mdbZOB;3<9jrLW`C4D~v);CvPlZg78r z^20zg^z`r^UW0512>i-7fD1A=TIE`9|_I_hm zlvE3G*wvMhB0nFTKiL)upLA2BvGNRRrow|Kcn57d#5B~3ir4=DH$7L7dbWrFE1WuC zPrcdgI1M&rE{QOj?)b)*;BcU=|B7eMj%xO9XqN;)E%7;#|Ag1>aLalOi866Vv z075B+lzJ^$5F~P3+1-*3r*zk&x9QAP7h^v@fF9--?Jz{*7E^E)gil57K)FJ!#$2Z+ zc@{ot5Re}99e&G|jk*Gq_h7+h?L4H&??m^Dn6i}%8UFf!7tf5-C57*b|_Kq+}Zr(T&J5r;mVKf@Hm*p^flFjJ!5MH zLZ%_uuRkBz?loO(OxlkZ4%pM|oy;7{D|~1y7n7imxh+X@NnoKopo=RwDdIH~ss$4! zHDz+;+toUTe^bC(?z~pfrczq4%%Vb9+c5hX(K{zmB@j?{R9aW^H?U~?5ydD7SbZT> zZ$PUG`!1C^(!6ux2RN_*S33Q2`Sh4{b=EVm%|*%%61uen^RWlVP+3oPm}HhvVsh!d|KjVS^SUpGOl$g!!7L&1k^yT>9%JLN{>-Q zv%+L$zUsqH>MNh2jtJ$xf#u-O1jGdGE{7NoAPt^BTY79T7j&f3m-HsecSV)Hr$JE$ zz9e#nIy&Qk>PfA9_YGNRBpu>WN_Gl;Gh5E-4kG?m9|c-b~soVZ_>t4nCo=~vp1T{ePCAmmblwJ3pznr7MpT&^Ti zdu6v81=PC^wL*rx`RXTcniBy+`m)b}vmHJT_}_rhYyt50@Pg=0!*?kwAejCM#kfa7 zydes1gH3h5iS22oRmou0mOxn)Y7AwnLmX8kQZx$Opu_lFl3l1pMH9`Xtd_x&I{F?p z`gltBtdba~Fz;LuomcB36o z*7)2Aly#XR?*mYq7%_wYX+Q*&8Ai+GuLxvVcL_Yy(G2PtZWciYl&83~28zKhdXx6N z_cs*qqfi!t7PIx*0p_(S8Otb>SIkpuy1->&AA2+~gP>=k8JwNAvWE`(tOPMjl)sO z=XA7NiF2I$TOdrkMMr%jmj#)EWH}1rc_KeLr1-=G8pT|> ztQRe~gVTo0?Hb}WP$)BK8-WO?##82^NZGPvx;sdw;R-8gug%Gu@>%a(biY{WCb?8nu7nSTMpZQV6Wvxd{(gSnO5DTiK*ag)OtW@x z0C%O!fqI6|%#izFhy9j@?_V@SOdCR4#^+y2FY=C!$K0JNJYX5nb#~!s9$#ApcJsG< z)aAIS`(y=B=i5y1uC36d2@@5Z}EGL;yCIz+8<8nx9_(jm5GRt3LYOA+%}JO>-5vr>AGFP3$sfXJ>b2IHzZ^CsMItxGH+It&Z6^ zfT~q3DnRp3G8}F$xRH^O(HQ#lY}bt1DmS=9IL1EO-E$j>O>K#R<8Z#=T&Uo0X@>mG zU6fz(!ZW~59Mblz_$zuwzrWig1CCYT%wG?by7rKH-`apG+r4Gwl00PlVl}&`2JEBm zlC-&TmI6=w>8W)sWwCC<=?MF>W_W-K9v!_pMDucOP$=Y^qeqXLH#1zEdkcj^V*&#MpM1NawtV*AzoRZ^4&3=CM+4J4du9+xlX))EQCj;z#^41&B>2{E5N^;K1B}yWZ z=Wnt6j9F*2n1}sb%`Wo$GyoD?ec|4mq?%N+=2p!J-cptsoLCjB-EZGJy-0u zvq+z7P5tPPxbd$r-k*}{PYX8#t>8-RsT+UHxIex42NM3bIrQfk3-laphQIqgSkj*x zstc}NyGBv5^K8HOifosAAlvh7yX$jva;|3itZ!Su%h>O;W!*?m!`Tc)_&sLA2KV_N5b!J76K4tU&3+i)Eq3G)h|eZdB*VK+==g$wj^l#3|&25s@|BFiKKM1Hl0_(p>toRi%#sBL|{g77tNlov6 
z+pB+u$p425l@GGvUnc}{$knibMEwa*}i`&SU;o{zvg1bFUlgnYAE~?y6NxY-M{D-{X>J7DWv@pJiEm^$v zPdXUpuf4H%I}(q*0BYMF=9!4xrluW8_-5ccT3qDWtIuxVUaW#W`?2eb^X4(J{o5`c z6Ycx2X|461o{^*9=X%ln?92YEd4ezOn7eNQIUY zMhCYOjJC%fABMgD;xGLFet65Q zasRSk3b+x@5-TIqJlLb7r+^;{c-1tw9RBPY(LBs$YLW8!AvM+2UrTeVhrb7ACS)6A zb&vC6n->GkiSLlV+fmf`BGE>Dr(^qVM7zP5$Y#UQHIhtn@{oCy2g=*CXU~pS`z4aA zF*Ei#Sg$Dhznk{7 ouvi+}$DjFsZ0JONYMKkqjI_cm2d?tb z`L6;K^z@8B*8S*55C(w%9DRtcaZKxnoa$5-S z-aff~u@4G7>jmUQE(3nGdjSl{7eRN7^z<&&AF=`daPFEv@Hp@>FgSWYZx7Pf)BDP+ z{sTR|AM*dv36FB)$J6+E{zO0Z@N2YCJ+$t-i2b}ROg?3%mcDm#{>$a2HRc$83MNA8 zP}MfuT5A|1Bd>aS{E*ET@z`HiV2@vkTxvNv!=+H#wQ4cQ7@B%!pY)fL^bMY7C(M0m z!G@OVH=gtaf{HU2%az4W{`zDc2$$9i@3Da`_RMvIFg0fr29G)CNhFfFtDgJsm?aOL znLL3@_e~q7L;W?gVoe;A&q2$_CLvfUgucipz`R|jf1Z2l39e0_{n67RP&PXjY~TFy z(p(DzJ*%&F-JSJePVDql3Bum3ldDy8Auw^)k52)+eagv8n@dVy;NAUt$1y4qGq_7jMyq;8xf)sR;Ng0f z(C=1Jb@7is{wOFgbI1>N4}y2NY7{)Wk9NTaA)C-!=sjxlNSm@aSpad&knsH%Oeqpm z<{SuDSQ#as9h>~fC-tDypu&r?;58{FVbOQ_u(ZiMug>)8vI74N#s~+n4_a`N4vdpF zub}<)_Q_ng+0RQvDOh<*!6m2gi&`N?LtJ8Lmwbv!ODi#7BVRtcxN`#0A!VuD<%RHE zYma$HkCW=qZJ(xKFystTG|VO;Az>1snL_l`5!5p~U&2c@&ojaGAHl|03(tHowLy^3 zDbovmRzdn-@pYeq{>v}Ei;q@-k3;3H>^v*5N&{ts{-Yxs zDjylldT8f8=$9=PeiH0?Y8uykrjK^Sw~I#>wvnT!wV}x~=HgV(q#v|HG;rg7;NDi< z`7C^DVqwO<*#*pN2HxoTY{S-NehydBR$((*bxI|U4d zT&&fIQ{3$>7oU1QWF*bh(EQoO;OVb@c8W3bHu}Uw={x2JOBDh=cY|XP|aKcWOU?-g{)ejGNJr@wMc=(F7eC>#stTzq$aim*xw=oL?winMjy{s0}#OV?%tk01lonV%VbFeKIBt-9eD-=nN2#cPfeLM+DCT^Y%WgD3#Oa z%xz!Ls$?RfMtuKnZj`0V`JZ?R;#Of*Sy;zZakPmYw?jTYidnd88RG8T*phP`M3=H* zT+A8EmX4!H%%?!E=@&kf!l&wUjJDzPphR3 zb*m2cPIamL4eKVvD6Z;Z^ikeMPHGmP?fl{O=qT+3NOLEqar5B%a%NTdqRO=|KE)>d zpit~{tEauVS#oj#+WWFQTi6qpGc0F@7x4ON4s9$^EtP*w>gZjQE4mGy|7kj@Gjnc) zO>S(RhMaOkG2G?8h?=$Cy&N4?L4*UirKh-?VqnRvBKHs3!;_sF%M?LyRaMgi46&&+ z9d&J3m^V6*l0i#ZP~ZCsxie+PV-|o#lqxIsO)LTx+z>*QM>jYcgr>poxl>(a1<2$t zW^6*KF8x7^-LR!@RcXV=u#>&H-z!G^F&)y?8h;M;qIqQB&&%H2P|dcHiXE>wJ%K(uI{?^kkK&DJ7#9ZmAj)sEuJ6k^@@oz}HrOS`RRoJjG8P zxPkX|pFIppfAJ8PHfaM(dtMQs-sD*7FxY%TfHSR20;mikmTSE1Th2~9Slv2*kT(2e 
zD0f1w!IQ(=KWp?FlE6^0K#38h#PO zL?bn<*QQ(R@iHv*;&RC4_? z7-9!CZrW7WRs*e+6|NT24bC;FJvsWWRUhSw_B67gGIM0gG_xv8#PT1eUM%&j|DXhO z{XE*_*LY+m+hvXYK^tiC*|OlcmaD^81v=* zyw3H`zJYXL(s|q@EIlLGTH#YerLMOm#l_OyG%e!w?vx;nco8pt07h6LS58=$-->_E zgIwp+YLh>9zVcBLN4zH~n^mq4f@H>U>TTl8SRiCVt&7Ofo1Z=CshweHDq0VG8)Jq1 zCcYi94K0>q5Hv0BJe1sDAuV)KCzc09Yd4IBb!gwztGlPWBm>9F@##_0tV_*qi5=pr zrq7wqF+t+g5$eT?LpIgt@Ur@Hzm$zRmzP|I#ECy?b#yWL<}MEW2JNA$^9VNojy9Hv zpr~bYeF#k5^(vS3xlPU6A_5p8X9}`}22XTrf|I2&=tOOCv)h}mtxA2X$V!2rmDRkr z62B#@$#sWwR1v#wNeX;Mw;iM%Ywvv7`%7G7`aa$#7(1zot!ynm6pdanGFdn2U?COW zWl?@k`y}5fJe(k{i14Fuu5G?PJf4G5k2CYjpvJ9fM|*8&H)f7#1acBb6S~|9vrnGj zAx~gRpGdN97v|+em%_Ol*hXwtn|5gDs7v+yRsYk2wv^6+xxt;h9S5*;R@ebaVo#y_ z!-#L;$-6(;<-I-EphdZSo1_*0>Nx1j`7f@O40L%(y6A_ytY2(czhT6|=37t#NAcO< zZbX>0LJX+fmheBZob`y76bt0#vT_ggw3jpS0eZraa6cSez^#5TU2BiU4u<70P_M2S z`(tr~-YJ5k3V)*$SlMs{L>R@nU@yzkswmpv(j69cq#E(nA1a5jdZT%f%l7)2mkkaM zdaZyV3q}>O7=ZRy0RCfLdWL(n5xc9~{5zg~wT-y7^+ovsYW9&E)dAL38*-xiCf3%} zEFo7yyAF92v<%K!3Vz`!)ly#5LuLOXX=$2UpoVPd?Q-Tz_T)OO;t3P9Jzu+fDvg1H z(u5wuupC%SMY4NnZ#$EO8ll(P6JDtRVG&SEj93sj%zQR`sGXVErVSvgyKSS}sZle$ zi9cT(vaGtbM#Lu@e6s4r%9w}ERGX^7Aj}1rc8lX`x{uT9y1F_h!3Q&U!wu73cy8!T z!P)yHX*NR*($0a*Ee-1|v8pEQs00nn3NLI|Qbp;KP6t0EP%z%4y3gq(zYx)BkMQ_> zn5s3;T{X;ruIyA!LjA9AdA`0pka|vFx;6X-&tzsZFV(tS$zXb~fs!j65@gdp*z~@z zHH@_W1QQUzszQpr8okZ?%50Mh6T6r^_;q``ABfdvS4j&};S?IS3YU(mBp6N@rxHt{ z90q#D7n|-F4JAtT(Y@7F;c=E@Rq5IVp6hevy(6Wk68fB0nE#CU?!zqQ7p~BgH8)(s z(i+%TDhu{`hs`Gbs*vEY?VODHJ|ae=OlLEp4B2t$!X0BjiKQb1S%>!?*T|Y6<$<(l zFqPL-3wXhu{h<}4=85a74iBmW>&ALmGb5UjYq0!clq)^Q(8G+N)yZj7O+j2wBZk<+>QTjJuYaOKd1}Hs>hN3pt+#T|8#YR(4OOcg26-V&Pz#@@#v7FBVtg(Jg%dEOx7- zZ9Ow)^PIDTT6#CUMxxK4L(bT$bSveYPNw#SRrzeqDG}&Qv}BI9x-S~08Y$a#z3m6q ziW*5oMl1xHHaXyRbN|E!+~*~BAGlU6tK@!4j-qQl#wUVaO->lEmV6A}IZ^l?8Lq+#!S*Bh z9&s%JDfX>z?nF%?7pLnb((&_8)=bt9dfNa7P3x&8PQjj>7vq|5CZaErS(FA)L)gL9r@eya{Z8pyd;-dNwW8z@gHcYGbH$5%JSt8~R@6OH={#Wr1JD;`3{wIK zEUpS^dx#|@u>zyP9^;fHp_}r}-2yyn?wM5+cOEbI#d-jSS}f;~(4uHjru)npF9sSA 
zv_bCn<8gq`Eu(d_O~sUj0WY!KVQOk>1eEs7Yqn`g+T1-l96<=lLEJz-2m!P@(H;vD z;Mjem<|!N`g%1L}2g5_VR7UFx5D(WyOmjWWkcU)&Bnd#>wX?OY$GOcmU(QD?$|Id5 z?!jbln(~Ae1$cImyhDsOCqS?aq4?Z>5Sb)#5NIvU%wi}!bMa6$sYcW6JeGKpCa1~a zmEvPp`7GS@Ydb_5{Q!J3w{3H#LRA!jAD-We=?2-R+R>E8=#`ywE`ju9BG@of9`1+X{&6RGw1x1VxGD; z0-P|d_B2oWWq_@sQ|?!CY8FDxDrRV3uJ8x| z;+hmc(u?$=Gnsyl+jVY-K3M>Rc6i6Ovu2_~r+y`A1f++7O@&T5(c5`bwvt9xUsQfp z5!prU8n@EOIpJE-SmER~4<1*SJ`qVUvFNfa1P z;^48#MFtj=R2tbk#Tp$jkj%Lu(=nWEyYS<1iqU9TMu^VE6NeX@cchm=Is|@`I>0y) zO;pzc`U9LND*haRgL~BG4=ta_71U6&2m6F1w)K-o{sY(J$7X;$6cDM#A8&YtRL=#m zs|>1L9sed1WXIK_JEaQ^ao@od zS77(Yi&J@~+<*@KYRny@3u~M`JYYKGaD?#yMo6sN6Ea>IV}-6X-3qm8o-SmP?97}) zuRH6d^JhDCapYw2-^(UXqsXuOfCOlZnHi_KY?um6uy2UW-B2oBnRh|-A~ldQSM6>B z5<)B*J$X!V!4tPv-x*pcB6Cigo12F)GKU-lkDPT!H!Q~BbSGv0}P_Z1G+P zq{aWz(DeU0Wqp6bKa)lO@hDaLw~fWgm9L*Ydo~c&us8WyW(GTWJB3J zQn(q`$r7ZhnmxQq8Wr-wq|?>awN-b?qsNZ<>F)*nrS5`V`rMhB3dwFLI+*f(W~PW@ zGDDdWm$=0JH zCvNIYm*zmW;J-|jCO}Vuv2=cpp&l`Q+YVr?{yTC19}n@rvQT*cAO0TH8%BUmhi&EK z?VayWY^d?7Ki0hq5DNKMu3YJZ-n2V&{(QBGqYi})m~Jv%Lv)kkKH@P;fp~#*^ytx> z+vH|}M|{jgY3^gz_Yfuaat_*O-{j4cJxT14rs1p`co6{heHsB?2UDLtTnieN`VA<# zAnB?>p&}d^sf*?xT{H+wPG+4?I&=1HSE|$L_F`Ot8d9e&kVM%ZoSARRAZJF*9tDEY zT;d-ZX5iO#Ea1T>mr{&eVrH}hARhfI zh!Njr)Vb6HF?ww@yK48IJ!hL+%}5t7>hqY}dBDLll;Rxr(ozP{!JKm$a2tI3^s5H# z+ncI@c+_19_wE~FL{NKrmhGBecIA2m_LnD!ZQo3p!nNc!KGI`1XO z|7%(OKRwH%8b(FQBp>{(Teg+{(eQ%El82eYsarro_EeqwUQ1GUcXw@d_49J^Q`v48 zmjIDaByNMNYcbLrQsSc02p*2*s#A5UfmK;kQ!|h?7G^$CvXN`MHa0GM^j2}e zrGm!*Hl+sC)FfJ}ljZNg3><|Q@Fw~2I(3JXI4p})M>S%Ao|3qF%^IgvR=>qtq@Qk& z!Hs3XRnSVCplt!3o}TRat;yLB9$533=i5JhL>K2d#W>|1#MmH@){CyvE6q0n=$U!OhK5M
XUA1ZzT1}2Ien+%n>Yf^RDtTN(cKc62XlbMYP{rfhQ0@d5%qO&U z5q|;eTe=J+LX3|bIg$-Apb{5Tf5+pnMcZjCH(QOK7a@V#zqhw{$fC`Rq_|Qg5n$9N zn>U}&;BHr`!b7=CV$$L0KMCKwd6N$A%5`vZ3TrpXg)ChFG=rGy9A*j>1ouDSrfwl& zDq%puD3gcOQV>)Pc~KULMYJosHRLV8b?%jydy!5J{_D2Ayob-fUoTW9w~ft8p8*AH z88a$&=Hu9t(p{#cSz7d4Rj?pc)rPEIyVf5L1VTCuIg~PtO``nJEv(7=Qv` zqHye?p@8Y`+1j;8JwqGk%)QRe5df;#7Y$bQ0H%vcT(xc6HYyR21%M_Hmxw4+d_En} zsZV|!FZw+sP@^4l#U(2hVgQ3k0x_pjca^&Y1GMwtF_&;XJ18k(B} zHf`FZRHBFNft@2n2S+HC2opkP+GZ%Na4nz%YXLc_4pm2ip)&bcLIyXZo%+sr<+=LD z{as4cAxHzrhp^xH4p5akImuU$ivY1O@JH;)ervh@EBe$%kuk$qfKY2Nx|~8ayN`?r zPfu@M=yCd+DDWR{4}1^6eG_ z?|BWs+bXFbzJ2-f<=K$SQhKV>sD(&elB3HAB(?4)?{J0UF+ub5^X?ug|3?P)&OAlx zWaf}XPJz^B-ETP^5NhEG!oQmazqd&Lt1Z&IoF0&xPh^qp$_@+=yh;~1W5UI*4W|Ji zDqLLeM&)o$6#+q#qoZR<$LGO1ClnYE09X|wLdbsf=zU#FHiFz#tK{-_ z%&ad^x7q<)B}Zqvzhdap9y4qb<89uFz`^10GUGP7YnT4f-&3b}VDIy8H@E2K)?K@I zhqjySg6asBI1Grvq_;)EtDt89g5fJidEY1K;C2mw8y_wQ^rshzWmLJL0;m{&n*%`O z1*BBgJRVOs2M(J3dibU3Z&_G$YU+@N*r@@Cy2ahoJkXmXjCg&U(}->5sB=&~&!7Jg zJ0NR*1#}QR1t357;opoeEF=!!1orv6W}WVkD2b~XTle!h?3=-0c^dCej~;Tae)KQ+ zp1^w{@b4D_e`}Z=db6^!!p@vIqa(b{W@Z)C-nZ7dMqH~Ec&p)$QBLdtjli-tJ>n~jIG>9;XvE8#B`^@`dsjlWcS>6E!m1) zlD>)OKo#2q?48hI?$tX%P>>BYRKKa+OuBsZg~&de?Msdt8XQ}Q?E`=et?H^*44N`kPkGv z*}5us`)Sl3D2ufJuN}U05J>X(y)F9zTYUi&pS}yYpa1=DI{hISemX1F>K$n1M|x1m SiNFzhUmx~9RCVz5rT+lh=bViI literal 0 HcmV?d00001 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V04_single_key.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V04_single_key.png new file mode 100644 index 0000000000000000000000000000000000000000..88c5df4533208a47e8364157c1ed5abab1088222 GIT binary patch literal 18676 zcmeHv2UL@3-fz^g>^OoBDj>}nMiFTu0!p<3qf%6Q3kuRnLhm71kv=LAIw}G}fKWpZ zs1zmANhkp##89P^00~L%6K8hs?%8j@TXw#?d(StXgK$Xl=FRi`+pj!CUN_X+_wA8y zArQ#ED?eYp34!dWhCsgY-m?q*CU)L(H~6OJt7GA7jBxS|xb5u-F}UsP>4EU|aJ4_` z@96F0ihwK0E1r@+D|^(%*VofWO+f+nmuJW$yqy)^So?K?tNh9HXGd<8B5vhJhvD`cB`~10(aH4eL#z{5Bv~5boA492;`T0;d>#F 
z^WScl2fvfv1J(la>YsSv`qgurpPyZ`wzl>gd+*q%*hlW_zo92)s>u-Ie^HmD0g*rsga>xN&B zMI_+y__C?YkfeDWxmaWR0{=;=bkB>MJ@H}vOGx(Cl(xdPBw1nXP-uiIBpJ3{zUlDg zlCz(}cNDCeegB5B$XQs#33;xMg)A1Z>*jIRmebIcr|5)6CZ_SEpsnwX&9Qa7`9j?y z{t?^`Lu}8_%e2a*7dj_mLuoQJ92t(e=nxaru!vlgi4gVvd2>Rr(ZWh6^sd)hkDhtg zG8V<2D+^qn@LuguMWZ&+C~4G+6zZmY&F80&IO~G-pLWu~JIfWlG_8_@9G2fEcx&$% zlT>9U8R3QBkNhktkw1)T6?RT|SX^FSE*kjhE^*JK`q*;ZN`ZMp!3%ODdmf=6!R(GG z$g<-y2hep3wHP%Wq&Rz8&wPAws4L9BX1P^_5X$P$(=tWgJz(#pPDoXD zZ&%7(jybKSX0?+eQ(SFtwJx|#OZPsN>v3o=lDz#n9Yl((5ZDB+?XF$B+V!-VJuB_z z_%e;P7yKCv*kUy`&U-9qDt33PkeF?)uPZ$y=Etz{y>=(~1DkfV*ZR%1&q3E~PiK*^ zc6OXe1SgldX~bPKB7{&_V`X#2b!Lri2Wcqw07|Z*p+Uq3;q&9cHoE_iBYGvCyAkg* z76wUH*e>6FnzN4LR>@jbydG9vTFv5aIblV%{Zw2<31h`_<#b2K7`Q6aySKRPbu2?U z1|lFCh7D|yX`dxxraXQ~Pd?2|Zqvy90&doEsLYYL)+3}C?fN3kXRfF!X|FzV$IMte2a2Q{&_;66-4w6@0i%^TZ-kXh|a>EgckKN^=B%^GK}B$Ty@ zha?mi7td;halbfgtIyYlGW)WM#bqvsy14Nfl${U zB2`n&8<|967lp&9syaI%dY1zNQeM!;^Fk$M+2-*9+VpDAZhN&*55j?VD&hW=#?(35 z4vCHPm&l~%FcESOi^Ao&)y*y}6ywSVce5NMSc6XFGpA4Aj9t^Siy62-o#xfpW3Cz& z;}kljPRVx?_yX>dh#x6sbCxBzUUanz|M}rE<>oVNp`LiIyx>S@FinBRoPLJYh z=YHmnOK@(V<7`l+#Mto)#uNC7+I0$wn-DKT_Cx+4zjtt(T+XiUi*cHa!Ji*gQsdO& z?Q=1)u|&pb0A)595%^A|l^gc?iJloM+SuIO95!GZP*hn}70-o%EvqCbG=}!FG-h5s zE0}8zl0@rR8=j*jEVsG1yJP0-S39ZkAzRCBMss*-|Im;n*%579u*#%>+^ieU(`L`? 
zgM+OI&BK8$>&JnhnN{-=Z#2?q@lz=#%z?Bq5J$PVyk?|NX*h2!zZ?}HOXN*yt=CkJ zkB@hdXls2&5{+y__4zYl2ya;Tkz~Y#W&g zicCOz+s0CawvFx7bauQr>LYXhWmx$S!+{?a+dP))+Zc!00TUDzTn#LH4AG^zJ~G^w z#aUa1y{fAVxz7q6or9{nrjd$mtwTnr;;LZ{adB}4h=DmIW5^ldPabs6Lgp6sFzIvd z3aCwjs!LmM&NWQiYi?N6w%z^k+L_#8eS=W^#8f046v5$a3HJ1CS;P8Ro&Tuc0L*jK z)ZErIr2D>j#f5z@jSNB?f(h1RWp7{$c-F!!Dv|&yL%iWou>x}D8g;!Um_94a&4QG- z?2+%jnaSOnGN0mZW^pa&5od|7uf$liynhoZm3fx4E|e3rUPrMFW0Yr^W*^}dGeURw z^-a!tqh0K5D?Ul_Y5(-w%V`4HSUOV8L1IAYA)d`3V~k_mnoWAUkPAM>V_Ldy4vpQQ zM@X60*;gnG*L&02S9q0{*d>+Wd$Wd?WZjAoU|ny0kJwW9p_95EnK!fGyjg`}Rhf!p zCt;EE?wGcCoZkFI*;1rY=*wD7%2-((e zOhglGBDGeC*s#jrbc>CXy89ehv07rV$HLp&!~$%L*Tu-bAAf#)D9hoX8jhOm%?1^_ zvQ{HEgP)HN?p>$UrBNO_XH7XdKR-`=>{cPg^!Lc-?IeaBhQ%{ybB7PI5ZqO5t`A-G zGRwX$l6#Ir8SL5|=SKx@&KRMrGB-aucg}wIDDvelJyEaAn+pMr)&|7*p20&hW-)dg zj2*kcN_q?3xaCNU<8U|y$Q$nh4eit7@Q28o%@1eoeKmA*E_n7LOZ-Kgv4MfAp|-%mcj9KYi7Ga) zuV1Ixgn`!&FoxZ+vn!xeBJLA-bvUN$VzLK0Wn(Zl?vpOtzTbO!qNP%TyV3S+EFk6{ zNzYK5HPl6N3i)PTN_+Fe2D|M9JtJVO=EJ<9CcS@lWlD1~$arj>*Su%dz^L$k7b>2M z;Ur_siiSV?SB(X`(^L1OQJmGRtRC*}Q{k*5XvE~!2cQP3jd8ShXHa-R^2s#_#!2Q&bZwGA*D;M8B>R`IimAICM3HncbGRi4JJ7d}Qb9=KpazRP2*I<;!i%F!!4!js# z4|q8kz@SQS1L>>o(A8AA&g94^UxbK7(OenE(7sYTdaFI@YW?x%d(J4%6iUmTE7Ih# z-}7#gF}K?^D@x-=p|6PXKFNfZahv0;CBeOC_7szEuqdw$t&jk zfdYbcsVB4o#r+3wr!{14cy9Kh@fF|xSQ>7qpI=yYj-!%274A6>14hZ4|W)TWpNOu(vmQgd|Co2o%^(PJaO{o zgLLMEr=p{BuB=hInYO@A9 z^8JE))Ap4H4n50Udo`=PRMBUM!8bpC-dHMwt@jSS`?{KA0@Ij&e!mq_?xjArx41=q z*SmM`sHpfb8*PraY>djYhZM*pnDr~t2eBR$i-3NX1GyF}NhECv(#b;Ve4m?=y==EO6a^`lP#5H zq;L+keU~W|o>!KG*G>ucjE9}p)yz{PRt?E%M#Mg*YPHQ-ef&~A{W7{h;grfiYe|iY zp=jl3b;j~e`@&bEtiCp_e!o&}_CD2->pRKJFb1UPa??($&gPcvWcqiCm(w>|oW^6`%=Pui8g7A?Y z>AwntzMs84HJOIUTv1#8p%Gs_l=Qa8w|U}*Vn&75_hy1WoVxd}x#jf{*jzI~3a8Ac zcrc=9*r!VqlRveb^P#z~`KSQ-=Pj5D{(Pisw<7h&xN?y`ABSzZ$wB*#gX6WB(*^N} zc?GrHRLfHWikdZ?hgKeFYc&InI|KSeK~&Vho3rU%Ny9?+xASKzl=xE4-1zu3Ujpws zafTKdeGRriP!Hey5fqPn@mmgl{Y)=0hf2yuxTs89YXl~8Hv4v|5wy_l2xc6v;Rna9 z^A}1O`PTk$$D!27J#>F;qIWi1k*K=8>#|R&5w&%m(z`arr1|)5^%v#fFM1;%PJGa> 
z_n27g7@8*VJ}#_+(_)}MAM4O!NF;51f5+Y;<%F)woMfT^*(!@9ftl8QAA6uXGB0u< z*7fyrm6PsiDS->>_=?E<3Pm*`Rx3$w^JQb3{==dd_}aCk+$#uIa)!SCe8zl!TOU1F zwIP2v;OutS&?kaezchEMK3TZUs93_B#hk-p@d^17BI@!xTyy|NcxsuXXr zuIYxG{Q$NU!x`7sg;URF(VvWbxOf41s3$+pUAS*^V#mBk)c)^T8zeom;{tuTd1?#r zh7QGy;IdzsISNSE@;;F{)|8=N{U^B~QhBB+^2UCNUI*@n5A`%TM1F3mJiu19oQtG^ z0rOZh-J&jS?v{akyc#JF*~(QTPF#+Sn&J%+!E~Q*9x#7~f1y~|9Sd>L6A!vG1m?TBxnds7o=)0|Ab&qwylF3lW33Jm$qb zxKDHwQ7|nkf0CZxeq%@UwQWULL1LSINfXN6U%CiG+Li3!*YNB$jhu^sL|qrUU9ROO zATc8Os>NRYnZeGC$c=8OcK^?^9)~2(WEat&kX`gN7_HH_VIz}!7W1;gm>UrvU05xG zb~a!_ukOIrXvHd<#X&3GOyAANyXKb)-O#+;5-}Ov9l5$e83Sl6UI8x9XFFy6P5z$4 z^uFe$Q_G89QOGo@C^ACu#2HWE7uth~hI4~y-43o(iN<6@L z!@Q}39SQ&)+?(~xP%Xb!wo|0WgUzH@3K(g4ZFy0bC%n|v)Cwr^+MJD!F)jJ54emVX zchoA)FD38X1fhYq>8o0-FXbO#h((Qe}Ze0LGsJfyW;9kKX|E-BtPip%CN|Q(}V|To!P<#zl#{#Lxt0Y83Fd5$M($s-S zD+Ozu$JV--|KJOT9hYubIivy*G1(zrQ^keSJmqq7Io;q(Uyt@|V}QRuX^hKRR~<=u zeT}X4GPC@}Slevg!otGrnPmiTe6MWKui}RlQ{XmPA8<1v3pIKE%?I>6mTdGWacZrL z%fU+k*TdTf5~s_Ji6@vn?slxU}_GECzGd(JUYN+#gmG4p1Hs=YJ_7S~TOta;GGN(`YCM^jG}da?x~z%+KU#l_dc4!e{1 ztg9V$U|2{ezK3pcLLO>oFA}3gpVrf^wK7#>WEC%#tDD=esX#ByX4eWhOI)sd8LKGy z6QgeoRyA5MbnHfikZm0Mox|{Qo)$g7Lrd2N+c|ObJjOjbL7fr{fefbFw|j>^5nz32 z71W`>J6D=NNMQkV=T}*6^5q?jYyCV?_{~lT?p*09;|FAmFYYg}`?b*aW>?rthKhs> z)jO>+04MuOz?P>QPen1e2IdaQ+-T$AAw``s zY7lD!*;;|N6R^v&g96dpJJ4q@U(JR$&`D+cTTDik-AQMB(dWtbN4orv zOZ^U-!#}f#6-1zml&eB(yd7QL-P1-7A2U;^Of{fvPG#Ad66J4=^-d*)4el_@`oPlp^1Cplw=g?9yK-CKwD0ZIiY&`@8+Z|{ zh%->`>#_y)D&*JfB(KCbi2HV+yOXcH8DVX<4ii^l~TJY+AhZo0`c&XJq_~d zchiCXxYWNauwg*xBYlZAFSfB99v*h8(upKqD{|@!Sot7@2H{xdJJj3jB*Y3@|?Y0O%M6PO~|`l~53=wPdGI4tiUz zf|m0dgOTLqnmYuCt5F&mE};@6Hzxu)V}1-D6{Lmd7o!sF;|XJE^dzE%)~3 z3T>8nRM|Z}HGhcoJPZ6c>+Egp@F*hU+MnZH$TY3^?%i$5?%h#`WD1_+fIOy)sO_+- zg4>#w>uJv3z;*4yb$|>)17)uU>HYMv<)LNA@UJ;!_2(QK8XcYYvD~q1_ijS{H#W)c zAa|1#b{_d0eGme{nI297s^{PKjepos{&?W;dhEZ=l03v&6`8p+T4l_OWnnN_Ir(Mt zY>t@{ujV1hq9P(Pl9H0EqX|1-U@(}1l)sG-;ViCW_)K({Fc24Wgfkp>Ose_>5(Y+PItZwT_5gLQ#YO}qvMfx}=BP+}MQRTI3kQ 
z;NY)CpBK5t;BlR*KZochy!~D`g<^*keh%do_`_iGm~k$ugZT#p5DfMSiHgd0l=1&Q z()%AJ_g_|W|Jpp!ZH6Jbb&Lhl+7ok%ieUcM2^J0gUx&~+SQ=0Cy>sUdKCSGiub-c( zvatd$UZY{gyeV^3g}U1K-6`bqx$LT{sx#T;AJ1!CS$?3XdFj$6$H2hqLTH24hg?@s zMspe(Y_;9P!$>yTKq}OiT|8RVjhq|YaR}?!(U7#6{=&(} zK-<08NvlhxHRL=M6xeI!U+({&_K`ng_+K2ue?Jc&&Jq8g#WWif(^&`R(Atc12fh4t z8qBz1GHRf#m8hM`3%8nAS<2>0S^Hb}`qZiCPQ1E0jpHfHy#oWP6a&lsOco2{iGjuv3+w90Ax=B$omC7w`aRs2 zGKbDCZi^N9uc^AeodG)_koS3Jm_*9fq^jd+t)C+>4}d&R1ne*xa9`OKRU!`CCLaNG z-xLGHb6Xl9d#q|!+GW~|ssMv^e83u5Wr8-|NkIA3zm&}S0;3QbFgye$$j5I1&<)SE zMbe41m(lVkflZ^WohQP-afIb0%&QL*{O9K#?-ZIXSU2JeZR>D0VVg9fH1L-sIBQUa zc~%7lJuyEMSNCY^ZD;fVc7A@F2gc6<=C3`Vku|#9C}&qzz4bwUl=e$TQqq02oE-m< z*Oj4SDmPaWizT+x-52G|DU~=IpQ+(qRGTKTD+}nx1$|&pYv*x#nd|jaf2uui%!A^& za=N#=)aD9TfRxM%kT^~rZZ8d4ureHW?pk?oTdOLfuRc87zD->|->@sJTbxKS8*L~K z-4LuwLa}JvBvXu8qJZJq7ooGpSto&gDuj?3vM`gf?T9@;3S3;%Edt|b4*dw;E;_aRcr!2E`>HTVk*?rR3$!vx38;7#%h2a$rBDwW+a`Lm z^ULPD+4ZW1c^EB5sdK;gUDpGEzew)Y(^`4r_K>Lt|!8WRn(6 zf^Q64QN@`RQ%0*r_@VsUpuBWgN~Dp|w$ zuoDO5EXXYZ&@*VCpBc%|4PX`8v~CTo1nh>7dRnieL$q&&)~*ojI4dha*>5l3sek3` z3lO1H5kgbb`Jp2B8$s5}rh#)2c?^)FC#@~ZGA8d*TVx`+v&$C>x z%Eh+Pla&`sjTY4TXN0vMbW3S1M+U+8yK8wjtm4VD0U1Bbs-1$<1~miHy!>kULhe`w z_eNivT46g^)UL>cX`5~WpN>-0UVT8%UCFt-& zwX#`31!aU}5n~PG@WnQmO1Y$34By_dI(bZY+oPU?CbHmAN%uMUr zUY7>mU*FX-Dj{<9o`KMSK}qV!S)3k7wd}Y0M@9#@5kvd`)^eD zR5!Kq78z&QGzw>xQSCUUL+qFFiSyz}lfHFhgq>&Phi$~GY2RvV zHB-$juFR^h7~#HKOai_uS@kS-ZrR2MBiL*h!@Bko{Rd#_s-z773cavOz@l1hstyot z6^}v|8>-aeb#f=)GOZEP?p)dq$2vWYTG^mU9|9YVtjuT4cpzlR0Tt+wf%rr~N~1{R zKgrsBee)+zo=}kIJ>1j!qP}A<)e&8fAFixQEW;-NCzYkUV98Pn;MLAM0)r8zGs4-c zk<{6+E*H%=T@IQ-k@qt=Qq|%xe_mM<{R8{LuhJ&7us~a)FhlfYw3v8>_94VrN)*W^pBT z9+pOBKfL?!Ys6-UtFImyi5p#i-KhuP7}|xBV6zwLgfy>?46dN(w&P!u?dU1K5fEyc zr`56xK+Dkc!@F#9M7A1M*Bj$Uy+VxFt%31$9nlzapRv9FPrSflkP(e;+9NMYtv1&b zxB4DT=Le(BcLgUwRh+SyTggMd0wVXt$&n;w6v#z!k*CpDHt#moY8?gf{8D)*q zl~0~{4`?^!Bpm10SsPCaGtJ2IeETP!yE60KtsT5jpc-l{H@gAn^gNOlMP5j~;YdPG zQfYCjT$pg}3d`|x>l^_|*>bC46BVQ-2cUiZY&MfKzS3fiz=UZ5B}l`)>A0FITL1zf 
z7XY2b&3A1gT0wq_yCz60ualP%Q?kepoDM;o=M8zj=`ut2SQTkZD#lCmd^Lq5QolqW z_*$8O_YIwM4V@%0=4Dj#xz*b0r5a_cLyYEtK@^2)E!MA2)}1>a{cFJkYLl^i^{0EY zQ>*6emm?(%d{9xhrerqF)ZEUF9m+G|X2+?yLCHHRfK3-}Az!GY$e z&2g;>U^9dAdo7C0I<0~()zCOER`gJ7dbF+8t}L`aWbb`mH+dy<($4$CG2+!0^U)ey zeF@4`817A@I<=haLmH>n5ap@J0)W z2;KI*SQ}HVC6>b&)VD{6_Uf78itIM)pLpHUvQ5-Se-rqsPHWk2Mp9}EY_?X@2uv#& zNoM__9UCV~?~pKUtDB#mJ^}23dSa`GlT~;t;TroH3Pu*{UZaNbVUcT;0IW1+b3TKJ zFH=(XTpwO}D}%-8!Vx0k(g2CmfcY2drw-PP>;9#) zwU`gmgu~Z*i(Z`ONg{slGm|fBRQubWa|Y`ZJ1}Yi`o&E?D6T?HtiPecS7G*qEf}VJ z3(ZJHc6Mf?<;6BW1H;}O8SVt51*kbGjsx#0=6;8ntc-(M2H89TQ9 z<1XxR2;^gXKMnXFv1*!A=elm(xCOPi1CBWSSs3{pNgEe{ZY!M-JTGN0iKJ2|ceN1e z_LcRZ<2p)9OHYG;ycwK#x2gH&4$9F3VOVb8lxDdX0KwMx*FUxf@w*gf(HRUCFTzbV z!gtLgej2y+ySlr7Nmc&2<4B%U2eI&1lu#XMjYy-rArR)+UG9^b<;!8YPHrj#Yvq$} zD!`deDzsGE-Sx8}0j;neOnMGFNpGR>&XGM;nwpwQHPNv4rl#;z9=m?eBW*S(zEYf= z^DFsM3~i`gqtiP8!5IS>(caNfIaTFbK;Ah+(E4W&KtTTB(Es^UB)``pUbHSU!zhPP z6J^Yka5&r$?1nx4CMJa!1ueLIDAQuD)VAO&sXaM3Xdx^toWkQkY;A3cQz+r*rC$-O zNrO6w(PsyZg!+`uoO$K6@8++Riio(lJjNUgpCFMlX#}*nghxN7G!;10L%KY|0VK{( zNre@K{T8R?rs4{OknoZIpT2Esx@uX%za zB~;b%&Gh1;!szHI({cwW%1p}#)_>52V37Oo(*pio=)&KMab}&=@!K8tbtYeBpy>7R zNw>*W4~GfO@_#7QZ0sNXTh{RZR;Kyra>&tzNOc+oF!@+Cu#Ua)I=T&V>&Q1Yy^)@E zzB9c~TZSv$2~Uq*o2(OtK)P4-Z-67GM|VnAzhXRd;8cIeCtv);6D5{HYicbilY_le8CJuG{bIReBoYIK&RBZ8aeoTfdz|iu@Ab*g&h}Ve($f#}n%coQ zDaJoS21jfm<>#9oo--2>N!wbiv|@jo33%A#o!jL<=FsN|OQzb%YcG=vG=f)C%Q#7P z!F&+Nt;2h&Pqi&2FHxJpZ*{|hVKvkd^8}n&lF42p;zEt}DA@6&KUJ3;A}-kG zje(D?_uDQ5F!WzBjsI`;>W7`1aLC6T5L$M>?1a9a-vtTZ{OQxv;Y*Fo`!bB#QaaV4;0I#a3sJ&5kU9?V~h+qC!4b91eV%5MYqh!&XHhmU@9F6&bLvPv4sV#Z#D~^=wHpA~`Kzf* zT2c@C}|O9pU`{$XGF&G7lZRsR0QBKq(4iecWL zWf&5{+SHU)R91GtqrbYiMeZ zrPI;OSAlPnGl3WOw1$R_85UY}r|;l6uCdXsA=MLjY!lzSxloz*2q0MWuQ7ateiM7{ zMf|T8t*_WP7r@r{`W>k^nqoc4eSJ#L>IQSa`|dlFaU2$l09J-uywhOB%`!1`(tY!3 z_dc(Vf;Zj+n(i1*F4n*(H1hj#vxa{6;%Nr}V5gmbO$p%9;&Cp=@1Fq!cNIRkgvgOafx9#R0ital$`1!SGM5$MZW&+-+W0zF)C@`OSXtK9DOqhL?*k-n#cc0MHP=?EnA( literal 0 HcmV?d00001 diff --git 
a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png new file mode 100644 index 0000000000000000000000000000000000000000..4c279e9fb536c20c56e45b2b82532aa7a307a26d GIT binary patch literal 17708 zcmeHv2UL^kx^=LO1r-qmrH(Qdq=`tCqK?9hf{664ARsNF_as=bQ8JD+X;G2hgb1M} z5e1?kMugB32oeJbgh&YiVv_qtXU>_q=iIygb7$^4cl~Qvi-jcS{n}IZv-f_veeSI3 z=8e)DArQ#sAAdM`9s*fi2Z5{!UjGevC5h><4*bv$J!Kzi1@j7xxESmSF~1lZ=no6^ z_i_I=+%q`D2L{zt(>SE2tMcvT(9pmTeRcJKzx@w2Sg^Nxvs2g`u*!zO9~?p;5UFj# zZxL_wJ8-O%89$ypZXKD;91;zE>Dp1@-GOmi|?BCbqXyR{74F)ZDRMlVn#W z_4q3JYOj;3qiCdZzpttM>GKk4#m%Z!mwt~YL)%nAVrmu zDbgCT^g@xu)UMC7s)3enb z6*KiDICOa~oj_e&Oftzc;0}<~q}*bI*xXDlzxD$2tP;zH2>g5wLxKNZq21JYrE6}& z)VLw^+WO6v(d5z+wBk>TY<8+(rBZNEu!IuW^FN~b#ZAlrn6h_vP}?JH6%D~n9UdNz zQl~`F;2pk$O;I6ikrzhXX-o<8f)DAMc;FQ8uva4F(zhe4d|U#X*OL1xCV4C*@|F8g zLm1@LM>sbRlVKM zjXOB-Y-e7cIQ08OGhPSb^O^b%mzncW0&g*RrhYt)916}cEgd)QNhYD9d-B>TH2&g2 z>Wz#?vU%9mt5;K;#UK;1B2xC%f@QQoDQ3FLRBOllJ@n@fkNT1z+xi;E4Ox0zEckst zgI$S)?^r(*RvD9;B%@<1vd`qRD>&b@s;VkaC+EEdiAJBUh^fS|a1mNVb9@c~sm0JL zXi*y@2)Kj}r{TG@xqhNiEtNaWsi<`!AmCvH_er1Y_3@G}w?DqxjfWxq8B~w=nGTl@ znk>uDXAo1ai5b;pzLBQUIVj|3x_KYNLxF40CM?pBg312=Jd{y`b70YM@ z$vmS1Odq%N_&Pc3vj(#*a}UMfmkYUwQev+0xE`rEx6hq+hdU=jn3Z8fAjTuwm0f4= zOwKtj4Aoh5r7aI`CFb^VVj?3WZ+3X3XsgA1x~9<7J>DEi9fh$MK4b(v;e~X?CWN-q z!&g2Q+L1znkdFv0^P#UF-4d&klpww$vPb!S#K-8UvHRp6cwDkCB=p&j`*XIlUP@cU zj!H;1y?OoGj1JPjv?7APJWk^Cu1lOP;!C>WaJZz^TXY{(d39zE5SSQ}QIc|HQ}8gw z03>wDR13#eEul6WvGNfk$SW)?6sB7zpJruYSW`?%t_+*%)%xomWE01h3S{i9{jbYlj?8g&XA%szVN*|+X{te~5z{1SzNbX!p;cGtOGKacI?6X3z z^t65W@ZnLI-c%XJ3rhpJ?2S#T^fsEmpTr2BZIvSN(K)w?dnzJY#7$Kqb1LCXKMa;L zKtj|q!%w-;s}d;QkJ@N49LSBtu-y6Q_%KOC`1gFt1y_H^fj%&QsET%mZ zMT&W|$lffY?T12wki?EE6M;bTFY zFd73^q6`SHdAt;>v&AdT=Uu`FJZ&gblpzn4VR%1rY=)vU97*>_ue^!CV>$y`5e8U< zluKyW76I$M7&0^C4%t$iN((as*2|+815*JTp zd0b-nSj0H8Lsa&#F=(EDc-A_D5fSoB7&Uz>r~>tFK{;M66BbW3e3zw=vr=RMs@9Y_ 
zTD>*TGHBq0`%|6^E7~qh#XoVh)z05d9kHDVaRZm=SwqgV(Wr^o9ipBETWE67$h0#y z(#VP4T~%arseb3H#Jf}`MLKUqG|7>;@vX*bET$F@ifcuM6Y3Cun8kCgLHiFi)P$a1 zU}x+W=g?_Wo5;ak1!gHEuPE1@XrGS(Lh+bI>Vvwgky0 zh%y_yBOqbd4ogD1Q@JqfnxS=|4J0Rym^e*YHh3G*cg^!@(ceKIQlQ}{ zxmuM`=4S$O{T_-_cQT7ikGb;a(n*cX1h>zl1D1Jc-n92TUl4CH^1O;D@8p7L+vR=4 zHf9YLQRcevNJ=#t7acmiS3T<0&x7G?vK^^b)kI=E1T@vshR>;$u?%gb@mPjO2|;Yj z7PS@QWTTad$!Lqn=Mv#{{#?uIowJ!->=I4b=LROO$I!ULn7s1x%U)|Z53X$~F)77H zbZYgzb{z*@U~sIupE$K^2Y4Q>TPSV#Mo{c)O#+44Y==ZKVfBo(C|2Lwx98XdkH?Yt zS{=Wc1H45p&0W|JZ7JEYbFV0$c{Tu`O!Hk)YiLp9Ihb_NnA~#ZlnC_XqnQZ2gMZo& zs*6{my>E>;(?I;i5*n??%{DNnJBV>*QThFIPTJE^$1xBw<|LBk~QO7bm_j?ta< z@g{ip?-Xs*H;0(QBKDayPu{(v>s2M8);L}5`(d}r(*s?4FC(w2bbgEVr!hJdt;?Fgy4(o0sWVp;TIK#Me$*(Z!Bx1P$!?F@=2&>WL_pHORw3FfwOro0H$l8|c6QeHh*5GFN+3t{P&;DvXb1b_ z4FrE4oqje!d~m1ukZa=kUS$^;jWB9$vD>5-Yn*oEBwEmI%IrHcw`biZ`?tS8wwV5+Ncr)RSGxZ_ZoE<&$&!!06WW;tc5!^38#pLJBp zq}JoHVKTbHZXL`f5z@!bQ0_gnUZccNU%%l)?W#2*QWHx@7|{pkoU7giG$|$M6b0$0 zGhLz>spLHes&<;I9MX!Yx<^7o zOeDJ-XoMXaent*RCPY+s&esS4@McY?v@T1!>d(~CYwP)o?%c~$v0L;${4^QEJN|5@ zU`qwt`a}KXTbbjEl46rj*Ke%W;W05>f|*GSF8YtPn`hceW+3xgPwXHug( ztZL#vngnmEPt*w&f9oFKy>!PW^~HHxVvce$c5_4{Wk?d|di_p(6T2K%ZkvzA_hbg*nOEXG> z&wz%f)`_U5pbCmJsv_)6R)l9*zr%Fs9&wXNrN)H#Jg?SKWA^p|d;d~N#q6rBebm+> z$ABqn2gS$F?EB5auF!G5HD%m>dfb>#H!iVkTW_HO*OxpV{X%B=oOvIdE<>}o9Xz2a zI;a)Vz!JhCUjlWY&%JQmx9#CgdB>z5Owm!zqORvU*WVsb%y_?T{8;jU;=Jl;Ej+HA z(1%13jZMzeucfb#4~hZ31x0bs8ylLlhp6a)fGE;`);4R+E;`Y4C0WP4YMX!J)=nGL zjaH5LsuTWYHyU}9820)Tg+0Wz+A=3qQ?`D&XBzhkraH>grw9A-SztJ=twD+oMj}y##uFFuJ)kW1AetcC6%@ z@9WdI8jv6?ijs-RRx+JB?M~{4$Zr-tc;zNcT5V|c&sw=A9|zqxvP#a5u#CD((It8F zZp7lxovZX}c)Pw!I3OmQwNj0)w`kyZCnsfDm}R+6G&P20(UU&L!Zvwq{NN6IK1C0q zxUAKAqqJ-HcfqJjP=(^r*Xd`PQ3fdTyGjTq+ioXZmXtFMV53 z3lH$nC|>!#*xp{n)(nNv;zWmKB!w*YDqO0O2+WF7yVELoq|owkpKyTh5UjqCzX7Cp zYt?NHPo3edtyy`2V2F6#crXaQ@xl6daqX#7_eVF|U4JSZ+UL)%b-5E6va8T}L)8-- zSqqtfa#LuY+$;g>w#kpHZ5nAT98hSROewV4iPZ1 zx&4^k2Zuv@Vdv;ERIGi=z9s-T)go|HE*-uJ7q$^AdfloH*aghc=*r1f*~&(Vx4Pxy 
z*H_OL5N7rVwP)Mcshn-y)~u~I^*A~+II*GaxqAqg6T4@94>85`)*X8L6kpuR&%%)1 zGufjXQmJF7vb0@Unt1a>h-rsACF6v%0!Icu@&!q;+^)|Gr)9DrOC}t2CWW*@L#7Y; zfV*L5_}&&?f)7P}!Q*(ja7M&f;XIm4?b-E19|ac3mF|ug3j9x#we;`r_2Cz5C}#1= zMmzEzR$0&h%VYnJcdLtQ@4_>OMO$gjKbYyBCD5yNZ7gwHot`FHp8amk1V@)5qCN^9E4^haAZ}HH^T9^uokZxG03mptPeN7ZHYMDxWT3HFp5+Ik} z-_$WNG0_hej15D10uXDJ0fYPk9h}ci<|DTj#%!HhXcmCkQc|A@32dft`bcV|GSo0w z%y@3h8>8OY`9dRh@mht1iK7YZJSP~gW;TOp;FZOdV!&rDH?mz75GB%XqhBcjKx z-ckgOm=WMc?lV-i-E)rH7* z`{6fA(gb2m?#_p|tUk4DZ(>U2OkTfiWp*^FQWtKTINA2Y53;Lw=3Q6#JhDz|$Vl?e zG6ghw--W#e!EFbuvK<*$Mb(N5Y}+QI;tjK``$`*rh)OxnuE=Cm#?`a{!)``;v z-`?5Sb7+^dx8gEm1E?b|K~sVr`1mn)!k_vtUb&(D@uYS~Ul#ePsE z{}7N^CLcL+h5Umz6{VY|ZEKl)cPKi4W;r<~X*@OhZsD>#t3I!RnJa%_B_z=i9o()& zUgf57FCU?HXZ(eu!8+om8yWHnid;)<22=rM{*(y_mGJrxKc5-&U45*OeX^aoRK5zL za?O7Fz}3swTh}K!7`2Ez&`caz1!=s#WQKROS2Z~l?*ysc4_T@z?1Kkv*RP9XZm5Gy zTW1e`cb4+*j{SlCl8uh4Rk{|XgNAE6eT8vl2=m3bA1}5)RwC2t@d;qwM(N_DkPEa( zdJ=g(RORW8gu^-T#Q{bgJ=y7QZOaKLTd6*CM}i%oQBHOA?t@5Tq5IQUAG@EOtpX^Z z@6V(bkpvBnjsdKSgSMZbY<$})^Pz)A8Mo%;`^Jr2%{79lz5yOKp%7b?_!lAI$;<6P z4^UbU1QEz%KqW&UKV6Lzg+PvNT%{&_L*?U<8#0oRG0!cKl7va&?|hgmvUJ2b48RvFk>}zO%0#V&!@IP^ZCb)b@rGUPpK`Mv)>UL zP`vY<3z(TpG;2)iA$|S9#oB@cK$!UbLI2CM`Rk2;ctHQxZiyj~L%L?#Uc7jb4|r9K z>gh7InoKR$b5V7bKERq*8}7RwFi@SJ9r8x6bSTl~D%=XUP&MO0u-4?29Uagi`{%C= zJbQM`iBjRzZk+s}mqKYw$?7i$?WxEiVGni7=FO_4)jv~#%rcEuozA*<@1Cc>zkifM z%6HnZVdsK^f}S%Vz|=fw!UbbE=By4!30B8o&Eq(k_q7B9Qc0)EY1gh@mKg1TL2s*% z#;4u<+9tH}7`09?<*B|(EVA|q%%o0ER@NVzFk?IVec|kE5c@n>tD|3s4B~hgj;X99 zajPX0lateIHq<>izlVxvR$~uFO5{e24lpQo2&Zxgq@QVj2`F!WL{|F~6}rEc`U5g& zv@{6mp`@EPZ=SKRu<&+Dv3K#+@*5UPo`;VdvDRi{iWd~f1b>wIzP!G~Gzt_~E*n7& zV&DK;@zwE8AJ74SslBJC$5+QQ{Vj#!L^i|yQW3SfNevOSM2_I_vH4MW8;E-!Vng+s z*>77{n}qD!x38znT)ZC-XJEr>B+DEyZTx}fcs)Zymo_4iUA51DxXA(M_}Hu5N>j^e z!^-{4a%HYpr*4z;Z8e1v3ZrG; zzpn(;fYPB9mlH4;%trG|MQr@FB5Do`Iv|}1oLxYg&6k^M-_~+%yV%Z7psSq2NESOx z^_1Z<%`Gg_0sE?5JIwnMx55lKv1tA^15$n;wSTYDUydJ2la-NCzIN@J@c7S#{Po4S 
z=wBd+Fi=_efxf=|IyDzUL!UnF(D<*Q%>T7c`4wRObyoi^q&Z=c*LC@6fT%Hl;jU(YHDa$?%Th=qrX4nXDy%ku};skU22u7f0>Md}+pXJpU{ykV2yogg6Cw z9<1MogJN2fRo$^uTKXUy4*z9who*BB9kO42q?#YA{xx@#mJ@WvAx3K+QLE*;+EGVe z-*J$!^UrtZ{8du_50%tY#lOJuNPHF#xFAPjgalU_9IO2%k?Ss0rlK@#}ZW`l_I z9pm}?HGT86j|75vu)jJb%yYx&sr|o%ac_l-Q*WhP00oEZ zee(w$WWpG|IOMAX0COt&@{#u5awn^F_=_8x!3^PDlOCUf%oZVd%gnq@-n}Mri`V9I}OCJ@YOUvH@%b7dJ;2Q#w zn~x{C)tcAA-+q7g08u`IRt-gkF;-@04==DQ(-nAu)~#1OcZ1RWt%e(JvV8!p@ae(2 zRI%74#iP#S>u7}9%<4^VeZH&|0+{1B{MZ*r+BqoxV?nh*WSxIVPK+G>)Zdoi7>&Lh zKzkgVnB~hzg%}|00sYJM7620Ab9X-a04aKLS3V3453h?IO+MOA zBh1S&p7-G$$ybskfH85R2);Ok&E{nGrNR&#;|`)R?|$%1shbsobCF;d9UbjeIger? z-R|e++D1Ld6Rw@s+7LF?-bH|$>G2ZBQxXL4uQAkJ*#gsm!h(W!qze+)zFN-nt`fNG zV?U!1kIgL1Q?2B6?0aGTPdYsTeMueT05gpf<4W>2;TdGiPE{yN@j7MN{oZgBf(3Mi zb3zN4C%wSjbGl)weZMi{<1{;2H~7NM?sXz6sem#$MjQeRE@KYKjb+#1aEp#^uIvU; zYq<$s5W8{7%P%z1Rx4u0P0nGh=^Za42u{$ZS!8>s&X9F)nS;}oddg7ytXS*?Jvu7a z4l)1a#os=8+iG=A=$sN+>e*?`ox?&>=1<2APlV=2$g|0ThA_X(IqwGbV=-UyrXP-y zwrB2xgeEGrg9AUlutu1Mn2RrN1XBXAsnZ*;dBC{ z;bC3fx*r{eKO^~G!R^78UslAHe&|LtEL{@>Q&f9Ih7r$m7jppJxt(iw#* zqs22{$DQAdQzl@XV)Na;xm=FkylK-@8@Py8AI@kOCcA#oA!8w*XxVfSGUP}r)yAUdT${f%KU_nMsPL&zF=?feD z$CCMX#PVND{kL<^|L(nd+kN?nG!trIVBieesXpaV%jEge`$Dz{3j>bY6ky;YL)+CJ zf`(mkKf1;i6FI~AOf#sw|CMTT)pQ!ydVoV{A3F55EiJ!H`OH@Jp4qsr?Ay0*9|nc7 zt*tGk?8ht}m>Q_2T_nc`=$bxKL1AIX(o!8Lj%nqns7`-gy&1rf^&h>N0!h?;FiWLIU!vKh$1_6rbUBxTIAtc}T;)`87T0~DEU26|{5t`VC?o(-AH6$4 z7)*Iv>TnC9Ri#Lt<)yUTNWLA0@fclWr^)hmqW|Z()&C`KtHYUt)d*hnUcZNcMDP=K zeaZLXv9V1_y#Eh$K#?%7TpD0) zSrg68&BC<0c=>YCM(lqgD?n&ddxRY8|15BS9u`{kwV_mdIP1m~wsA8A@}w^8XuWKB zloJ+Oj&ESBFrh$bQ#t!=Z~o3*<*PqXSsK0#njLUneC@!Ki{IA+Yg*QzJWwv~#+^AW zn)Fkx=DUXMK3D_*h8Ij%sVTKyo`@9*o}o!qQ~TU5XL(@(b{X40^7A`tF?kdkU zFDWnrypW>CVN-^{)++mKn=hl;?|P&Vtp97iNat7*_Dem33)c>bMc(wI)ere{Q5ZV4E!Mz*SsarFP`w zI^(6|QPUm@P8GPt!2mvDzEj7}EhuUY%?((AJ;6{q3B(>{vuE-9T7FqhfK z!$nG@nnu<#gu6aRzWkh5H);nV~W5SSGGz-Ds%gI9d0bsuU)v!EBIr6dfMH)cYWg-;1QLA%?rC3MR~~w z!FDPWz1bECGSInguf&O}_SXe@M7j7! 
z#+sI~O7_a(RUepG8ooOqo%{C+g0nUK;rG8wK^*ht*w1zWJD<29Rflh{GaGzugq0<{)LbKHzSW zv2y6x@73h#sVN`=18>b=a>Adx&H3L>v;0$q7Bl7&`97WjjFT?4#P2YwPPHk(S_4S) zs*dR_O-)TtUteFKMV*;oGRu7nelP5|agn~b;>UmFwaYu9&g0{!Zx&N5M{04G;fwUqeykdQL~~x{3&&%gHpmTsMOkTbn-UP z8YXL;7c{;)8h-U3fI4g6{%z;atJKO;)??Fgtn!V+*Ci~DHaY`%QMv;#ka7uT-~s*R zBl}~8@84Q=zaIP_Dc^+iq`z|#bm7kXT|ijnPUDJYYx0pbS(r0*V}o76Q|d5f;gaNu1G7X0nBXZ)O_b#6Tf1%Hi?87x03x=}CnhE)*Eril!H(W!v9*pL;@ADxN zPxNce1~m|bV>octn46iU7O&U%8)OZy4$B z-7U5o0)gzke(gt72xMz51hOq)=ML~M@e9_1;D=g}_RSzOU)P|}TLCT*!&^aq-o8QJ z9=DH%xC8`x`1&ZypFbym>FiPWpdi0MH3bFOzkEX8H^5EdwQX=WILa=+Yu14fi0F6x z{{-AHJ>Xmh*(UM#RBFf zPj0^+wR|t~x+{*GPb;Ol@>RzHE+fpD~e*JmhywKOiXo?7_&%YSoFxFh@^?)M}bsDOSm@g9| z+bPCX_927$VKgFw7U)rLxW?MnY;fY^macsi?rKfkJ2bvqWWYYuezVIBn&iMl59Hky zQai1rq9QDq=NZY&q!<`S43T=k(H|X@w@VKh2_9JPu)C5uWEe?WYUAy4kyVon zxH6ZdAtWUImp8jc#G?q4PA{i(A`UvbJ(lp+;%yMI6A#tpHRr5}V$2ZL-pN(pnvoEX z>drg!X?V5^VND^ebH+s>!g_PwP0N-&{O;YO)U(#64@xJT{Z0n6vcY z;{HkDbEC|hwF8vtZz(-JkA0d}`j@E+C{@IQm!m#rvlrX5>r~dcDGt49ZFD(nTH_RF zy-iCJ`M{+!HJ?RmP_PdqD7Bst9A)2nZPz3@B#HXt*W2?z#MrXD-;U+gF zA{SqKEDTj?A7ytsavv9v2k_jLF~3aRG&+t&TVBKSG*?zv4@$XzBP8VGzpc8wTtSn1 z#`w%{Fv+LfSu4EFHnK&rW&X*H}M5f3Cjs;jG$d>R%~ksxgH z7HTNS^?JLO?9frwrg4$-^k?{;5KrfAqUlw%`nj@;C@%Z97?pn_xlv`oN0AL9p%lb= zD7<$8#FEp#ovR<56Dq@&#&2(W2+8?IPKc8_Qe;oTmeMp_mi-eV?1nsxOJipqCcwL# zW6zYs_2yG2&#nAw9QnN_GlOW55ca!tEKA;|^7(oLqnAXZ(FimqxS?n6#yL4z)bf;x z8?tGUnVttTI@xhkc4uc&T8W@$1|p^sqM90bdYNt-rX3yxuOFr{vXd@To2U79hu4t--^V zs*S^b+aF7wDkO=(#a0O_6b)Bf<^*n~)K?CNpM0#ZEbiZZi#s933tK`+&PX2iIYpM@ zt+$OInrR%NE*$f{yRr42bAo2|^SRVb{(@$VE=H>yknV=h`q5%GknrT-v#O1F&VsLT z0*XBm_b!K?hP|b=`Z7UoRM@iI!JhQJnRHG5u}k}5 zIOW49D9DKNPIX$XGkol~`>im>0Gs(%A4ez%R;4DJDMxT|I2gtSJ5 zQR^*ea>^Xqv6FqtEeAf8Uc&-eX}X*d+2F_?7j407&EzG;Qeahmmekj;Uz^Y)rn5tZ zYF0=X?y}B6HmE(Vbf(w&e$J_u87u~8LTN`R4&0ig)9R*k;w<`1U@HhjK4==T%s=Hbhi62Jka^>-(b(Xn<@agP0@+9(N{{)gM>v)D=5L!^i z$cKq^MSeQZnl{Ka8AmR~Ii4kAwD7gu1z!|PX8@Fo)KE~82-Rb7=G0!EJYEpeWvSzL 
z$9kbv0qMWA5FF;X8qQ;ynPh6H05l^JRJmmbKa5hi${(g+Ums-eaS*X+kJa4;Hc5csIn9d3`I&Ye;`y>;gTo4260vCZdP^$Nq^gBJw0$D!F6kw4JrGDz{%U-0kvN9LaM9Jl~m|7 z5o8^fV#hbH3aS1yoRF#ea&^!RGmeB`%#)O^tt(A2sTXbfq@4@(bvf=+(k4m&WfcU$ zXwi^H&rRvtn&E)XMfWB3(WQqbbk>v#zOELUAj$96xlrCsX{SLhi|%NL`fb;#WNgH? z8iq~NjG^z)_=1e|J70LG?aAKiPWcA!TG)Uiiu1f?5EQG}ISQyYuw@o!m0Z4#(A8}_ zh;gPM))5;geoNfa8}ZKlRi1rCw=3%| zcpW2pA4*7+uI)@yP$;)=2&o`Pt~F4O;0z^8OLS-x@Z*NHWLC6|5~OLUF;<-x}-6*f?$Jf=LbB(YpW zCebR5p!0AP`bcAUs;>~U}S)O5)+W0ThvqMvfjHO3#^V2m7 zHGvm5YFi>{eRuBM89zjRufZM@R?x}NP10q&ic~|-SRO9zY7_LmPTl5?g;oZ7$Nu5;C)etbmU60V?Mv>w=R{|;iuleYjn3jjzolq_n7D&Q5xI# zc334qoN;~`ejGo<{LYQP;;|F9Dl5xat)@d)2n>T5PmSfL!gAefnatVzGzYY2qOLpB zeWPG?G=e^UbFBRn1%5!8_;LM{u9l5NC=10~L$xSwQBFxv>1OOyzWn1B`EOm(0WLC( zYw&!sMzAV9YnoK&EmM9nQ`D(F?)79VOh>+P*^#&6*b4Khqo&qZDwf}{-4!M++8}mD zL2TtR_Ez2IY%!_Wku!VyO1cK(qVZTsr791X1CuP}2ug-IEj8x0?@^%6lt}yH3nO_i zw-ByMG9G~bYG;5$26vqasZd!vmR8{x{9w^StfHi zbQHeTV|l`4S3+I+KsfG`UCc|Py%ruUcg|^EyyHfPRjjZXLlwiMs01_+rH&i?BraBL zpVWtE3U1%HTN>$p+2JQyg-ffGr?lW*JRiQIF zIsr%DzILj-@hG98_<^1IWQsY{{{h@$Y_1%SyENb?))e?FzVDfZiSsSSMHSY!0R=nrlV)#=dqL(%0Y$GdO09XF)$H@s? zMPsagsppLD2Aw#)NyyFKsO3aH{3sE|)EiBKolcN3{z)7?%gs(W(%shMy5ZZ=X5@Pv ziD1Sc>h^BjT-O-Srie3l2IA!LqFcILx5%HHi?`G8ob2sxylI$Iq%xG;-FV2*<(_gv z_|%JZtm1THO2=>dzTu?I*g2_By_#llwUo%sXN^~LurAYy1ri<1cWSWY z!cfRuY0X1;rbN0-T&9JO);{5fJ-BD7t^iykz0r@}t*Ck)s7Kf#|? zgMlm}u7)T8Rd|)6JI;LiopE?iQf%QI{OPO4%YC2&r9c^p2?i!}0<*@=+kbez?L*`P zCn(V+HFTVDT05sKUH6Pm(SvBs1rK9cOVjS+F`xQPZh6MMk~EH?xCjL@gfs`^b3q$Em}_ zFdcuRzL!I2t!?dypFr(s`VOfK^U9$?B~#w1ypKW?>ueI9*p+sKVD8#;Uz|2J8C~|x z=z_Ulm=xA#)P*wL!=Tk`V3r+EuQ;XC(I$qU)}){5hG%N!wdXd&b@XXk@Z%~{y9LgP z@2b7L6aLf8le8#T$Riwwmp~@n?8e0DR(wBmK^Ml)R?*%Cv7o=K7w2y6Sr4kOtaS>Ry{Jw zr&u1;FS4Lkn%9*ZP;cuBSB^{Y*-bbG3O2=C?1v}8OgSlBqXD(tY52Q+~61aLwgL%6XPSuvP0_h*tmX$BHXLHSjxy5+Tf(1FDk`SI4f5Lfamh6=4>U)<{6SXf&6l@(Eq;~7LOPzFiy?IUd_lEx@=>a2?_rqCrynOxa7o6{Ixt#! 
zr(my5Ra@%#1gbLved`bP7r?s5I7;Pk!lGVD-GW347w==P9}EPxAr{;82=|l$FDb z6ug7=vQ5&~!0`yYAu%AmPy(NAb*&qW#m76x)Yi)ND1_H%j2AHO?@B7AQAbm>4Bocg zd#F*Zns9RJi6c=2DNDZLuun~)J9%qJs+A)@gdDR>OBly`y_2za(~mOUDkoc?F$ASD zK}*rv32##$HHm~=x-dVLIM=vtS%#nq??=Q76cB6a64iZ28^@=?$P*H`MjS|Bvr^@3l_M#fH42^$lK9y=5&(qJd0eq&sSFrU zR$JCa07{M}Q#)PK?3E)M*lSaB#sJ0;By-El%h`Z@aI09gnoU(8MvO9JH!aQ5Vu=9D zJ6GcmrnHF@0c)|*>AEH*CMJWQJfUzHXIQqNK8p$LR<&*9=>qgH#!aT<3CiT;X@DKrhxl!JqpX2eM5b)q z5sq}q#Z|b!e>SeF%cbQ+&<^7e$oT5zaEnQ9l5dhDFVUxdrXYS6?R9~%(zP&X%9EP& zujp{L1GU?^+DjiqjvUMwAUiKOQFHBVnJ)|`S8i7qP!aNQ7%3`mS8IZA=RlmQ0Wop! zLV#L;pDGIbLfg($}9TTrd()X0raM5q zu4L2VJUUI+nfWpso`~ttdo_`$`z`YqTZcs%h2a$42TXs=h8I^gA(o1N8c>unD6epN zIfEEkupS|6))iRFIQ;=&LB{POFNgKQQHJ?qW4sRzY4DjGX1^D%PHls5R2{~t*63~%N(*f*b^N)vL|A}~X z&!X*sG~bkOTJ8zzHPN-Ocyd=5T6;6gyyXy~xNE;?RFW3joHOt$SqFuvD; z`%9bp50y5$+bVTmns_jtYyx>;)V$s4O^594C>UTYMz8_gff-X+gj)q@)D5$Em2x&g zYci#)xYW_5&l#fQCgQ@UL;b#y2S1MO1dK4`-qDX+Adqi=j@ko(T;93m^DhiOUA!_f zGExZsAWL_Z0p=@3?}U<>BF8)WC9Eh7s?7W#-)Eqenc=rrD&Ii+tm@6ersd-p~Q2HU?DHNM6&)L^(&C1|Q~=Mjlwd zt}b-g{LtmxU=Cu+^%bmW(0rw0b#)a@BVS59)qSP3u+Zas;q%z#m6by4lz`fW`T49Y z(c`tz(L2a)B4k(4J#6X%sFS$US1PNk3+~(zSTX$?of$~wz#2oAOJ6(7r3!tcnJ`ve zS_9VpibD5M`^0Q`{e9MX@b?*9vnVXxN&8wj6o@6Yn z$074tc-hSLV!C~{(4~)2%XLJAPkpAMS2f}3)2A2DojW&Tm?45jqhry9XVYwos z0XD@!9d=?fkU9|&5k=b0=qUF0RM7`ZGjnrh6=KxpkiJ%{8BYs$clQ?v%a<@AA)$iN zQ3r&#gpI9jU$LI2rKXZuMd1@nA;=uXxYT*!jrD%R%%^Q@OTT@fh!)A-TL3NUmT6@eE3&s^7DF&u!7l4QIj|*!5`edK_*Na1b4`eOW zU}ei{pVivIsVS|*xq))yt5>gj0QT7fFv%W($ubm_@Vh*O;&IHpyu3W!7vbU%$bRXM zTY~*Tp~>e%cSE3N`P2hCOKNddi#14$##6%QN=bVh7Jm*J|Me7K8Vb_}3gNF&dko%E z(;Rd7%`}L+F^%PNx#OYmvmt9I2p*XHI1*GpEiYSC!C;U0RKDq%eD2<>Gu=3D&_a_} zPv6=ohU-T`2Z1>&Pm!omQ7aMh;nqx5IRXX--CWZ%6rec1x!%0p^OY+Bz1$s&Q$(;Q zq@=hbxv6s3+vx0%LBHRv(T@|Kj-T`25gpxA&I|F~WE<9Cr6_eWoN-b7X?bIKhgt0V zk6?@)l-&ktuH9LCx$iS+m4;i+pNSUtIgO-+y zI=9&)>VwnFMohqFuv#3Cp$K{tiO+W~Gt%(G?x4>Lzb%q)u1)ErE`z!ANxDu}%fi)z z%K7CQ;pNNioYj&u!#=Z5;|}J9o~4}NEbmIt6ZC1asONE6Bw0ZF+^%QT2hTMEvDZ5> 
zYR?1pVcY`DSD&{e#WRWIv191z@@hf&<=>A~mql#1O*^YcQ47Na&Bof3evXb_27I}? zCLp^NwkSX#544-Nzd0dD*08@6K?EPrl^jg9nKB+V5n}4)FL#VcqH&w)O+Am_z-Rah zWcZXK#yZPqCj_Dc9=q@-Gt8#B8UPisbYn_`Qq%347$fUGp`j3u%}wVa(m0&i!%UyYo9h7~ai9mx zR4P;$1mZNc-|x7CxQik=Zg*CX$#KVLYaK!2MCLo*^l*)lN zU;r4Ng2;ztefhe)bV+t1VyJ*)u31jz+$wBx@wch+v`}06eZQf_4v3R9_-Ik%?WOip z7`sRwO{5_&Mnxg(Mm~BKslL~@v-;ZF0&+2=vcc4IUy%gFAiOdqx-A#r2QQIUT=Tcx*g(_9<|5&q*N%m zuP@~w!Rs0k`9UbS^u|PYLjlZ)^`p%y9b2%`K+iZR$@slUDi<;HIH(ycBoJwWfcVm1 zg#hc}utokwMx;#6ytVKkR4$uxb6~d|EzH5wlcKVBbu`(si6!STVk}=Wiz&_kyPE3*xx&V@2st2v))id52 zH}^pg0&$g=u5H#~R8oq|arNs+OuqiCLv{~)gLjD*GTOLHv-|Er`D^ND(;c&lYkI&s zPALnBez+#N&mNCN3>Eo13t=KXJNc{h5YS;Qp!);!Y8%1u8UaN znPB?`Am=i7f%B9#tqzUPImpGGa#@+w(1dfSX$5d~=p`8E;aY*VvHGj2wvISM7eJ01 z9Q5z`I&s&9!;u69gYv9FXqJ2@{-QE>BCvcfHDE-BR7)!J z;N~o}t{En%ol`nXu9r`+^_XC9Bq)J)BJ^H2D&1WKYY!+)q}DnSBReo~n-l#sXoTaX zvYYz#@-y_D8X0unC6^ne(X=M)x*9$>As$HqACI9~TQHqNDK%<)Q#(Bx7OM66+R>?A zBM9U+$T9-Iw6_lUj)sTI7&|FZSJF5#DJtIAY>%LBdk(Qo3M-$h%Biv6R(@*dJ>KDq zSK2%5hae;%Udx3F*HoFyz(oWg{KI^9Xb=d5)&n0YW%cD#-@$6DR7$zygC3*R zc$ql57K*vBzvvx$5AHDy&Qmv|h~N#kqBa6;);ckH7QP4zvGo=uC9SFLHKW;}H>;u_2V%rHF0Z3t}P?gfvyt3C=BxG7{B-MwffB&V-#9) z^+im+R=Go34tz#wrk>Sr5KgsR5zd=X<2{mZobcsC>76xvpFq=X(DC#@#{)D}$n`Y4 zfa^nuwGlIVJ#M*DABpMyG-G)8YEnf#LzuR1t2UozRp2O-;FF1ksOMQmcz*fE;|?as4;ZSkkgxAbobZHx7l#K@3*S zuxfZ!H@_{ip$4c1j|Mu1FIWsMw#=wKz4Nbyk$#&)1GrSnGP{g!C~H2b zTXi~5omMZ4m?|L0_*T4ldQ7tfer2iZskt8F>eZRD70yh~o!9Y_vgMB==U=uJ06wY@ z%p8y?Pf)?{fziQe20@Q-RNI(LBCX-pn#M1UXjH8i6_@cltGuPd=sW@Y^85nuD0%QW zYYJNy-s?(bSC53)mIDq$35*8y)-Rl{@VVYtedUW6les`4%m=Os=~72L6!zi8F*)Ff zxfG(JAeo{I6M#H%0MD*x^8T|_1xE*9Pjh-@tqlf~ztpStckwtM&%()5z&|2(=ukSX zpldhqjA#>j_4$aTwfZwfGosAo;=ehEglpwCHEH%eJ8yUynaJ4lX8j8k?=1=Uw6rDc z5EL}?_4VDmmJ`9pBm4s#e*4D(s0FjjYin2=!_$Am2-P<<^cnC!Ru3z)tGfW0e}hmn z*ZbYw-Qz8g&*0q^P)nb|J0UgoU|k^}c@$bc{4;a*Ptxa4Wa9reOY*-YEb{?us|=v( zoe;*InT+re_Id2ZG&$v~s zRM@HT!|otI?BKj|BbT&po`F)P!I$W$M)m_Tr$y7u(_sH2%4O!4@|q7!WBJ-~2hUiG z01Bok7t?21A>jk)A+OrCVbahL8dP0SYEL#{z!4vUkTt6q@|;3l4sOI)l;iPQeV#uE 
zsKk(~tE+$2_&uvoYIN28Dzi9J6fd^?t>&*4K~YJdpW7 ze4c~uvsC0M7Ek>D4BUIWN}qkBxYB6tHBe?~a#2M^9eWJ4=etq*msE%4tzRA2ei^

*SXZJ(HYGHyA0o>@I>q7Ki4?2bo=rTZ+RK!NZ>t90Y| zN`yJ_z-Oi6e?VpX%uEIT;OuO(UVExwdxGZi6tSs%T(LT7GY*;DG1qSx+6iL)8v)Ui z)*fTeDpw+)kI|O%n(T)Af>P*%T1-`sK6M97v?Zs{3-DdRPO%pZUY#JKJofo?QW-jU z-^jNuW}U^9XIu#6#wkFxz0x+;e70^QlcL<<^ZJe;HrmBD0?eyVgaGYqU}#u)tKHJs z!QRvT+Ft$bV0_>^-A|gKP(yxe+2G%4bNb@k@U#JNs@coU_pet^MNm%T9%}5UVQ}gj zA2ZN>4&Yfl-wM7oI7E-sEafb%si^HxK~X}f^d8PM@&^tWQH-)q?mS6;ccY%ZU&_D> zAAz)4OuuSc`11UUeK1^XeWJ=!7~IP7kG~x_+_>>(9B|$SLT1T-iOK$7Il=#`U;o~l z86Sv2L6x|2SH~7&f5=uybI^BwyI@~}4IJNK-jA`0N>`a|KFFw;3&19r7(?Rp#T<9&~q zBcWNef-SH43(?!><>3Fr<`~bu_C0VOyIJ_xEk$c#G3-w<9{8<7fCH%C4|bwF&%|&h z+r;maujcr<5AmY5noY3Z1nK+w&ay}7J$q3!6MrYu&&MO=r`c(+Ij>_R9^=AB2UHpe zm@<4>`m1kl`0`gdxZ+kz^cbDqIMH1>>x#4f>Fjlcks6vk1en?-Q}FUtdWP zoCUxb;$#Zk=miL=D#!5lUFFv>oF1#5cKQY{w;FWXnSNUz6Zn&|vlq65Yqs;_4v;=J zU*4>73(nIf5X9{Vh8x3jF*QS;vR{w(#3$Srr<%P)kusudQYV+w?+O`pN-h0T8P@)| zk};J@3dOGw#>k}~iV+ngK>>qg(0(qD0IlRChmF-bBcBPdzezZeWo#DwgvOV(W~cHM zP}-j4Kw?mPIWnWZyh(Wkv1aelN8zT5fzA*EOrbMR+BD^}7d!To!0aRfFOqJI_^chqhe*5!ya6{)}j?@~nf>Z>a_0}XwwuP9LrVN5}3gMTwAhU{@1>JdE`GTeSe$if5%1ex253+{st|tra&tz zE149%6_|-rifQrRaR`hk#{jaXxw)BNklZ~zWmb7EQL~%Ca~@&1;sd57$2|7W=b;D$ z!qf7EQZavUsrWMfcZkt6*MJy&P+fU8-!*B3C?+O0?`Hm*g<9zaL)aHAFbX~tN+C`b z%uaG%{ks5l>P7$i_viV-otnCO+!65=(~5SQ8z_xndUMjs{fvOw#FB?=6|*bL%Xv1c zJO28uX8(8>{0H{A{oj4^)1X-el!nq~XMr`X5GS|0#MJY1^PGZZerf4W{7L$vl9Gs$ znSxU0JTjH%xQS6kg7Dz?>n>i`Y#1MZR@KxD_z{!ENZTe+Y8%^~BSG~v{bF6ne;m)( zFJJC2b)ER!jgT!*X^n87;tUW7bEsv9SU1Ux&FxDNzFh39F)yiL`9W z$;mO7ZTqw3=Wmns-*|!k`HMfRCXXY&b=S6a53bUtJS{u?n`hAH2QumkOQ?hO|BzP?f@ zpQ#l~rQ*kE3u8jB*Q=So4bdb5*L&sP!=TxGmiBLgbm>kMx{y!v0SB3P(dJhSx~4dv zX`xFfazj;zj}nUpfIRC7gtGs;>(KuI?E7yQ~*$+;EtDY?IgifDj1?CAZ{?Unf5UU=I-i|D^^4E>*0`Ti3V z=zrAI{#iu-G%%j_2EKVd71BD;UJLvyg6Be&j6wl>lzzCs%-+`St1DQ;-F1V#4F6!blWd5 zJ|X(y!K1od~TR7h)krW*rZF=?YulFu<;BEpC zrw$!Ifs;x5K#Mxry16n(;&0In;_pHSt`lI!qoa&%0bei#wjA_;-GJE+Tt*tUMOaG% z7^%x4H$*?9_ch?lI`etsJl`X$gLeeJApiD(>5maWX2hP*PYv51RnPAN&xT&-OT$0WRM!zGR4XZCC z1K@IlCgN3ucQSA}O2Tard<%dGbo)N_{vOeS7>&te9vH;x^;21XKd7-6TmJac 
z^_L)wmcgqFUe=oPt&1L@!6{w5m5Sd^w!d zMleWr;V_D)uVeYNJ7DU-%OTckbAK$jeNYRwXt4g zJd#FfrGEb6A$p*|7d9a#K5I4;3z@#0v2_wyv?LdCeWS>toUQ2%eEJ5Eiy(<0&pmeg z$(coC$5o^1^a1_z^N^sxTWRHHqzoO2Z1-N7>*ZzHvk;7r0b`$FOCq> zMdpc?M3yy-Qv%r4=!k;e^pi(JaXbl#sV>Y}{oz=r<$g1NfcTHk^R z2KaG>$oYKtFp2+`h)DbTzQ>is0G8W?KOtCiW|U>0w?e3`Ci51*7s7E zfd0M(a;{?1k2OKWsPb<}NQ+iAGu~N_rny3bf8yuEKlf1lB_r;C>!|*pny>l$EyDYX z9a6xsOX?|2%|oQt7={MA-uEh-yS_GxSL44yW@f&Em_wWD+RYX0wXDd>m@HW8WbuGP zQFt)kV+$iL`71j7DyUy;FM4i$_3B-;#TC9{!Ow#8N|}19FYF^XEX+2uD(sz|ojpA~ zGH9+{yMfqIoH1;k4OqK>;WHwCxa0u#8-1np04621^?J*Ea_h6jMb-)5FRla@>SwNk zM}i(9hWuZ_ER6}DAz}f4@HnTK1sq7wYvqqW3M}3H2^dHJQX|cO74-bKM#TUAC;vlg zZ)o7Z9SC82;HIOaBVdj{A$>s_O@2Qvee8Z69pmLwL4Bv4CthipYTft}n={oSA j=^OtER1^D=C!qIj+WY&W{dwR#kn7q;KbHOA^z;7%|G0z9 literal 0 HcmV?d00001 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png new file mode 100644 index 0000000000000000000000000000000000000000..d58b71634afbdd8e6fa9c51ba51a4b6b18576ec8 GIT binary patch literal 19640 zcmeHvXIPVIyKStfD2!ua6agI-6ckXJbVo)|T2z|USm`134zYbQ3Wy*@YDP*RRF&RQ z1f+xrp$CZ4VrY>h5JE_Do)>5K{$|hY^XyL27~Rmc;Uw@FxZ9)7;K~Gmd)Tdk#m*;;6F9*^H;r1Q1;$_*FA5;jIMjTyP~{Z zoo*cTz3u7cgmP1mJtZ%D=EOlqZ*O-mH90xtzx{%# zzjY3nZtz)#M=t*OgQB;XChcjy`|(XFKUySYmcXwyu;79QD zH}~!c!eD2&td|A9JG2EX1?<6}c;V9h!%-TiXKTTSB7UA1rf_{$5Jc~ zkZ1czqOK@1miI}5cc^yWr^uk5GeYaz7pxmpO-T#tn;~z6W%QnzKDzF^xC1AAhdo-x zLQl88S*qvF)OYXHI}mNR@Yo`vVpy5vYEptG8mSnWjJ#?QPYz|bBl>DWD{YKX!Q-Mt zHU)zSCV%+w)Si%*I$y_`XHLqYRYS%_$rnTr!5)EB14NSK{nd5NVcM&o#%2!uXl-TH zjo{G}m}EYeOeK+dzG|#tFXE||1I5jOUQ-igO$rJ{>|BR&?n<+Nam`u$JJQL@_1 z4?0vDnjbVhKQF8G;WLdUx1PV)kEn01!f=Ku-CPz~du*=H8YOmMu~XuyVY2c``rwbP7LNcA-5%4lc3sA?%9gWOOg9mKM^~&u7B zMn~(0JbNoX-V=O$Cq>6iyleBFR{e%e*n9zrV0j~qR_y?l_eMDMW#4TtSZli`Bav`m z)l%F?eI~#7@!6M+Lh6~8#rAvFNk%Q#j*HPQ7$nH1zI-##Km?x)1pA(rwwUj*XQnenwRpLj{vunoBU14cTuVc>odws-_WYcw znvr-*8uM&Lj!&0sx?W&pWYiSG;{}m9qd7D55n5>ZvMpl@FC)YrMwX8}j(&Id?p;N@ 
z=F{EJ`SDU|{^`prT(I{0qfoC~C7XV7^ld$wu=@wU6ZJCvlKu2gVG?#TujTyT2=;AB zV$09A1-xo3Rnn#fjWW{4W)R$I3$v&*6vp#nut(@pq@%{_QhB|Ro|up6LQSQ_L}M;P z*`L{^Cn9kv-$+Y9O|E>g&ssPQwTe}##hp13bfywz8?x9-MWTYN1LwUk)+aD7&$PF{ zM_co*{ajR3G<^|EKQt59Lhl#W_;NOoLn7OyTQLVAzz?4ZD56yb+-McvWlS zsgACV{k3a9*Kn6eBJHW5UfkCvqxsx$%%ru6iAex&l|A|J;lno=e4;{j@LXxX5GO_N zfX9!o4C{)fUJb`*JW+H0;uou*MVCx=J-r+xtUmklvZi01lCp9<1)pp&Y!O=4r7Ic- z)q%pY<8-hCY>yvP(O4Qx{x=Ko+i@yq#v1U;@|li6oPWL=!EL4FYI{8|%Xog#@kt z{M?+ai|rF_-twD8@Pz6WZgsVvcA$>IV;KZ(^cPmXLFh@kr=W=J0gps7Z>jt~!LMnL zMbB`dP5-LZ6h)OYTdZd@Usyday<%uLFH*nU^pOD9Vo!`Yh@xsdD(maf13nV|AMd^? z3k8K^zYnRfaw_S~V%AVZ`MIuhLENi z^o1r~CFWY%Yd#5OjWeX$5#s}Zuo99wv1tmb4h zwB?epuKV5Nb30J}Iv)H+3Es;yWcM>5;fum|=4nVTtn}B1exU@slaKOTU5G*(%2*Vh zknEyTsmFC=={as%{qzqy!Y9Q=SK?Zu=&B9coI%2s z+Q}q)_J`5v?IEreU5u`_jGyBAr6i0sV>EDPot7kIx$UVKPEXm?Wp1=?0b~1zWMr9T zFA^kPz1o=@++2%V>Bxj(gchGo5J6Bb52(*q96o7Xmj4-E37@#wMfW`uGaSD`=*&Ux zCt>u3C~cnqse5T0d-1y+Ty#bBY_PHKsIjL+M~F()2MQ}P7*pO}P&OoO8#Gr+jWsdG zv1x6zQvtgdL4`Uo6wAlv6;|awPmw8SyxRpgycwtW$t#GF>A*e~+eVH-BSz!t@BKn0 z-gBK5wb-rjy|k==cT2?>uB#fOL!o6@_f8egAb<0BQeamO9Js4Wt6@B}ox-C+y%v^1 z6SMC9+$~!@dnG?XPr3Q#MwRl?)A}rELkGy#`yPo>EA`aVLsqES7QIt1boal(AHETH z`8BJa&uuK^6#CWX`D;XN9PuNEH1c@C?T2xVgEylzAGbuQPCfUm)pM6A>QyJRJ4THhscIW{dXVx^uILK9sK)X{TCFw;fiEDEejn@>aO5#Hj^nPNFV zGn1EVt1{|8{_%q6sGZBcH?}XYy?J@KL3{a)@a6<6Xn0XgmJX?A8icz7;#YeE;UU8B z^a(pf)?`-nq%)R#Jf6*+wFDqQ2j@v?wRRE(H&d; z-&4vbyKt_V7atizsg26*eIp>DQHT_m#yBMD`FnCa)x#K2hdLv28q@Ne@Jh^B3voJ1 zc83&3{rQM535gq(rjQF~V&vmmWZL_sF}srVc)HW=D#PwN9wl$wLjux=R=`#jKR>P} zN@&fZ2Yh4)4+PJ<+@7LX>E2;%nKGGV5Q~E!=vY`-m=P)g+Sz+}B;W`$-r5a^v!#J0 z4`+;Fq%ktq`!3`4u0@>J>Q`=8xJ`M~=-fpjSx4BTrxY;9b!*Y9;}XK+=V9`3TPw~! 
zZ;X`*`(8NCp$Z>%aOzUL9EEPmc7Lal*miG|;Nz{>hsHHbkmfa4r$&QlVhO&!-rn0c z*F;NBw?5OI1Hj28BrUi#9&E7e-0ODPrUx1UQuWIA(=$GO?BugD>f0kY#%x7cxmctaSL}99gT)(ZK ztp`6Qp(b-m*xOUAuP;n@T2FOgvNcmk<4ePh!PUDRALg_$%JT6@$4uYJ#tfOTpQ1|9 zp!&+mPWv+OD%qgp6#1v-CZ0KgZiv?2-k!Z{Vrwq_$ZfXjy6+wH{Widwok1L)w3orF z`>TGpU6Wt@?KZom*9kkG1e&IJcVE1JsO+?J@&>Ehu}3lL33{H*r`6{##fed}wYgLm zTUo-aS`5Ij)z4E)PZQlo4-sCRp=Da=7D!VLKPh)6AuM?~x?|S`&n@r!l%LJ5#E=s2 zU%0)|>quNn|B|2CE_cSFFT=1VV7FFSg1)9~)#vnHtq%dt)?sD0HHMP(x@07-n07{i zEU2Nmf+|(OR@Ge*l@4*N2#w`TX!FLk3zhFB-5<5P#~2UAJAwtgdgpAR){@hBYoOb!w2SI4+M$bQ-z zyzi;TA$S%(U<}*ZGlk1P{q33VqYlz{U!ATT{1Dw}HQAaW{X!XDX66BU=I&<#J5nQZ zhsuOPf)$YkhO%+OE}eIsG$MD`NLr0z8D(k0k(~)zD>DebRq(M`9pr7%+#dVaC>{2f zv0$ehjs2;dAGJ;&;ft|jBP?ph_B7p&qKDZwwnj}(%1GEvEW*;ialiYG^g3mX7b}^( zid|MCZiQvXwBX1cx^K z93gB)%O}2P!3{((qW?uQl&)~IxP37)3cH0JKHpTID*?mq z8}SF%B8?pN>9-9_wUOdb;b^rnT2a2q%=?}XUM`HIu@9f=DTJlv zsmDz+bYxzf3Jd!*5#4(NE*&czCn2nQX{yoQE0XakBaRa-1Q#Fb3wFCbx!M;!lr+hJ z_e%=AX$wdnSFJQXKl$2}ec`eGPZ&p40MPm`kV3Rh1c#J5d_SR+KsYlV@n$E>h8(Ri zzK?M1oSRTJ`}JkH;7@{5%nhgSES_)J^fqU9l@d*=X*A#0Dki&of?+PROL>Qs#1add z1Z_wOIv2EBRMfdlVwkO@gtBJ0kDm;(GwiC6$Q8NzRr-xEpCfUXE20ZUYGm0}8SMM_ z4#4Q8Dq)OQQ6c%?ag((|Rx0ciO2t9{F-?Idx@Zpj{NN?#rQJ#=tGV>$d$4QSK7}0{ zQ-nfZf+9ldG9HuPo>kpF_0lr&l4Mx?fo@}sg?HD5H;IMU5G@m7 zG6>0|7F$|FCQ293|DyP7Q$XzM-kM>-p>tO-Bvl zO-Fbdm8NMQbH7dZl*c`o494)w;K5_NnpiY2S9XI&k|?S3&HJzvbGLx8mycg)6Uy7( z+3sOC{uDfzaYA4)DtO}XeeMxNp%mOEzVWP38T-|Osj8uBRMk|YQ`i#i)5OcGs@WPC zzchkO5i=_IJqMa04m?)#u8f{&9g^0QGko_MIr&pKtBzIM)L*H|*7)!uf!RvZ;r%R) zelKrFKS71YH35zTPgJHTK&wYXf|nYl^<9y$v}8Ag2|Bi6GkWX~J5|AL#-BF^tVXV^-v1)S0oL#$E*@@Hg|!3#1|RE3sP3 zW=)dSkwYb8958OW7R*pu!gwKGluj`lKLA^fp+5;Cj@6BVPTzl(6|Ew_F^)bu?R*|e|{HFe}q2wrBPUJ+SK9Kx|4(m zZ(~GG`GOu;@QT;aaF(5u@npY^WPc!-Qty)iuuqVyo_$UCpP8AF0nEZwUMcFci(<@b z(vCS|CEBzO2*q!YjEiOu zwCrzg_hVlh+J)vkdGdswZ&|!qg6x5qg=-UljS&%~jLtCU3XLM+S^E&6 z8lxGX0*Zo0W**wgN}LOtjI-FA@sypE&gRtct5~B!*$`~YsmQ`$at5dv`w%Y*&y=-T7d;hx54AaA{ zhhzF}yo^pICFtE`RHg>RV1h;G0>T0|o{Gw!+*(9`XB+VD42G3RW3br6_*IpOK;5Up 
zPWa-vsrquyp8^Wav<@sAp#n`ja$LmOqA2#IFTwNv=fMMYA^MKOrM(w>EkuexPhG5| zpCGVHJ1MuObe8&kDlr=A0dF>m>_$*_PCrUHb^?V6@K(dHk{mX69)85WG+iS$by$x6 z*+cxr0XW<7i3Z1CoZFGGxebld+79-DU>iE^I`hm-%W8~);Rm03n_BcKm17`Yi!ZX6 z&#oj)5%=Woi|j1cqxjH|_y#<=qY+cBCADtDcXC-No-K2Ofam+!IO6vVpkiP3I+%Qn z#sUG%No~2?n1C-TdOn+LS2l1XJh#7+&*KnHOAc`hb{%5P4Y%Y)bA0E=VcX#io33?F z&&?S@;bS&qTFAGO>n-z>COzf#J$lM-FvZG;_G2tN(cjAsNy00H?8Ad6tIbKkhH zgTc1^(69*xJGgP(+6&Fzi9dICcJ}o2D25hnEzHiAiqk);WTL=zjd_L4P0&(i56Jc! zYA2_-COemDO&FwuR%<+?{c*~j)YqpRpFQxisn$1_6qJYrNf8n@kb3i3)Z7(3v$(aj z3}-(B0b~4(+K>DGulMku-rV0W{BFPgKUk7u5Wig@BWG802C&Zy$jjH_uH+4VP7Rn! zbZU(`x@#%v$Fgg0?s?N&rTVM)ncPE~)G}YSw)o=gR|Y3;kxYvhdkI4%l8aTj$BFtg z0;O0i_LWA{=B(^&J$!C#BT`SVqjGt@&)2&Nl_QbJn*on^=sP%MC-pimr3~VbA|Xh{Dz_Q^ zc5&-?`_2?6PW}y2$0y&e^z)^@T|4x7A8^&x*0nt#$Zvrl$6|~|k;PeAO`(>1s;a84 z*shn&ms1^13z{6F`Q?E(qf{kt4-6)H;F%%VpTC&a{|gm@zc2M)lv&QCs=szByskgMeo=!wKlA~u(3 zQ-+C1uWQ#1g23#pK5lXjFsPRm7O#K`>Io`nVyJRJIlhK3w;{rAl;DU$!Al(tZ$3q2 zuHHMPrm3mvY@Tsoz^!h{-nL-djvYJ72-0PE9*@^EGExu~dCtWpFVO5FJ~!JQ6yMJd z&SiBgiNmI;qobpyd8alFA8;cLMdU`BL&0w5^yc2C^OurUz3fZ#++N1ZA?$iwF(FH@ zLd;(wWo@ezPXe{(1|sDk{W5@$u>Wv_@$q{(x6M9JPwV?^t&r~me9A3}ZKkaP$6+wn z9K+L~VfcdsqX4v9o*#zR_Ju>GK>MJW%LG=h!*#Nt<_+!IAc9;s47dsG)qXCh3 z0-vOmH;RYBu4#JwwARJGd$XxKvizj15GC^T>DqqTxkWdiA^HII?0f~%&G(ob3}%w7 z6Z;ob%l$JWu!Ds_F7RGBPu5QeWsP~a#As(uw#R2hs<>TD?nq9s!mvAf>KL-rm;H6J znn(tIX2cW*TMx$EtzN6iQ5P*%mtXgWE`833+FhP=2OFxn*N~|3j!Hqu@?M_8cUkQQ1n!z+__|r_N&)mm;0?CieTnw)( zZa$5!xp~p!Jy??nfpsF!zX4Eq|LV$Jj>Ls1jaBy~6*SMeefH(}`d$Ipwc#xl4G;YL z-llG@?+p8%R@~y|R6fGGP(52^?J+vXU~ty++|sf~;=ya@PN+@vI<;)#tDGZ| zNXc%?_(f_SB{lg;5V*I|>bVrHz@e}!aCnpjXyqP8=f;dIlo50m{3of8!t zB3pvef=_cP2?PU&Q08Gfup7<)08jB;)r*2Pf%BuhiF5UEa`h~o`64PM&Cb6e+yrO3 z-)5>_Znda~Os1KE9q-s^bL}k<60jp_wLxmr&sDM+^$U0=O@dc{nN(L-$4m&KElR^+ z^7fL6>m}bE6+ufavKF(@Ax-Iu#gzf2ElJ4^1xsAO-Om1cxpUrRFZjxEwsxYi_9B_+ zwG?1y^(EgS%d*%ZJBu~^Ec8pQfk1+Fg$ZYWg7A9S9V=+ztm=XTRtDik(r$32o6Q}} zPy92DGO%CwkC{B0GUPjaiaVWzz;=>QaA8g6wZMr_VJ13_B_+K?OxjWa(6~;a0$hgN zICHf?{f!WWHjLeUR-k?s5iSaY 
z9GojPsD@T=QuQk)i}?6@g*AVL9KO0ViP>n^)pFR0ACQ(&xcQs#2$Z=)ZjL{s5@|{TY8jb^rY$KDbh8Bimm^A8T4GM&!Ml%#W8Q z;XvDFm*GJh=;`mbpkWQS0)_cJzQ+KH*CsI803`gxv`9o+l zU9kBxpb;`bhc5A^UO6h7)z{b8^WnqK7_%R=^9nGFELQgXyc?L5jJAa2mn9hiT_2yK z>b2f3x`M`Mqp?Z2!5>-0tLw7)iM_R$R-@k5M4b7}Z567+Wu>K&lgE{hrKYByudoHq zi`wMVx4pf+GePg>=5JfEL~>8?Gs=*;{XcfKIOl<+nI`k+lEL+gF_$%I4!tv=yO(+j z{Wb2BKMx50g!Sd`5C0X|UXo5|pR}^DtMT^$HG7V=IlyR5*?R|7$&)c&cgy1M#dsfq?rHE{Lyh}s_4w;U+kmNAx^N_hqL zJOR@dj^!d?_Dl+zce{I4AYEKU+f7_A0jJ7Z$-h|jDAG({F0o?0?~qs^Mp)1;;gMZvrcY9GGEqlZQ!#gO z9qf+M)`|y;-G>@V59n(|xA#{G!C)^X*CjlqJAQUw=lPkSce<-Rs-)AgA3)}ZQXjwV z{-6`WIMe`WdRS#Jo9U_U>k2NQSUzP?q8`D!T8 zPP%+*$g6Nu3!S=kJLy2CAEoAR6Sqi z4Y=4mDHyEh(H+5zFTmC0S?V4%9;t!_%y8W@&##_iI|O_dM8ICh+jl&VRvvOZth5Ps zN9`N;>$~d~3s5^kxoNB#2l-*G-nah{?A-;P5KI+5SIb;2R{(3LvCLg3a6o=}5y4;S z_lA^6&q^D^6Qwtr9+%*iNxM}5^yGf)eq9XmZuQn-)cxi^_6J}sXAd&Kv!2|pQeC|CsdWD#gYm{~?7r>_q zkY>{@C)(uiVOA%kGIH_2e795rmbzzn*v3Y33;0~&!++NP`0qUc;!)*y0{dVw^ciC_ zFIdI)cq>ZPuG-ZLGm0uI*+Lw?K>jPr=iKypXgwP`0l$4At51)Zf&t2dU{NI3_;yZ z=UDR4RP@y6K|)Si5~%BZg}ia7*7uE$+BGH}SHhlp&-eZXl;l{(_2dUSn_J^SeSnxS z%f=y5``Z2c_XAubS%VoQTq=Hq#q^pNee zfFCpmg2cV!56F(R`HJx;V|9SPR#H;R0@ElK053(90rx^$d2vXw^z?Dkx-bWVJ|{Si+E5bWP6<}Be~hFyJiEqbbD^~=9*C-%M@Oq zq<9%utN! zy7NFbE8cA0UbOj=0wCV+U=d#e3f0tpkz`yG0%Jx6{;(cS{Y7} zKKxM<#tj{8Sv5y%ztWNsm7OOMtjL{GsXy)p8A*wd-;oJYR$WPX$Y@h8B^8hkuJ5 zEaXrenbb`pl^NG-UL!CQZAn)Q#!I}R>g@pRUepc+!hr=1yE~tt#mr|sornsVy-5Y? z8azsC#nBm+7*{xeDrXjom0iRWv5^Lg`$hGsFAG%Q^#f=Yb1WMf1CRufL+10m-|f`P zlUfh7S4Rq-m;FQr@2T~uMFm5?OrT=cC|yXz*L{S z+^o|5B1~L*VtvN<07uBDM*+_bHA8~yxUh&5EQd=3_boXEHolub7q6eitU{JwpwQU) zC6{${s+ipdL{ajxZkWu5nIq79^Ye=8)KY;NrKvyr#DJ_ACnCG3U%&PiBY;{n;&(#V zQw|7o-b;fHL0NZ7zXj9c)ke@pE=MHU7m@74z27SEU&xxNIfk>ThXu>!>N*up@PX)` zYoLAy2T3#6cF@4Sc54D=qAacQvf3PagMICR%8~dcC0~m|o5Uwz(ZZxR9A3LJUJlJ} z2fp19-0}*sx-82wa$Ld0vtYk|R=OCTxWBu!-!8cmT&k)};CT&{a^_xrmJWMZMI-i% z5*pU4#U>T2nU-uXu@K)gP^E(>yz*g0Fz;?6?LnZ&c400+1d~_w9e?Ppo1u0M<-xH! 
ztva9J&gv1z{t@D=j$Hy4)vt!7u*;Fk@MJ^O38B@P*EDI+rZ-R_+sGgcSsSphs7gsuIE37QRfdw}914Y60Ukv2 z;E*0)>nA~G`^!hP@ap4&PV+%<10tsKX3{HKcy8zu71!$X9vHnu8pv=D)M5G3tsv5N zy8$C66NH&a*t+^WcHqjM#%Y<+XO>-2-xNkhxm2X7d0)E!zbMHwciG9pX57bA~r$Ua$w72?P9D8;b&G*(=T z%Vj4-RxNA~6yJG~Kk+^-?4hDGZziYue|)7E&Y>2l=Pxmu`P|TtU3$)eqh^zyRr8-( z%w6Qp0!9=pB5;oxL0E5EE`fM{L^uK`4YxxZNf%kgDQnRSa84A5?Cy>!MMgtHR-zm) zyNBOjpIN6)%K>KMg^w9jIhmkt;JuC!I`a`$bl~NF=w5$l5%aV0K!4IipVT#QDV>%Z zaT@HePr>5Jig?Zdg*ZiKUrh2E<6NmMlG9uojLI?{&-zKWf>PAPy{sn|2d11_&j#nU zL>CME{G#czBdaZqSuDV($d6l{te;4xcJY_bPFGUPakN71tho9bX-#_CO%pN=l}BaA z7y6&J<<=tuXROK7W*hoGR7^#|U;~!`xuGBy!tA1&U{*|524bp-ykxwQ(T>unN6gi! zp)qA~V}7IU@u=gF8=IO)$Pd|Te^3@ehxt>P&y4jU>#r64-D75A;rzwRsS?AiODkOn z3FFAfNNRp@XRe>uor`PjT?uG(UY)8<{fK2^Ew!&ciu?M9n+xS>qfB0t-(Ca;G+Mxa z`{ua>-Dx#gE3;t-JTC;|UF=K@6@A{G^vl!>YVjTX*=lje_RR8;?l^Jk92`t0E-MWJ zT9*=hVJ6_{46Na0rxs^+4NGgEv`(}O(s8irBiGx>#5FFGP*ilUN7brC3I%x}1-nUv@FAro00A>axku?fKb~_LM=V?08FD zmWS1lqm|CI*s*M=(8NfYXOhIN&4=gh7SMP7=Z7n)K&&wa2Z2aL=oC_az3}rYXbp4Y zH&Bmfr^FbmQ8T>ZpnP*~!A!yZ&{5VC&sivwEDd5W9JU zWGWZpN`Gbhi-P&@ROmkFunk~7FWG9lpio7)(f@>y2O~qgtgRK8G134OztN)mbzafY zxzCM|_U9Kt^U5!ihAsG`c@Cs()*#G(6*h;O>AOW^J3EcPx>4McRr7&_`EP`gnCRk_ z;z#oifYw-2$i1eWu{PPP67rw0il=>ie6p-m|AqAh+N3}9#Qgoj{|eRqn@RT%g7IH0 z`~T`}{9D5Qh7?f$Ap+78jYfk5dJw7p-~&Xeb0el%3O$DSK0R%QcCi{5jT01pPjZI; zT;|x74&3R+Ox#}oy6&aNF1Xz1yaK`0kU>jj<&qSoC=Jy zRZ}u4esl|67C1_9o^b%Gr&506m7DwTuWnBIs(S*>dre1L)0fPZ@?w(A;P@Vwbn4C0 z-VVU$bM}EMz6<|LZ2gh+sc`bEB?PLcr=ej-cq0gFpl&q+pVmwsrnIDFfRN*<-B@So z16ZRahdc^5Kk$Et#`%+l;lE!>{(c8U#Qk3Sj5pAfmw&plvH}U=88{p+C;2y}t0wBY z_D!aAwKo)9t!vTspXf%@G#Y$lWJF(C9}tMn=0}zC(voofKNA1BhDs)J2d4wH^X$Nw zpZg!(dk$8=yFY{^+8Lc@T)w9i7@#mVUv=JI6d!;s?X9LvR7f;j<5PLN@*Am#pF zdCdOrI{0Vn;Gg{3gWnrRY^HyUM$eD@rgTk)Z0GFDuo%oTQBX?0@U?RNh7m{o%-(d< zJ1{Wt%GdU8{`#^r5@}W}jk8N9(HXS$h=NXkM|sZ zJT)~nX4TN}5E@|z&Q7|k2QAI+Zi40ua6DjoWc$xgAeIKW^>hLEaiP5a7Oh7EITV}s z?y*^3cc;b3C7M0#*#n4!*y)xmnr+vx>ASW=ZU4_<$C^73)?i|UsPIZsMiG0^R(9f8mwTg?~kL|5~)Nn-VBOq{(EAq$RI1LROJuWiH>UL&Dl3Wv% 
zhd2Q~NC|m1#Qqs>Bl`LG4M~taJ_Wi6$^(4}`rQVZ_@c!H`r|z_1t%MN)~Mj)8Uc8C zIH!$Wt`jG)x%*kaoo$%;Iujsbf?NI&WT0miusC_2G9pwun~X5Xo3AgDom-C;THDiZ z|5-1G%uy2RqJr>hLXSK;)XIlk>(q3cY@k`sHLFeDY`6}hm!9cWrkLAt$H5$U@-J~> z{Uy*aa?uS0&0_hD-dx2!M!mipcIkkKJUAEJyCWB77$Ef3+6M7BqB$UDOI!zE@21k$ zDZ7G4YS^4>Dh=#+_Py2nVB^jIBK-cz;&6&1p7HG09u?~vo0OFFxC%G;`JeNauJw=` z(WMR!UlU`F8aU@D3+_E}K?)Q6`dq}lul=R~Q(9hL#Bbj?iB|hB6aq+mfc)nl_dGft zMgj+M0#$D6TUC1Ao}Ql0^;5uZq&bxK8oK2ghw_Vl*Toj10)fvQ((I!7mnr{@vA~R7 z!kMoYVRhje z|KOrqbFBNvol==DsG3UPlx>c_hy~m6-_jcY20?$Z8u^#N*cU)3X)i5=zJ9?}I@;&}32PO3eC<@w)ULN()3dXtzqC_4Xs1~Fnft)T zvq^C8@}&xW@clD#&SeBG&Oo-47%!l?LKdPtTR%-agD?FM7`IXQr$Hu@!0PHrYeQ3~AKv2B> k7cJs{IH5oNewn|K=k4$Csr)i{KG?Q7Ka6$OuXmLXjF20qMP$=s3a%3Mx$qkWoquRRls$ zm|*|`Lxj*nhk&6cY8Y(0|ITl~|2$o^76M})Cumg%lEo<01PH_ z5c<2t71IrVms!w_AAc|p&0HMW6>9IC1^>*>cN@y}8tP6p`qlB5KC`1zZ@$|(sUk7u zxMj!im6Jz~WgU;@?D>WIB(D1D)cgByB&r|AO||^8vuREr_CWWyU#|R~aQ~6Fbp{tT z`cTzm_l>rO8yVC6?%B`y{Y}fvu6t5o0n}JqTlF z!{PJAbOYpu*#?&=;8u)9bMx}^^B-h!-6Pp`V@9Q+%`)A6wmw-dotf#ugU)HIYgI%o z-h?pfdJ^e~mECj8qFc}LRPQ<^rd!a{>` zSF*e=L0iBfV@snJJ?PXiesDNV2AO{D;pUbWB7dHE6gm6t*zj=XQq64kDytNwWgj~6 zh@KhHc>~1`L8(r^OLGW!Q=j}rqI<5lP>A1m(qFKOh3CnX*1xNcS_;4zzp?ESMA_hPJ;;)MQ*&pL>%N=Zl7quXiOz zC-;>ik+OmTr^GzLdZOUG`{3xPLw!r?9HT=)p)X48w2&j?pzoHQbV)ep#({RiMsI0; zNW;9n8+#-~eC1uD_Qt)9&#`dLZ;vrQ-4kmdF?NRRe*jL}%ck7LC|tlD!<_4m*(Vx@ zYaT@MdXYyYiuW9qJx?Gbl9`_t-#K@vgpWGHHz$PS711a5MPCdZ>D_8@Ql9IH99!XcxgFVK`t9*-n*mx=JG zwQ`J6?5W^e;hh252wCdDz^zUDusOH=0Vb)~)S0#E@fS31yWg7>+h}8D$@KQtTs~=R z(`d*A?PaE;qvMAm3NQi==0HD9V~k_GQimUMFPt10h#a1E)n=ymjE!Mh+AG{B?hYyf zxkNZWbcF@%bS=4cmW+WbW`uQJ6FKrjh!Rnz|6D&~B368KIsm!)TI2|sguu!$losli zYCQ(sro9wV_kCBp^hAP2%u%7^4}{`AXC*%FM8pQbKXlMc`zAISX+%S8roX=W=f-W6 z$>#WY+6)Pu=J5F^k;BVEDN;kd*>~MP)&v(=Owvtr)z?m&-i z&O8`1l}>4A&XUE~8}!7ZR)2BO7@M1)@38f%wD3c62A>?i)IP@P3{kO}afNfO!_Z!M zDRQP-d8jdgJSI$_&1%-ji?25dWiz;Sb#)zjr-FJKmpc~4hWU{iGcTRmwY0R*MIDgu z3CqHLM(ZCqGZ&3~!VzKhnIWTL+laID>7eJBD&V5GG0Z|F&wZRFUo-t8gPJm*(eE6c 
zRoYAK(ak8LM;S}`fYO++S32+)-(Xq02iGx0oQ=&rA*?M~g^F+Q3kIyD5=*ltL z0vXnKnCA_|dP%J|GgAj!{VSvs36=@;53SWRrk^4=od~o86wQ=X?wEl8wqy4BP^8@_ zxvF8a3Yo+;no6l%&BX>jt5ku|DfzsXJsKgD+SVF3^L%Wx>Mlnc1z%6aTspW63_ng~ z#)hyQwJZ~j@{TC6kCx@ms1mI;RWdb)_>UAKUtK<@VzRBXsy>o7t;abwE&xf);E-Iu zQU@X;_R@OUD_ydlbmCjfK9g)?t;|TX!puCz^*ds1QU12fH+%`2IU+Y!v35)EtOuc)8H6MY>Na)nlZBgoi zPR*_99xqISm6Pk`s_^)Ha1tfQw8#>rDTf?sQ!$Xi?yngxbYK;{7M?YoueVzwhP4M} zr##b3Kyj&K33HdHG*4vpyDl}Q1oJlfiSiD0Sgd5jqL+^-#d^2N8aT~&p~w#s_%Qpd zu$goT8+q}PVuvZ{rq;=n>7`_*Suq!yUA)XJyBJBY71+>26qg>T{C8w$z=BiCBD|a1ZrKN4%hb z^l+c*PZ1yj2|qcC2_ACOk>`>Ho8;!Hu+>ka5zD6-Y+=={sd7j{edy*ge`&e>kk(JO z)>g~m{dFpw`u3fxZIT$x0z40-rDOM~UR5J1(*fD6BG)5{vt2#Et$|xi&dOL84NNZ& zfM^#td&=*R114b$GiV?As)`CvC3WN=p3$!mMK|cMvJbKdlhtIrmuS8h;M{jLdr$PT2%n)pjOA*F@+7n~v{pP^1t#?gcJ*Zj^!y~%l9nASmj6id4ZHwXe64i~# zDT$Z}l83{kh4)rsT_0T2NZ%xmYG-NgPSx|1X_ZZQqC-l9hbTZXSZQ^4RZE8QW(u(;BsofHMUSp}+=|a0^vgV7lkw_-V?X^V$k<=>W zqfc9DXL_V0ZXrp$XkyVccfWKM-P@bi`nou*M_#;x`!21aXk9z;;*9m)lv3vPyV{qS zx86rdRGk*r!RRpx8|3|h8EujxtyS8CNC68eBJsvLn||}DtLD^kH?kI6R=16L-9?VE z(@RWFynw*DHL>z(EbQ)O^7uNaYV1#jp!LROE^W>3RPMJ!oJ`CteGs`m5i7Eckbk({ zFlej%w|6r9ouwI$oF}DWbN>6CIsWoGi^DXj^e|}$?HbbfA%&>TDRG7Dq>#;)tG2e( zD~+O&?+SloB+wYN4pol2>PR3$ot~T2Ef%q0YB#H7GEr2aCWw@!YMOJQJU7YSe-;yp z^MCQWor#fEfZP2#7|mUb=e7%NvCYu;EjGL+R&+3&xvPzFqpQV+pH+jL`7R5K9|adR zAEur>$gH4lG9?l<{SD<&7P}H_O8di&M(ko=O}q7^;0AP9IVbywz|aqi=T94)p*_o|GRoS>Q+vas@IMtn+ zp<_=AQXs5P2EIQX>lv8ow-mzh>=)no7#lYz6qc(Xj<6|=91ck(-?1yS*)ZjEnJX<; zc7Dx`WX4*y0N?KXoS{zc=&jx7wZzX#JXP|3_Dm>9VH%UpGX0M_w{SoeB37=l#ZU=y z3YGO~(}&pLnWWxnT)2)A{exr;mO#`817 zGp1C{ELEh9l#dRl;q|+H7P^kuc9p=7KL{NuzxponHQqh{IdNj2&-o=L_bws$;+4Z& zo>AXUM6;k7F|5C7H3ue`f`%LSY9||&{ZbK&?(Q~ZuGj& z`P)Uo^sc3>wsue8Kd!#+fw%I<9`T&Dyz7f1K;af3T|n~Y{VG$Ng%a2f4o zI8MmylxXY=tOydtW9evHx&>(O>hw)Bu33235yRu9IJ=)pKN0EaMVr z!Wf^d;{?j@XXG}CLTj-D9}u+Gt72K?)+e(kW~Hzx+=+ZiVQ&v@v4H9%35%f&+LV0k zoG5yCW>}1lb47RsrA#I*uRZE4vcxqYtaee`WS^i(oc}C811hVpy(isPMQ)uNe<&>G zcT-S>;#N%_bml~zM%KzA4+3vI?j0%RKhGRe*>~2YpF1Tk 
zxSYS(ar%4lDcj>}$Mv&Sw1$I1WV|{lxY*q|IDaKEK5KZw+<33@FzY8w=K;Nwrja+F z=^bGzo5uRL9CtnZ!+CM?OAI`v_*U8Lo&Gg_I`#%i{bd)6FA0jvT}UUA#?bxD+p(uq z6pd;wzCb4)D$YC`@y4wnWy(6%-6Om08TPcWwoBC82KH!OHCepG-1yz|C5!nAhW1eZOz~njWu1zYem|jtag>f7*?eWVzea-h`8#no)Mi(&t0#I| z?GJ*zEZA=M-OrJpce3B16R{nxx}B-=_UhBtk7d$e?XMJLN$tCw{qMD%Ud&pKoPCD) zK~Ex)VPSlq1-j^(f#)AUz8{_mo*v^(C*9VnA*t2RmJeBW{*ghA23_z`*Xj{-v<&4I zGd;(rfz?+)#hA#anD|=kG7ZC4A53LN@u{FuHQAe4pWjzLm5_JxJ)^8J#a7$zR`WP! z)|U2%{Si*nFOh>evO_OKph%($DcKukZ|0jZX+ zDQ=REMO$v@Wiz6o28LoK1owG6jA59MXf~(`aW_w!(tb9tQK|8Os163LVNOjf5TEx^Vw!;@vBDHK&`dE zCQP>jR3Vm;pH~87e>BgG-k@(2Pnb4P+}CRY%YU+~VYfo#G4;*QP0X7g8y9mx9sZ0w zPp3;k#r!t1Oo~2@!<w*$bdSf$@z!-)jvsfhy!?{*@;8QXjSnXxEVbq zI(GSU1arBA7&27h*35?bm~(05L}5M`v|Xxi7eGer!0;Aa<4Z*p95$1k#lxnbGhSbd z|0dPG+V}QKeIyqkBNsl`Z?b1v;h!G}Ytoc!K{Es47?^Ip{s?37rBi~`oUM_)bf?C0 z{cS2TzJ31bba3mb5KU(e)Pj9!rDmsX$XOa8<6HpcFq%b>c48h44GlCe{GdzD+CB%p zrnY%cUrZ-2hY2>yY6R1>(=7NvR>)|LseORaH>Kp^Y$KLn!a=pL zz6GE}7d-1!SVO%-J!^p=;wM8pAd(1*y<`my4L!RilW0cLwnWslg!u6wjVt0#wBBanKAu6GOE*v18QmqyBsCtHdW+_^NH1q0Fr91yul2*&#a7$P14IR zA#BXuQBp{3nQ_u!UPw#1)Ryq0*bg5hyg5HxasIFm`{S}WJAIkS4l|b(>`O5yF$@Yb zXF_<{3fnUO<*W^j-slrGZyS#*;rwCZBxkaN+?rrjfyW=^(dg4oQVPUP#_F6EvG#JC zmDn!Qyn3SXj1-q6Th|murH)#HrtR_v%^{3lK9j-miH0x?Ljy8pim9B_+78+?%gs)^ zj`qq|5#ix+L{$N=2EH$ZM6;P@r6zKtI&4QT`ktdl)&~#*KSF!>TI;|xQBJ(%naTCE zL^fHXMndbA6+mv5FQk}`lx1hZ-l`@0w~xpYA(*n|`GLQ~C4b(4kjuTaS-2n!@`ys| zPCj=TnVEU-wI@hM7778Nam3>8jYYAIpqg7QU?iwN4+12Ihm=yL4HRfec#Ek^7>=P| z+j&@wTF=LiiowOZO6uw~QuNO%BQEhgUzB+jq-m>g2IPAU;pj|H zv^%~={zx?hTJi6ro4hE<%AUs(bPs+~D?in0ZBgmZn zB{ek_pb3Qz(48)VI{`ae#=8-V)We8HmN~nt)rRqSe8S9G{Q1SjMR&`b!2nlmpVn&S z^H0?^62%Uc>dGoAL)wJ{Dgo8@Wp}O>+Xu$KfCtXa&6z1c_c{)2mhPGqLiP0a#v?5s zB_t#$o!%m%l6KOYT(c^>L;S%ZBtIc=N?Enw2n=R>QZgMJ+<%6D0M`GHqB8!v)qk81 z`q`|;PQJhPb9;&m0%v}LdUS`SRAt7~VE|924Gj%-FVvJH=EaJq@ZM+We;U$aIa!;& z$s}Vw%^0S8l@p0@*4mnJAR_ch%EY2zyW@q3A zXMV+if972M5kcYM;bz6UM|=aSy}XnQRLs5@dmiw)!#@<4m6Z)$tGeaUS0v?*2L#lY zJA%r95_d$VC4{#9xvRyr@U|WU(rS5IGad&mb3_G=MlYsWyvoUuFuLCOXeAgRu^Tsk 
z)z`kh1?Ff#`yTNR4%|24UHf2@HGt9TX7wYH!`{^sF_T=@d_NWxT}lBnACv&3TmgWl zyr{K7NdS&%R#sHx)~aKww!jqCL7RC_Eb{Yjs}Q(Ik4J9a*#JGtdIcF&2OUC$Y&a@9 zoHvie7;CQl0Z1Y`iu9TvM!Fvn+-ons7BCq+a4w^!fxY z8A`DS4*EXGD6%d;*D$NKd;^di0=3WOvz_Ry1lxLh^}xDEZkD`?0}aMLDAdMaQ(<@7 za=n86MxzA1U8Ypl8$#&fyT+U2MQn8d+4;-;J?4vZg{8)=;;6`4-WoN2*~*i6zF{$) zifx!7;WjGs`+M^qijWk%*+uil$eMY=DhED_2AsjW3hq)J7OG(A#XeDmU7#+>PA#v6 zNm$CKQ9$KsYADe;5zI~BM;9ei<+2*sD)xdT*tPWQ!Z>rH(TE$I# z0IV8MZwE+dc|Hs_@ckB%V}8|9fXd7=hVxb@=ld6{jcO-wp{_OV$nH~{%}mnX=t?|_#MDF>gi|kJ$_5MY zV?Wi3$^Q@KPQ?K%`MXsGoy_2ed??P=5I7$e*y=L^_QGZ|CWe1iJw>mVY4L~xz42WZUCdb07m#D zB4rC~0G#MvDNNZq*b(Fki<`ET8SI&%R-msei~1oWYJrM~>L_=Qei}&{CNHpx%#qSZ zTrE5Mr*^>Z?*@zN?kB8OR)n+WdM*}TXt<5_(zDMy93XikeL|ZRqTtv8I1I zRdxI_2jnJmVDE_O^1~S5K+XNFi}m&eqOrKPOUTt172_17VO~pqL7G}IKDnvjVQiWiRJ!i+anmku%PHG6}UUOSo-MCQvvFJ!^Zj5t89-%~oBC!lt zE9TOhl^Z(IeE*u)`nF&fJ=t~-z6D`6jte$il-a+8aeIS)H_LNLkwod+Q8fj;GTHx}Z3t#-en~m+OYO{rW1|<%3~>gCMoy(?V(9ajv*# zLH`g_Vaw*me5rU&9|$-pyQ&*?y=!QxlhIW()>Hm#)0$dgydby<1wSZHf-maeR*sYw za4H#X`R~v^gs{D~J7JCHv}1OT5)g&yN3>{K^}~IL;Vv5$68U3pEd(ysI#xWX56Piq zAPc)&A{!h)NenHH-ZuE8b|q`PD{6!sPrYY8RP8-dd%+zXC?Z{|y-{~=9WKVKi9y+8Rjhv}BCsI0dq8EcuZcce57|e5UJ6N{HVg2$P!IyZeCv%eh3OCa31^Bo7FrP_!!>S8ZOh`UdBKE&R}u7E<A!j^yVI}c^AE|un}+|_!Ql(7@ z&bhfC)|hGp;>7EN$9&eAhtl>ISO^OA;<j4QG7nB$H!{+Xu`cD;57VeR4 z$UM;Z-e{;Avt)W81KB#;yyQ~!`_8~%Tjf8S`-C=$njsK~3p>`W5;ZLY9;*yfQY=v{ zrrXd@kK76`@h^EXi!r%1%aI|{?6rIgfuF8EkEh(QGzT1MF+xn1pgr;Pp`xoW*lu0l z2iTXYJ`Uj{^A1*#JX+n7sKui`yVa+wpY%qPYD)zca}86%ZX*_Oqr}aQT{w1?YX4@2 z$O!YM%}CX0t$Ay*e(XvA_iu)TU{_AE4vm0-rM~jVAqs?W2ChU2V>SAV3^d5@MV!i7 zRG5*U=$;Xf^hEff%U48=Oum8r`uk3K#aw{9X*4W2D;PA;{KNZ=PT^38TPG9pOhN+l zsNnPQN9FVZU^4upDmb+pKQ``&2?7{czIC}v9#WfeYoj|W2VJ0fG@3cvO0ljQvj;eW z+U#KjZVT+!tKj0w4*9xVlgPG^Cknxcr>5v5!X&B08imA;%g9W%{UNn9*QfakF(6vu7Hjm&-x1aKFt2 zxB>5+sZ|;Q5TsbBf2hH0G|Z&QP@?8lknc&7De5aR)NW^C?F>kiBVYUJb%pQ5@1!;E z7T8O55E9)W;F$idL*77R;jPX_(_9e!EoLm!JIa$(ZMF0~do<9K>O*N}!!u8uI1%R? z0dxa4P9SI9XxyQD7f6PTA--%VsNWvZU%Rv#=YdMnrgWujY_!fUw8II4iNMbms=0+? 
z8v*X_?)oCgNIytj;|8*eqAZB}WuPFyLI@AtqBPuMj;?^c$x zTe=}R;%lo;SPY@RS{j8^F@YrI{@gUeYF^)A7z_>e%wiDek|=DVfVY}b8ktvZNG26p zV$F_}>h z@@n85X6-o4#7sJ+TXbx_<&HTE(vEb85SUqki%o@bRQ#nA-7;0S&(<umVCBd{j+K228_d;6hRZn}O*E9tx?K7Vz5BYX9p&zNp$3i(+i zsL`-V#_Bi-Rk$e!_OLQ5ct|R0y-C~{=(M=q!l?D8$;%^}mFq2{K8A%>rShHio5`ve zpeTY0Ct63E$b+?CBoeNJ=<`+u278l!f6tTjJ`7WFxln{3=>Ip0Ts;L;$qgPe8XM)_U*ey`d_MjCZD)h(LW6C1e$Q%QI^|b zhJL%uljroMCChSYoHeVb2%o`XvGRZ{>O|@&qoShXO&Zh}Kl4IBT|=E$yHPKlPQOuk z!lIEhfv>C#Qc@_p=H}yLbqVUC>G;9H!E_*HyU=Bneh^CR|60F6%a;xe49vAr``_SG z|4_Z(Urq9Vyh;8)lUbfm>7e-+?|~#KDf)S3UZ##IW`*gH_4rU1mX}Qca(-weS?U$| z^CmqUmmaNB1Mne$9@bM`Mg?hGbmf;_Wo_}Yy|1;OUUj$;_UOXJk&zJ`kZ}YB2TKp` z{L%>4s`H9=Z&-Wrib_ir08BKf((7UO$*Ho}6A%TsvNFGueblBe_Mrazb$n(i1r>1}IktH(TOn+&keGs7jf%I>^IUv#dhtjwF4aryJ^vVt7E zS3!ShSF3wrI$JO|CRjw{21Cm+rdrFcz4MVerCYjGQgLDCR7p zz$E9{*cIk(r@MfAq5y3p$Q3y+EKD!)iqyevlshKyg@uJNc~zPK?7@|mnl$uy4s#)M zTgO;yDTAByh0I&I)~vq;eY{SQ3_MMTsMKCV5Rq@xdIbju=O2-=G=u;s0D+zvE=bFG z8UptKI3~t)2lsd{qI}4G2IUe|kFh8PFi&s$g?}53S1c$$-*mUZyKkt{198~M$f(3^ zCk!U>-S|P{{Z&8|Y;0QO(4Y=lF^|`1AjM9~mTHGUBFF7%AHBl{nP%Wg-3qMhZtD#3 zu*B?7E-+YQ@|p32Hv6lLSO^7(1L(bU?f5@j(ET3^@Na+`rSa%1f;K5~BA)U@3(dUzyD zWt0BHJb2cY`(R0)Bw-?ASXld;Dh8`I@*EA=3@A2^Z37=FuN)It+CqeKRsL zAniFKmk6dMLc_PpPbBN-DFKA^u{QvH-lnI&-urKoQF;C~b|pa*DKb_^U5lLWeqlQh z<#RaTz4KpFnzC#I%?$(x_k0I;l~JCofVc_|n64LCnUFy05Ty66L|fXzZSkIiGepCi zd~A}{F$u~Sy2I@2eb6diHtwFDMYrj4B9VZlI1#IS z5Mf{t&n}l{t*++UJe3}w@udz8sb{1OysoIK`kH7#nbxpc*47&QC%F43_6EGbLYH1zWojV?O#ZO{EB#6mF$;^XG|3 zE`<0RYq5G(e-f$xE-SG623@%{%qT-GAir`;k0F>N`Bhr&eKZq2W{AZ^hu4C!520x7 z*+Fg|z^n4}vw{tb;?weCYHZ=og9PQm?xB*kikXrWHC{d!yCjZ0O79D0bh zE6Iptu%_T`z3hb22M}=;MaK$tNo(_SRP!%O*F@}-JK=)OFL?GhZ{nbSLh+1%73TaryQJHYaU$<8>Q-n`lLarh-tZib8xeX zvZ)q4cdKFxd@|}5e(j-9l7OnAB5p4qbmO_Hd<17 z@$Fr^f<8qQwPJn@by_Z(7mFxBrvgZ`G%qJXUH|u=w9B%;_zm*aT<{T#HPbFx| zE&8t4Ax0|bQ#NI0xa6a!Ld}O*KbIqLgwqfPZR8WyVQXmhO?E0ElN+DO1Nc9%T?%^` z33qBH=6WH+H(iQ*>b3UYkn`b@?|jdeL2~q=F_|JM$0Vq$$esicw}P%w`6D5*6nsQ8 
zRWetfxfXe2h^}m6nAquhiw`7Jil-N0`QLomqEHC;UIKd9p0|#%6;lVWM)w{aJ+-++^<1gd7|VsQ?#r2Ca$F9p5Dll{4WhU&|J7) zrEW8HfJksr&OocS<*kyF^nfSHUTCw?=l=cXHO3b7uNqmifl6KC!~sMe-h- zM4s^)cze5NaqHv($c=oti&aue;aiwJ7 zVt#I-ij})S#c&u@A>9zPFgaf=9L;-amFHX-6S!2E{al;T?L`SVN-|Ukn^`D8I(5Y- zNG&nXGXXSoVa98Fz6!L5zXQ?w3o$^M+&S@i*jXqs%wqKI>}Caf?I}G$FQQvRfuPD8 z$F~+nKTVH6iXE(5LOi)Rn{<22-gBU0xOvaRLuScU&LI_K=azcu#1NnB0zNag`@$IA zkXRq#MJm1IF{`EGu5jfMu!t)Y0KrvQLI>NZ$R9~}u+lF-F=rNqt9A0h6uDtX{^_zt z-3NYs$gR`2&}t3x4N;gKbX&c}|EB8zSo}Ce=s)-hOf|9wFVE7c}3O2WD}6KTvlW+CvZ= zAQ@s$9+OF%IxZXn??Y?%uU%Jn#OANIXw729L(EYxa7E@;RuyH$2rnBV*x;%|FKZ!- z+MPltT&u?ic=?Gw?XHj<8lX}&bc?s10p$kR+7Ia_PlngntS8pjY-Ut*N2C&QeL>CWIiOPru)x;6jj2h2w8qJ%o=z0+5@x2$5la#QgH8 zo+Dw_9Sf81u3&h*m~Ixk#K0rGgC0?(of;iTb)sAEam{s=dlPjy#A~+w>Cm2=r=d>| zfW)A(`&mvIX@TC%D;CzekWxL#7KZypF5}BR;Lm%9h}%EwM{JS);)^E^*oUyaaO{;> zJ&P2*el0)UnX~aOo~z9E_luZ5?>F?I4xVWD6j9KtySU>GlwZ!vI@DiYtXn4FhR4$Q ztIXvm1=3cdW#_y;DPTjiQMYXfQBE1k4ap7-xoSL8Ihrz37)~ih4Tatf$+fHmNk^9b>JR(M7OZee_0x+bZrmU=N9nh!?J5lg z6Z|O{3UPCquo_>YOlJ#+U=7s8OnmsGBXm`EX(^pmE9g=Fm=6TlT{hdYU$5l=aC0ZN ztn9)m)aD8uf!Q#xosGcglMIBf2eB$KE9(Us{fDc{eAyxJY_pn72R13p>%v|r#hC6qYA~1fWpo-oE$NF>7PUN>8zkWYc~)MP3D&cPIK!E zw%swTLRF7EhIQt$rXYFs7Pr@J=0iF}+?(v%S1B&M_YWYef+;ST(+m_El)Qi%&yAF& zilu5td0u!?tG3E!<-(&^?CHxIi2(Cw=^1-90jdbQe|r%a^|kl*U>wUPqMt+$ z5sxskiQ?`9$%-XBni4IbtLEwXtovnXM}^mZ{(TPx%0C{vLT#XUkFJR1FalY~?TuvMmV}nBhh(x9=rff4h9Ls7q#0W$ z%fjLva3KmWE&B8bgP)e0P|o*u>{){bJ=5%it%t&9vx<-ZD$VNEIt7TJkyo)|8K6O> zpm_bLya2M6DyVAnxF`l^QYC6h1?RTn?bc`9VK%j&e1mdgHn*0(4_E(6)$0~frMQmG zcJp$Lu%XuY=UmYQQh>nCl~(MRNk>p_P=4k|eIXiQH!DwGHvv2}l=XAt6$qEJ0NY8& z!g+3U_N*&5K>-Bb*e{98TF|%zP*`SxKA>oqqdDY5?qlgir6$lIqn~XVN0uqD_p;&>7 zYL+3{2_xG2pHwG*2SCic#ouX;gI4gdug`K}03gQyDg5Ctnc#mbf#koJj%caF&;)6G z*Mv*}mqCp*6JD-9N_L-j?MRg?yxo>OM>#)@QvcWB-@_VR?kfI}5r4JMG|=w0Jkkc= zFSwc$F|SF&w}e^ zRQcQ+Yt{nUk_@0Xb#-;K;*$^mTb;;X3R{q7|Ba}e+)e|tD37d4?9%n~x(D#*-`dRq z^%;eqsm^IZ&V*fQbWr zm8*braJT#_FzpYeKs*x9iNyNxft+Y`^GgUW2(kR;2E3sl_etWky>z06Dd} 
zOe?6j|I+{ee>3^;FW7j#8BPf=w_7%?_Fu%%ParY0_y1%X4DA@+ljnsC-5|I=mHcWM zU!Za8eFpjF7~Rvv2Qy!#(O3Yw07-P-U5TFV?$<6B5LBKX{!}#z$P)(1Z=&Hk5W72w zDb8k>&;6@-I2H&yFghQ`Q5L7>oG+&HG|Q*Dcs|0q=Fom-tsRl^Y7tx6$?7H z1EEe30G}&@7l#gC1G+CiIY-A1AcKT?zVffm(uxRf;zG@!^Y^Y+=GqDF8>9FXL?Km0i!hDwl*B> zV$)-eQMY2{j=_g>t~CAvscZ~S6x%B2Ce{a1D>^n~9$-cS1+{@2&ceAueXBpBi7wP4}dQ7k7r5c<% zhe!Ooxu9nF+&?t{n~CbEzljMzyoCKSW{lBr4IXlO%qxFh&FM?1m|N*>*3O7}`g{fc zD?US6Rkg#&;uuND7)%@i>z7v>cG#empQ}BN_`M@BO0RaNR#dX~c;hku7gGOVdhH+d z+Oxk__3x8G{k^EErU%FXaaC0zkZMRtN$GG9;tMh(OwEI|PLy4ena`lfp<%UhikN|o zWUU(FcT!bwu0>+0al~>%$Od@r7`%&=esELxO|gsGr8XmpG`1j3&NkKE6y(6t-1^^- zr7tZlDR~W50Pgh7U5lQgk`iN5!LKg8h)b*!J{vDHyo}Y}+6Rn0MYBSTgMhQ(YHGru zdAm@cK5_Y9ri56#CazZkg4p^C0F>_n{1Q=4`T<@z`r@oX$ zGp}-h|3Kv$H2nrdv=coD6V?ymDcgmAX}ysTtmi-f=Doj6@c;29zJlJcSSdd|Itmmg zCtC2=9stA>Ylh4HQXzriN>+F4Iq|RNoCJiQDzRV=v*PRUFKD4)Nsqn?EdJ3A<+C&E bfi zBA_532@pCOgb-RNfk1%d-Cw*j_kCyXd*{x~o%zk@_ufBHL%wzIwbx$jd7ia=a#LT6 zb1(m14h{}Z?dw;KI5@Vy=iu0RWX~?}o3fnJ67ZL@-!%(AV=pJaz}r5K9D29??z(&V zxx?*`2RQop!o56YrDV=YDM=i6@$HI@Daxo&cioNOy+yeWbcXI zbECYPw}~|JR>Ww>wLHt4^9UY^K10J_qn|cid}A5HRbWkFXY4CypS;u9@nvOcO|?_C z+;O7g<`OQ_d57;Dh@f@YWo6u0XGS zT;TFK@S*?U{cRi^pCS+L=is=g@bDJ+RoRJu^OQesFHb1qFHB8MMNi*gnNOERHL})e zFjZEI25aF^aM+na&$`)Y-0CnboS}V9JHjAEW8*v5-sIBKjqs#_jDf7I?HA=A-nxgH zCn8xDeUmpxwH7sZZ)b$g)q0bkh&KBo*N0M7gMDB@g&$9)*!;FzDCm|umYOdEC*<~p zug{M4IV8lTd*LomMH`ab&@Nqh3gCkquQHOY7-^5a#C27B!Vqk)E7C+8;W zM-K-KO6KclTG?1xTaW!nG>F#{3O?CfUg4SBHd{R^XMWq7S}zvv3Ql}6G>(0Xr02&kx-hTxLNVzD_m-FV1BFk;2O-X1m=Ak{=D#Sf9xiAX$i-ztgQS# z6c8B53)^es-(zZkrKvH7xY^4vw&{*`j^5A0FB}Y_eH2c5mq5VI#t97K@HzY)ZzBWcsvpEV;ikJfJkxV_-7?L2wy@qcV?R z!p6nNj&=rVdXG*_Opt@~hHZ?$SzD(mov9R^-J6*^z35#!oo3^5oVz{umEZjP-7QY( z9!K=_^qTycn*<$c!MnEM8)L$}36))%0z7pU`EUKF3Q(F}US9i?D^$EH3BHzCKls8@ zlsf&j=w?JCr=oBuBZAu@Px>4|8Bekc<@v6r#-=}NnyaJP)jJhQBz^uf|bVfaM#pOEUf@J8RM|GTH166NYEk9`ab1Pxwmx2yzTOQpm znPt}|(^H@kM%96xjUP4exFJ7$Cs8k&^^6I#>g=t;F^YNy(^dwqS8HRgh2Z;r+9b!~ zyu!x#r!Nn1lkx0zd`#y9x>cTfqFs?+@y5l|RX0K%W~xWSo7|Gr4JN+4D>hdK 
zd8^k~TQo)(4DfHvsx?+lUOovjTq6VmF*@q>CDjnS*eXt{{UVZ}T2`ZnMgNE>kkk_N zHyL&4?&(QV7`oetYe9e|rt!)VrQ_h>0MCh2IE!wJNLTW3%Wi;Sk-hLzT zwBA|IL^P3B1S5F#qT5>3S4u_I!nL%tsQ#Ex+tIPS0L%!>@50R6#v7TLnbRe0Peil& z0@<7jeXnRZ>SDfPLxG$LZYh&SSL-kaqlF!os+%FuJXkCSo*_v~!EZSB%--34!o32)wD2HK56^K$tX>%%HBhGJlD&xedgA_pftV#6V>l27v+h)$E|zDJhK6i5 zHf?Dk=J*6e^HFE|a70)3wLB{|`qwLYz`^*&v25n{Q7`1T3LyRj>^*Z%kD(sj)k&lC;n>4E$+c zZ*MPdJl8RwQP<gGpvDX|RdrOOVs0gDtOyR=dfFURi z2jewZtM(T?JNY&<=6a_C$ESJ=Yy};PimaVRXsK9PR+G8Gd&IrCoojIAa?n+M| z-N>iu{Nb0GjF@=oj4qE@sjlxS5xS28fkg;dO?J%A2o{mSrrVw8o1OvzNGDFZBw*N| zL>NzjjWC*wLWGtmozlHc^bNCWoHS=EVbfSxJfbTQga(IUA5Ky?ONUYj%2`5z9^)<7 zJ0q6b6{YV7)p}6(J+9C<$r-99rtAa`(13Z_q0q8$YZnihwjQ-I-J?${DA3qolJHW= zDXve$r7&D@L4)ZIq0kva>{MY<^m;z!g*vPiF%LqYImM63_>0%4J2x4lunp;k>O6`G zi!h}|3pTiRVih}$p0<`IF5$jM3NV?Af`LPZ%HJPyGjYdinNvkYg%{4P{QCZBLB)5Q z5sozw2G({A*3Wu)iL==C6g97RZF&L%bu?V4H-4#8BjV=eyRL4*6*S0eDhpSuW-C5@ zi^V-#s8}%7PTyT-_);^WHRrrA^J`RZ@F)XV214XW;3$HjEF6Ilpn}j!jD`GV;d<1! zMYl?H9H}P>Uvd3)?h*J{-B6W)SG{*;wOB$u;)jT^KjCk6HO<-biECS3$QX?fs3i~Eo!Ib~L zvCEWKcSEm#<;D%0`??ER#tD1kA>~} zzJ%pR-*|EJkERhANm|oW&!tv#T-oR#$5^_<{S(I2)i+R3+SCbO%&GME^E=!EKSf=| z+=xG=*RnSy;q|foc-Ar|=5y3@)@h>~6c@eD0;}DEeqUcz1^ZKj>g5{my24y3*jxt^ zp%z|;#YQ`XlN_f@s^?U2eje~h@((c8G0)7@g;opn-y5?c)>e8#I()OZB_$;{SH>ei zAmL5aQ!FW$CJpp1ie(XGFjNz5Hyv*KJj#V-SoC?NicUi1FI(0PsqD{*=?BIv1z+fRozhHb-vbrcmjAsOPV)(vjK$) ze;zNbPs^NE&JVUFc=zDfhiTEI>@W#7Hg~K#(Qm5EDqdw*v(yQ`^JXIluM5!J3|14J zD)mrLwBJi+wmNy3+XXwy$aHk5&}?+0E16f%!&X1=Mh^}FU;cp5v9?5Ec&uXF9!O{l z)WgpwEw3@~Y(deCyO(7?8>G=i+b69(eVOaGs)B824Nz^`33XEN^Xav|6twqxW!^f* zEj#O^o>}4xyu>HH3q$!!To^TNs|$Q6?dOD#Y7fNPGcxV!e)M3%{SO%#f7RpW$&>an zvbZ6z;8@VvlRc!^n#jMbxXohkwxk=}6{n~%X@}jzfrIHiHb6S1=gu$z;Sd|F_9G;* zTPR~A&MKc-7(JVJIW%4Fg2jbzjA|jSA8Y1I*t{pX2UKR>lIthNea%RPc%n<*ubXBO zu#m#R+|K9zmZPB|A;WDn>Sm9R$GCJh;p881qqlOGNLZNrtMyLEYUG8P_F5f zTZ&qHRQ7~;r8uR{8Vp?2l^w}{d7HRLFD+AkC&eed?YrNZj&UpadRsNjI_+AO#`xDn z&F5NHwV80U{$(+C1fsL(`9Yhk+kwl6KA0pdPQur8kni2yLiCI$ua^(xlO!KK?Zyy3 
zdGr5Tx7=$zMkgB3>JS&saXG?nz-&s~?#vbBKC7lbGB51EK*=!P@ws9Sbu^a9CScY1 z2)ez~#57=tqsp^_ zTj{Ah$;NoOY~5PXoc-MKcr}%Vpcag9x3OuMWVgEYxw`eschg>C@oiJE-&A|BT(Jh1 z&)t#mkW13~YBm}thmZ|l8&ARbGP=akHQ8RdigBNEpl zcMWVM#u->K557Fxcf`3;u^JTd{fj{x^{8nV_>zU0P!bL#f6NBGfq99914&-67{41S zP}1AfSwsuZ(3B1^UoVH_N3kR@FHmRqV{^;w6NBd%IZxi9Z1gZ~#dWVqpn>h@z z%r6r4KV}3 z%sqVvYG}07FBPvxk&B2z!Bcjd7Ne(hl8cuV#N3O+XXY(dK*C-lZ6Re>ACy_GEmEPK zwt9LtS%Z)8>Gpu2e9x-0#=7OGe?;A7%t)`?chj|00&5_x*zIyI%1FKYxS*`fv?q2P zM+b>P=_qRzrLhT*8++>Zte3t0fYNag%a_@ctLUk281eB#eee5XvXlMg<35OrU4?*x zHeqkho)$25HAjY}TLIBoLRhxtsH(d8+!I(s2zzn(52HXZx3JWZ9uNjm3n$ zZ1sdW76Ckk*mfe0qq?$kIc^U|T{yKy!3 z-UFygwD5yUbiOp5@YsJ5ifpsB8+I)ovjgs$mjr((uNP8PbvPV~;#OvF-yCVX^`?a8 zuY<7+KH1Q#-5+@)f<}cm=zu?lk4}c#D<4qS<4?&5Tl~BkPPIs$zIy-O!TqYGw=)%9 z5TyL5O3f$Ri)+#S$!+DX`Egwp3q~X35+17g#~JZ~^*M#w!M&n!N0JTJpDSz-7d*yo z4-g|gUA01HDn;k5>e|J=p`O^c3aA&%LRiw+(<=uoz0%tR%Hf(belyN_gdb9Ha+=TA z8!hq4id97a5YMf&XB5K0nn7}MNJP@Z0%g0mEk*8vVgD3>`3~2crQqIPM}^%m>eGAw zYRG_45KXw_M)i{t+ep6BcuS1-Tc)NLafeP3gT}gid5w=zAplqIXc*EUpi{+GbBX#T z;dPaHwoz;-7l^ZcYJ6P}<2I1~yLP89NSofQt>qm>x1k!9S*v3)zZSQiIHTqif)>kF z>k&ito^77b+ebU9j&tF@SdMXCJ1_X`^ zjy%)cX{>R^p!J33ZlcUU3y5?RH51!9PsC;9k0p>!?JPB6pVT`Z&~=S>SyH7PjbtHR zk#v%Iqa4%Fv)RnX!hdy;*N50|X6SM;m7wUEju8pG3 zsP!K~mzS5*0osD61^D~>=W!|yy)}WsU>o6IFYQiJUwoEuSc3+@1_LbnnU;{{hP18u zB0eiCiy#0{2V9F@LW)rJ>({SWH$g7C8A3y{@XV#bTMsyyb1Kdq83gEwHtJ`q2GyxJ zE{%=5@I*ZYY8~y%OWBygyf#yF+n&HWdKi%nL4a!Tqk@8hc>wvNYtKrPByFLfA{&>~ zoi_tULLWbi(F_jWUONP`Zv$)ufSagqa^k1z^a_^&&czHzg5;G7;YFNPXf&DtFsmxf zb+d>#W};nr&${1~@c5{JfPfAq!&e{A$W#km;qP>t>MlUw4N}j&9`!G<;!bC*mz=c19Ou!4*ZcX9AtQ~u}#tC zKLFk*Kjj0Zy6YSF7Cijs1h((5>j^~Y=cs8N^Zuypl@dVGtNSIt>x&%GgQ@1hmA9mw zZ=9BEs}W_tLhR13h;hT_dNGKTcLz4lW!^Xyr;u;>a#=ga1iq_B&d_k6QZ{aHo-sab z&KD_uTK0y>?oDlxbgELD+}Tu3f1*@C;E=bLw)-S{)Ou# zDg#Q8gANpAja^dKrw{pdN@jTl2dSiTJl5ri>T4$qSNBnC6HRxM(_rGk@xWASwB|q;`g0!#W4W)74luP zB}p_=8-9-~&Kqtb?(yRvH<;F}qzs`QZ6$@PGd1rM=ib8S?$hm_Xzb5#J0OsWilOVQ};it=DPB?Jzoz?u;bWE2L 
zpmJ`_=RHY;+=ICv5}@UEsju8}-EnIZJfjo<%g2?TK~Y-YO>%QIwtTeU7`vI~KCx3e z=+ZUDQL45p;Y({0f03`s1l{H+AL5r=t3PR=Mwx$u4RTdBxNB`~4S+{ZJGb*+ySfZB zc3m3o)`bZmijhGP=WyUP>#sBN^ z*J6(J_(jxR`7fZS{sWBGKTi4ga)5t*#JZDins2TaAw1AOdiV`(9Q4@RA;r_DmFul5~UAfScIWXxYXj=V!#;v5J#B>y2n!MZK z61t$EAo-vcc?SMT1#P8USbB0fa+d{5ky#&Qmj1=R5^1cDjv^@GL)6md5Y(Y9s;fAGF4_E@5)01>6(F0?83n0-l_9yrv zjrYEDW-$e#)i1W-f6Ojm#Myav92{@?xgEez{k<93zdrhZFP`@wclj%;;{RsiBOsJ+ z0p{s26tNG^IP%} z5#J<0BO6!movmN$xc1b|(-TccTUz77f(@&o;B)JOO)9uf^DZynJqdRHDA}XWreW^N;p%aHzg~QVP@re`Nsw zVWoc;F~)!2BGt4lFga@)O4+&Z$W=g$eXDZG5Kpn*+Tk;&ok7&^=kFiqXWaZEKR-V^ zU@9N-I);3cfiwYOR?hpS~?DMxOVgCCD|KBOm`G5Ms-iU+n!;3&V zMMVNbN8)_PeTvedK)`o%W_C6XWTB9q5W2ffy7h;<^2+8U8Uq+>t>e2g?^ad3BzOZI=9P1UXzZ_q3$z8<|IW3eKJkb4lX<*|LEJ{d)8J~uC5n7 z2L=YBIaL2-zB0gkQ`HezoY@IP2^N>EUQ>dcQaDy$ZOXto4PF_Je1J1U47*dzIw+UM zk4Rb_uDq-J9~X1|KW!=hA^F>X--!JMkE=62_h$p{0tQT%MUF2=dPwa7p5tLcCM43IJula5dbZ46far96{d%yyy}i}!Qain}6&&3o zvGW@cOq`v|!~nALT3}!`*3?ju=8?1zd{fZhxEuuK>Re|jORs!DB5e4tg6Vt^xSX-m z;FY#JwlvW$00lM>-~G#5tf^f!h!)n5mE7z6I`t+!jguFd(*rJA3-Z8glE=9JUHZd+ z#{|TQ41*|SasKNSt|q|SaPQ-liXCrD0m2q7kk~f;_?(!Nmv;)1>Ht0A+I2;4J__Xf zBR>MY19^Rv9c}~Inro{KKv0*psBZ09>v$*Wrc-+uMe&yqyn$9k&KHOFpYX6`l-XZTcQ1EClh3U$WE&i+1ea3pAVeE-id}z|{<)jrR0v_rcGJ`ahMa1#oa2>%)$F;w9moB;r@rtwz~4n#w0x2%;4RB+|JG$`Eao_Ve# z^<*qTLwyb~;%{Oe^Lk)!*9X^0I4~h$E;`v~=zw*TDVN5P(Ynp7 znvIbtGC=TRfXHQa${f${6~2_7<9V-bhedk->unrw1|U@*r&rJwK-*%(ewo)No^BcM zOc$i6xsigt=L;NA;=o8g8fs9?Iy9P5?z<5%s^);<%!jmvWJ#}J9htKknwi@QA?@kS zBmmti*#U(OZ=Hr>cTbZF5Q(n#EUi3NQ%F^K!!ztX@M9H^2uNTDHwVa+;?%JT%{Z?C!-4Aaac1JT2Fp*EuYyUa)PdNGPoGRkR z25qFSMIlkNxukl8AClgv0y$0NVjm>20Wa_jNSYcKfad9xldemwusW)8PktkO8q6M< zGIJ!|z&#p-P#=MUN~t)3E7k)tpX~wTN#@N3s4T0EFlLrYvsB};du>%ORmE^iUz`uu zX`>vTO9SQ6A^7fdE70@p{{##)y8k7lHmzEA@_v^u5a%*Z*kL33WuS0%t!6AfCQ2y8 zb%QSrySPe42o9+cK7{GZ(0K*-Y@Z$^5LOX)+;W!Jq+l% z4r>V4&Q2J@%oWe@q$`DDQbRr+lDS=HDK9J39=J);E>001^w;Cook+2Nz zARvUR_ndBzEVfEqQ9G`{nySu52m3TJ5KTv99dh!UK`xX4iD$16)Nh4Be%=D3u}L*( zT4trY9~4l~9s`r2V7yQr&STCRC+zHiNE{&d+RSYISm}s=arr0I5=)~ 
zV<*zwnEKjshx5Ov6xYmo;?;yyC>4_%B#lC=j+vvzBp2y-SZG>Q$LGF&o-=s(<$Z6R zJ6vM@eV{{AXT)z^MB`czr*@db%@+zKHqRn`1K)OV)=Q}|Mg-fWHUf0RCPrk&d!m{O zHEax*%lMHl&gSN%<5dUw44|20%EJj`b-aNqD+hv;l+i3V?*jNygNpLn+Tf>FJ7X0C zqM(N$=BtHU<+fl$vQZ}d6*uHP%MrSL1(mC{@J?sV&0PWqT7876Ggdj^F}T5mO_Bhg zz-x@sk27s(fE?xnqk<>vGr|H9v({Y|ZJtfE_VorO1@&c>uA24$c}d3mcyvvusq?}T zx^7PSjK|JqVaSKF6pu_#J9MfsB*IyYoHp0d)ghO$zHe%xS~Os^jl$v1LxGcjl>VrY zzPcHERAvkzTmtVWNe0&=AlaTSQoUiK>X)%Z;8fm)v}dXW#yz!+(e4br?R_IRZ&XcI z8N)VzVTOL3glAb6Z_G+Nd^jN4RkKMfvJ0q1jIaDs9P{(cm$IlDR@)VwY?CqX_nhhO zW;TJtK1BWqS~!`p$S(5kxuVniPi9n10?N!q`>OFrI_4iYw32&GN=8m4942u)&DJ!J z3LsY-7|)`LtmaDE%$kuxVRAr;Z$DBWEMIwuZ**&6xdCZ!Y~uly;?=dfz#(JfN;BJK z)R|@a#|lMZVZFN9qyFuQIcujdk0-X<@eebGcx z=sa$HrPnpAH8rm02Z9Nm?`-0->INM8*POK)o54cs3|CWF@50zOtEltkq0ttLm7SU< z-s)r?HMNDtm35Mf=*0-=k+O`GJ60?^L(KeElF9oNrO*ZTZ4o|(s*5kVv|8jN63~O5 z@ktT&zYB%4y}SGy{Y@ol^mJORRvk9x(6{Yes&DhFwvEBYt}B8dS z%_4`6eUo%*#-+R8TrN{)1vq*a^epQnH<7$><*RE;NOqWncXRdG`Ppa2cGl0PYxLLSYg!XMTLMdF9lCz! z^BM|s4qNK2i$Qj(E`90qBkDypaK7t13GEmQo?T*lL@7WzzhN*~(x~uz#xD$WUdi}Q ziTZ}s&3&-d%cbIS?=C_UVlo4jAV5ls)wRLuQEsmWr`xK1UGlj}HUm9^H2mBIuD<}{ zvRZbl&;M}(p?hoN5LiraS{GV`g(=90a5)8q=5dDl;dS-`vMKms)CZ(ckeuB-Dx&Gy zEQ1Fhv0Ma;OF+yxTv@buw|W|Sx=N9FrQcaA)%a^5DN^n3H;vtcpG|vII$t{ez^G?1 z857Ic3^lBuqwS0STocfDI|^t_SqCBIdxPsmQ)u>G&jZRaRU;xCVzQfpLm~n?A2hw0 zt;gH(vO^i$n+jsjb{U^-!q7s`99Un!-?z>n(c@US%~`JmAaapf8T8V?@ZGFzo}z&w zo&-q39L6Lfwu@XWvVH>MtEEXFC1mOIlnSMqyK)Tq<*{uuBwf0{t1BP-WVmF^Vblw%G4-bEp37i`u zm#wUYtEi~Nut}u!{9SzN(6^0Jg45~oBD?rJaRGoc`A^mFZ2OnEoT2&Iq#HdW!9a;; zXl|}pBgs!`9XC^y2LL*S**`caU~6kDGWQn{>2cu?XS{{JZV@>tVO^|MPXXhWx_~kAbEMd(5(Ci3#k6p1pM1ISN}NWFG0z1Sfc`y!j%pnHGd4&@q3Rh zC3C6jEhAxC{GUcrQC+PF4$;JlIovQdHh#wW<3a0RH&|sA6{vFOPjKW}g`n<|MQjg= z1ZA;|%B?Kc88$sP7Y{P#+o4|;7K4wQo1XT-^!!DmjV5&-{xQ*>u1L^3Y7E7&?(=V~ z$NkfRt#)?pUEN<24*~WI7pw%4yOXT{fl~hc$`${|EgkTWF={7Y0(1(z({nEJJq>%bvV2Dck8uB#&mW~t*X93V=Dg@ zF{I@e5YS9+ib?Xg=hU9Y1E?XU)gyW(@XUdMfnWugd(*-Ste5?lE?BLtr=^(!@Y7vs z-RjDfD{_j8T!RkryL-D@^Hfd$C9-pkBTLb1?45)X1!H=m&IdPD6ujd$)%L 
za{%J?DQ2H2V6m0>#2k@~wo*F%*UP6h^~$d_x~K>C0ZhhT^`&ZXnXkhy92esr-fGS} zd=C@{@sE85_?SH?#}XL^NGBh0<^U+qzNiqpLw3%VO?Gbj5gExBV@9Wc^fcda$V~^U zVJ!0CmztDc=6^Jo9tGk1z4wlRF8_0ulFKpbu8W@h)&cQPLRDq6y9ETnT_Y#ov`ct3NH?ToX`LlE`)SO+i6^DWVRZHKRWA$)RH;X%4!^#gs;e^f@OO@s z8+kZ<9#Ry)`YP4u(|5bwV32kJO8EiyvvKrW>kS)o2Aj?L*qO)`u6Gsj;OFTF_cy+} zO>EEcBKixxHkO1pSrMR^ zC}whs{!d?}ss>D?AsmDYH$9oUK`{ULQ4PtH zH`3C{UB4LJmR(_AO9B%X`3HEOH}t-o(hV&NFE4%U1Pfox$Cuo5Ldqp@9UJV)`+HU~ns ztpz;jT17fLw)AQ%V*reW!&UufFGlzX={vTQy1HI_@)(u>qz@1!OzcnIZT&d@_c2nj zWqG$Q@C}&$j*<+Ji-6b)#BVwOl4S$?C%w|J7230gM}cB!i>K&q5p07ZvM~57C*YwF z9!xqF_>mvJ6_6kKFHR$VG8FgT)8`wxS|Eyo1dBjO*ka{C5i<{{&F>l-I%e(F;Qu94 zMKNH=yT!%D<>u;s2nl$X+6xHr6uS`k1}GJl`atuj9$tCDczNFO#!M|Gs}!_8XqIU? z#y#kT-+Wt<^h^}w@*ECEfT@HS0`HCsl#fuEcQgC6XPJlYNJxrH2KG|7yuvm5il!H( z)xv+&qcnRn(gLVLHb6CsloV*oKp~GyM+9fw{qUqC8El8uPVZRH)M&`$63t!FLaG7tJ-uCy#m~RKu1#$)y6UV=1-) zmfro2DYj%FEsX<}3jK|%v3&Z_7JSYr_t_xOd7g!!Ycd0_q9&@)&)_V5FC-n!jQn-G zm8LD?ikw%!k&Tl&qo;HVH#qi(I9`~GwcO$jFtCqM(*me&eGKZgpqnNKe~diT*go?W zjNTg(k!ZYw=(pc%?>laVJ_)N4R!JXKjnbm&Yp3vyqh2aeVNj|{{l}HkK~%VafZBRe z)8ryltw&D%E?0YG48rv|VqF1^wg4)z)Nvc*(pm*3(Jw=Vf2#|pFQ#^3M_;MykGxk)Kf+0|x0ReWJ#GAE%q2B`3BKuu@l%mSa?))<}5GaRk1AD3JxM{Y_?iR#9p zkn`>KB+B=%O8$*5!IWAV4@yJKlN!QIS3sPQ@YX#|m;lcaNr zCa}x#WvG|hg%MV9oCD=0w_BD>$QY$VJY0P&R(4mO$9|2UXSII!$*JAV^V;{Es&CPR zl20e+7*#;Nr@mM(YtIB!pTG;GDi6LYN2YJt!CtD|Rj40EG*WSxHh&^CBjrmPKc#^Q zmvOvo+r;n~AyZ{MP?W^`f^F3G4+eBmpo52zV@~d+Q|XP7aQMp`KIL`C-%Jie5hP_ zw1HWrr937cfjGwsphOoY%Q_DBl9>Q|a+Y~!ZfiR((i(g0=U|@*YE`H^@Od#Mfd26U zN%fe9tMzYc@)_S<2+5L$B&8EV!9SkWfa(DIC_&B6{k^L4TTPNMgKAqO*1N|bdzO;YMi6sP1jl;8mhRdFw=DMg3_MgCKV|)~-dPtDVmYpWwyJbJ zX#MH#mdfEAIV2>6#g<;?yt2p_sNT!t+#aEnRo(t0Xl#aaj5j9Ptf3eyI$&{NE>>{M zQpnKuVwp^IZ_AtOIDshEkhU|laqSxHF|;vAlatHo-7mQHY9dlQzk$viJQ{)0h@h{@ zDsGMWeIDg452nhI3KtYyg)$5$=EeaAPehsHb>v01y69*=8V-!u*mx&C;Klc1T<~q< z3niMob<0e9ddw(n;Jl<%SbJ}2W2BzEbZ8SF?UblGRlg23avG4{Ie&JBl*<>(SJm)A=kQCcI}4Fj-Odp#aa8xcdRpV7Fe)hZu3g% zgmj^)$1o`?eyvS3+LOG`^tPKC_8w%B-(z^j4uBWRa?KAjF&3(d%9&<0`B 
z?^;ws?lv}%49BfgP|dA%ym5Ys(Huh!GtW}hLIVyFj@JEj(80v?k;9+}{N@X69l4iQ zFCAhgDNtIaI-e3V_AG*Ro?k#KxCv<7b0I%56NAc9G( zng6hF3^aFgRxJyR@ks~f9JkzG}`m7wOmJ1TgjGSRK3^0 zUto^a`;*&%Kmbbs3L7fQ8&`jybc0*BuUI*T#RGN&zz+XAsqz2wJcNY@(nz2TkJ%!v ze1Jfyr8P-B2lUW_Cbt>TJuHx^o2KubBJjjUhh z+E(DPITZP(eG>|r2*hy6-0;EOs&1D@y{d=$xhbp)S*e<0aZmi_3RI**&hYJc0ypngWetq zK*Mt;AT$KA!Ll|UBZ?}tD5R5E0Yj>#Qt?V}#8@FC6+uMt(}(|9pN6qRWP*DE^t*ff z0*EmDFQ0pNuALbM$XK2_F_bORK-txPV=kkhG_>uTfxWzgt8K>0XyVv&jBdQ7VuASS zu8@(XEMCQ7KjJHmBOLE6M0RZ7Ukac+C7^5?BgaU`{~C8_biMn>>esHsZ1`ozIyF$? zW`$_7G|q8MXF_5d-iFC*P4XNUJHT9?Sb5EbID=zv(s*0gcB}oL7hvm~Z0(ZB0ir@t z0xUy4PkImj$0=#=*mE3DA@kX@2{O!hX&~il>Kxs;?t83;F~9R$nVv#%0y5nH#wfZ= z6dAVowt5@KtqKr7L_+c>GAJ@Vd-{zs`PHx9pJLKAi+kZ$S1S_JVvxi;mY}=G_~Xi* z9Jfva-*e?;T+K+>QhU=LcjZT2*F(F?Md|JQnICG6MYnNW0PW=>-$6<)R91-V!V*cN z@0ftDz<~DKU1u}n;YFeyB4H54?*|$?MglK&ZU=6y-Rp=5S~|?kp?@%g9YKg1IbX241{JY)*zV_e$_-xdX^BHo7$aiGTAidHb$buK~=!rIiQ58?S2A7kTu$zMiXT`GsT< zuBq$UAaR;TpLI#aYU%c^f`El0i3tgN$XslbP5|u$_PYvf>dWkoFY^F zROfG{*Yv1ldpE0hm6Mw-MpmXKvFr_yT)0g~_Bq91vm2nEaJw5aK`@yF#^>=I2n~Y( zJdj;_!k%Qw`M_?_i1FOQe=y2nt#bWpXAZsyZW%g54_ZgpPNZFO736p~39yO%**>qr znpc)(;N*iDM+!VYjJ0ed^{BT>fG8E3qg`EiU{Bp=M!_MT-ttGe& zt>Rs+%xdGt6}EHyxCjRA>bQ2klc$oL7e;Uv7K@IGIJ%rz_2jb(l9yXSrEJR4WAUnh zUhGbeA6KD+x5QDRoE_9Zkhy_99;J&Km;@fhdMRwm#wn~&P0f^J(x8Q>EXTzIkrO+T zIbOQD1cmXAjS#-7>oY{~XQe79XKl;dIXEty`whH3@RTvrJViGigm`|7gm+4Wdk%fu zX{dNR@sj0SYX^bfV;9FKfEAv54E5C_Ggs=e_?$`%$C@#*V&&|EaYUr?hWhP9Qu7Nn zbS!fs)6H(BBv5^%8LCR2aNNcb%LA77@igFry%Do>1uCKcqA{qkKfvCGG zM(LDM%dBur)+uvMxx<*jNfZg@Jnns-691ax%R?Q z)z0r-o1eV0o!9P9hI}Jigb&B$P)dsm?I!v)sE>`r$oIA%c$^t9P_Xq>*T7R93~Ce@ zIhgKVracrM=oR+8PEcO5X#IPeeG_NY$3-_Jueeu;8y#KqddhKgJwHonn%olj2#Sbp zjq5l#ntz8JDA?;xbtsL+k|`LQuQF#SoM==2XX@W&#k1=~w^U7_6d}|SGxSOHCYnFJ5)QgV2|fz|N6Kv-$q@#s@3lb7LGRYwzzF#2gL!E-fu|+R z9s(AjUSTniz{)~cnB$fl$lN0P{@5+V2O&{1ZuDS{>F8>N>-gJQe#8PsWo(`c^B6sM zCZJtP7zI43IJf~{{hU801`1&jA|KX;%OzNqcPCG`h4{h6^fzXrl_nC!W1hS{ps_Iz zh3Yx4U*G;D#LaO_0@^lY_CT&mI^Y@Zd+&(Sw?4-Os;?O*^UXYuxqp-IXoZ|qO??p` 
zf&~iH`6!uzcEEOS-xS^r&W1LLVA3Q&xX3j_a&CTU#Zxb%q~kEK?n7MC)RK_e;o-cR znO1aAPiS(>31N#vff_fXlarHcK$pfbNUp{V@nUdrJSqA8-tA<~`#>Np+zgt*+H8ci zsO7C9WzCvoCnP2H^ImBB2nYQr*uKyjZ}i1$8f=MyreDy6=!nriZgy=$HJ_f84u!4uCm~ZD=<*c)Ba06Z0zmu~`%26>|06(=jUSRA z_q$Nyg%Ag-TKq@Q@QE*Q7#3YON3)}C64$~h9hm|kytg=1D5{qx=k5W*x)@+-hR0_= zY-CVWQ8g!ZQlum`BPpFoVaAZJL9xxt4(TCoP=6o;B1UtEk6^X|;@Kr(E*)|C4;= ztT>Va%08zQY!D#x@b}k&837)n-m}VIJh8Xas{8PLV-C-MrZL4w`2Hj$kp;AcFlZl4 zBAv~}P~95)NubojToEWpB7}0KIm03g2O-YIL#e;FWC+x0W*>?F6<`~GU-j5O#7qAJ z3cUY9&0E1x!#`_DWFZtZJRf7K2_upsb=Dj{Dq95)17l$YKb@M8HTe@6|GCO@oZx)v zpEKUl_lZA?G(~`TAEL;A*1j>Vc(z0(KMUe&(w;lpRv*3r$)na(ZsN5{jmSA+@D~^f4*(|-vXKZ ze|DFDtJG})^h%WXtY2t;=GmOuLm>PHn%b^Iog*#-+@>_%YD??~syHD4o)kbobkUwkV(wR0%j?l>7DysG&7UHeWApIIwqg*A(^CF62t zYx8-#YT)S+YQWu#qOVs#PE7%x7y&soW$nH^&ES)vzc+AayS)uI*PJ*Q=-Kj z;GD)jixo}$N(>AYMx$q<41yK|Xc`XR3LY}59q6K;Jm3H_Z~_dp8XLH1M-VvD@Cv*$ zIOT7M`QO}<&3&tY2kGnr9;B1I1GHWhv|GyvG;z#uOo&zoLUqLavA@2qe1&nsg?OMv N44$rjF6*2UngCp`j?w@C literal 0 HcmV?d00001 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png new file mode 100644 index 0000000000000000000000000000000000000000..26add6a2251d849f22310c28e923f96caa575f70 GIT binary patch literal 22997 zcmeIa2UOE(zBe8R6X3UR?Kl{NdRb$a&Oi!=Q?Ow(=n(F17wjMfk8~dlCnHrZf zSAW=X`6AZY?o=6;yN7b?Pv4#>sy%U(|F!KY`R9iod}}s$CNbmaYna&18D+=u07Xk* zm6h#|IUjgDJ##%PN5`PK0ekvDlc>JfQZc1-E4Z33+|y>DTWk*9aeD&{w#5Lt;RD+b z?uNl`Dc^AhpZxOWfBu7`$u>>1R>hVlc2UTD9MuC8=cYGj+d4;JH!(9GQR42)g^ZTZ z#5)O>Q{Wa!k!U=B)`oE9W%DxKiLq_xS^bS|PrlrKsgQ{l@y@*@QZApguRfU>LJeaK zSeTodqq!tfhE{;5owJb2#-~W?e0*8iDs7)gtu4L$ z?oCR%FJ8;kDw{h{c%QvU*B6C7PygbKSY^>6Wh<|y)j^LG$Kd1GV~MiSgym#h-iL=e ziUO9th_xABBUl@enIHG6Aog2^^j|!d?e7zy9LxA}@0iqvdpmE(9RFYd=U(zlEx~iixB|xW{QM*Hc;13XcGTK&#hOunWA3lV z5T|D8=#MV&eSf;k4C{&Lx2G;YJ+vDcF<-?X9*$dccQAl{3O4j4) zp6qPtlUE;QtyLE8O3#=!y`ph6?om(VvXLmC{igXBr9!zOs;AX 
zYo9XBsfctOvI)$KtxuS4z8Xg8JN!`dkar#JX>nCmRc$xgDX{NcF19lS&&r&vQXl#u z@w&w?jhyLJx2)97;^G#$+okT3JD9WJ-xP%tA-0=l^vs2$&6<%4=;lIb6j#v z;sxlr1Vz;;H8P`^@_6{N=3AaSeN8reCWny`(0x(f277XUO1r0z1@&uf9pU;1xJny^ z-D@DVE8QF(IuX-KPcLkGEvb`-7EsV*Glad{Sbg23M8454#vqoL9pw3@`XZS3GW)#fQII*fmNDnrZZo8UH-nq96vK` z;Ig4@;&v~GJKqMVna>v*lBc~LOUGI??ojU}QF3Nqlmt4up z2Y(vh9!>w6lFW1M&af4BgZExUcZlZDXbLr>uXIs^FD6fMv_*RSC-91BM=C3etvkIM z5fRae3vHFZhg7PG2<5S8Gr1T=_Ufv4rD9=L_-rprM$tiYWi*mQ0grY0s+Ne)ibK+u zrg{$P`>@AC#wT2`wUhUA7=rb7eQcd$9hOtkMdEe@C`LMwfTgo6C;OEUmNiy9=`9V7Fl1FKz^^-w9@}v z;dS9-LhcD%s*;Rj;N_)vn5d5~=j`P_DA%|NP9a9Y1G%y=GqXF*EX!4h--oP#((2*r z(&bb>xKA6fW<&OKg)bt5>o9|!&dxut>)u1dyE1iZiVX6{>A{j3d=*ZsQuO644PT^R zgpkj{l(5xDyet}-tF$#qbQ7QdzUobi8-kTeQ$r0VyqlI-ef^%)v_2W%Y425j&M%@n zL#x%ziQ^=EA>`BbDGn;m;UN9(a14Ea*I-)`^k9F)yggg}$~o$_gvRzU^sq-^a*y`h zi)D~_Y>+iRj($#PG`jpHdi{Nzl!E4i%0&y*76hwHB~CgT36gXQ6Qs>aA&*HQY?-Nh zkbL*;<1CSoO(0yI5X?D=>?+kdy`Aa? zL=;W4sS&;$Ic8{kZ$!k%2(`L}(zC5HEQhl*d;R+`T1RrLXrYdtwG3sY9J&bHi4x|n z=k04pzL5=^db}^PYnEIZSHNDvpc}E(nUTwHTLYma=e>A}!|K5d`L)Z%!z~MDBZyB= z+MZSo)(7h;nSyn+A4Ri>8Zx5JmXqT5JPj7N9?MX+%*_VL(rA~s`eCZS}Vr=fd2p-sbHWzyxd1*ADM=zf4c(Y}b z#MYL&vgE3$!wLyE+%o*ydT*0H+$BFK+tagcf?OGG?0F7`|f^+d1r zN~OJ~RSk1Qpz7`Cp1oe_f3uA5e3_F|F#KiX9Jb-%x{IeIHMxCCS@I=v|l0|RLqn|7^v8j{6RE*Cz z;EA1I%JN+OMF#&e$9umbbDOrZ@})|pnt*SZ;gp8^g2iP-U|9C}C>?~e)&-k1MfkVu}KVl1H zJ@`$$F(%eyc$<~!O?yy4NOqx|FxOG^fIs7ntg2at1}yc;%a zXM0x5+CU1m`B<98j>WC=JE5l^FKD5#!6GHR6KRfI%8Yq;$Jse{ImY2trxKx2^Bwsn zr>L4ejz;eoU1m5AhJezuOT29eWSsce>k|5px(xNXeV~L`syyf&8q%tq(C5P(U(+^G zqd>mOC2=~Pgip-0Xi1X?^f|Xqh2`M-r$Nab(Y?ZhT2E=5&XdSnnT@@g{H3|h9ysHS z-Vl^B9c-dc{#fvp4@)bIqJP2e$ZmSiE&@Z}xve#?4pf2F<+I|)naSI?48plubJ81e+6x)*2-BSw8<0;-zByc^P9X_9K2TCH zhZ+th_g0kWmOo4}(xJStt>|mF5)#VUR;F99vZlf7dw+4jbPSU^FbE4y)58KK+JPj> zp9WvuQx{Dnqt@;!MdoaI684dPcsE@{E2nM=o1bVK(6(k>&aWVX=R2cQ^!U6zkI#J# zcbezntZ7m7!xrQs&{g8N&k+bVy#i>8u_Z&&0_Jf$pEi=oW4JeF{nN>~LWw;>f-H9LPzuy)%d~i`V{|yO^;IwAGQF5VsxAjb->!zzO{mYB)`m;p 
z)u?8RsmHOQn%DXi9=4sP_Dh%rt^7*QRU%!bdl|FerTkpGdz;@vNb)d-qKR!}cGNv= zH^|bT3dph<9Hk3L^7BP+vR#BHVMlL&x!uc>+O7_txe@AIB9`wyeup%tfltG zI4&y*3G1y&d+k;`@UJ*kzP0b~?SLn;9UEs$O3(A1@`+*Zqw``3KZKn{8=UAV4)ShW zjEb_sI%rM0_`lnYXD(VkWt+ZuLAJ+7azN99a<7Q&O>eY~K0Kus7Zxz!@BuDdj5DCP zR2^UWzL8? hUqn_7hH>ErwY%`fc_Pc4Zu=(@)QGlu#n*4SORI^osg7e9IZI*bo* zn~f3QulY4zNJk3`ODJ;0-M9gSNb}`E5FWUDp8CiSwaa*xwRSQb-DZazu@5VgAZ}as z+4_OJ|JLf(mgWy78}Br3t}QJ^xj$IE(30c-D_NpUqC@;*-BR1Wefw3H5$!{uZxDj} z)#&R^pL~$lKi$ZnUf5O_7Ne!q=yTx){MU}7W#n*%v6J1MpBm4sKgLA<@N4^2?$ESa zXjtL^f4$i2?UN&|_{h3J=r-Xjkwg2a`71thUzNyJm?3XiK}`SNW^jw|tG7oD48P8r zWmdhq7$P|umd#(K5r5QQjFB2pogWpce!iu){lU$6QvpN;j2ceP@aMxg&TyioF9Xc41sN zKfV_~?kN9(+7((-N38`pH4=kjd6I>cX=+@Z)Oup)>D=a&vaKGbEARJT?@2GVL9*3w zVZ>ZBwiLqdY%wQzu)F`HJZ{uwHifCJu=wO z;zRS`x)U@H&JC8yL(QfEF5O9q;1duKu*AM>C|A`-r;DV}XwYiNhA)6wz+TwphOii= zUW=34&l&|oSPeh|(xvo5K$Vc+pP~d>J!ahglyP672-M1~c9YzinwrR|$A*XkL2^B< zv(FIjI8c*TZpRpy~k40;c1^ z;P|)`)xT2#PR75_?h6dX0g%W%4A-=_H%*wZN9DfIVGqVFGswD6YI&T~REAq_gmAIL zRQ*PDcbeMVWfwtWHh;>cAf#oJjBbg-h);v}61OL`P37MHM5of_XYCdsB|;;gAz8o1 zgJi0qfR$%E0^kD^-i<>G<|YMJ2m;hpyC+D6bSl&&vER91au}7>)skfVl^T^E-GE<% zy+5iHQ<|I4a6oiNS&mH;(`_Q}C9{)Jr$^oO6D3wty1ToV0f-YP)DM{MTIO?D@goWf z3N&2wYN^`lYuz?=b!uB~R`Y6WSy*M$mV;R`qiR5_=Iqo~ViZyME);ygOU_jRHw zAiQQ$Ta+nvI&s_pOAmack=$jNKxEdu(u%-ZAxG8$$Wi_7&S{*2XJzA17&WEoc~eu< zj1q~_G>u5?kw}+9Cijps+s{Xjhbeecl`8%Bk9pGkGASGBxnA@Df16;XH>ka=7|gQW zK;y19?sCXE#KSYxaV=a-Hww>sSF}3%ay5Z`GP^<@x0RUwVOn6t3OR>N%xL#P#%EH# zr5|>ZO3}#e=IPM`U~~neg+^9)&MAaL)_LUXRLmtu&i)P*A)sm5dJCs^qi^Wzef^=G z<8@)3agA^vBnq_8BQmIBR!9J~gnnnI*@bbzLdZUDT9|Q8kHV2;-tjITKpjZ@0xsx`dfV0?r8l6SOOu^IW47e>WGydy;)4^q-3w!;LO*y0E8WO=isLg&N~)fn4O(9Dl02{fHH-Frw>r$;`B5I zD)G@$u@84-$~B=R9j{)uCwEpot1x^Nv^YnPZluFTsZ?aoH@_6~Z~pA#S%5P|v=!Df zd=t}8mH2Rnp&yNeG8PuFa~$>0*YQt6w*QGB`5%Y+&oB)}H2*q>xx^Gj$26n-D=zk` zM5~T6pIr8PDYT}7eYIB?B9Y-kaSSnqfPn8cT#S?vWK(qdXu{Cwfnc@(MA$bs5=v6>>YVl>TGi|nttxzA*X;n94>(HchQ7d{pkn(6(+ba0hS>w- z=Sz8M3zeR<64i(`uMd^?mr}(RpBypvoG&;?gBOQ~hYy&JtyEp?*lnc}<-LsQtFBh> 
zL|M%&EST+-zI{!X28--AN0iRw1}^8|%x#>4tHL+Hnw2FJHvRn-^$(ZYA0PZp_W7Ul zNRm20Tn`>oa;#TPHp*|Sc26r!vL8Fow8Dj3&3^o7!jkSXo{y-;47sU{1v-&3t;-ye zArN?lri2D$mdcGb`ox6uER&f%Q($r&WYcr=pTg?x+uy?~Lt-knud@@3Z*s-f3TpbS z#|KjMRnOyUYHY?llf=fzDUM|gR>j4|Z#B!VM@nwro|l!iU-05jAnL7a_7PxV!E39=DA5 zaj|>XTR@!e?~Ex2HRGSmQvQ*E|1$~r|Bf8ag$U>>L>Qw#9~64E>T@KA{L$G0S5UKM zvtwgpiwOku^YqjCpgWbWve1BX_AwXB&tVs!{z&S)l2S-uU|^nI)d0WC@V0}ncXt!$ z1cFMRq>;~$d6o`=9(I<&C@!BprGRX_`YFVI{XN8zOG`);O3LhIn=RcNYC*4o-TXa? zOwRw*KurEQM1S(>Na`ONvV~fUSwiv) z`=5VR=ZhT_Q&3UM{rScg#g?94@db58b>vS;PYVG?JPR~h^u5LnBM)zH1<%62j*V%r z1MmbS5>RS07pgpqIjNY9b^M%D;ZR45!zn2^;T?v~dn`@1f((1nsq_in+TLm3?%fyk z^z_tpbrge2n|z2wqQ~0DAqnC41#Grm7Q8669)w*;!ZiGQqG4$^{EZd)ZCJ_vpt}>Y zw*b&S=XpjIq}_8cOL+nZ~byizsHwafC`mp!Y#KeC3r*5o>5FxD1} zw~iP)Nw@Cd{~SIxHdefUZR1au3OeDO*E8+C!dpMi3ROLaqZ@VDl+W#Sg{~@3=IuStc;00kv|+w`rIXG zsA*^v4pq3x+wTGeyQC0Dwyu!96%iip85-;WwZ>v%Vzbr;W05sh?fjqBK~DcLMCp<$ zaf`mxgvaA8G4fr+u}Ly-rz`-v9ZkAYuV1A6ZpRoC;k06&Ast_A84qz%Ui#=VB;8M zWmoNGYu6MWG&?_U3fd_TKvx$xHR;U*uFwOBI?`rbzaV9RskZsq)e2WrZkq=Fv0~lS zlS6zVAZJ2H=|C~0er`8nrPqK|-o8H_s9PpT8}=FjnbprZ*oKArz-}#i9>n%D09lA0 z5M^nGSPy#(M+ek%j}YBYz28B{JqR(;D?q1_bCqXb{yM{!QFP2Jb*tYwkqI&QO-g2 zvv~1-jfe#!N|{||w;p?Rav6|+%25je*~Z{JndaMy;=!$*83W1;W`3eMLTe0AV8uyS z!WX3D`%8mIkLsaVcZ-XYm|>9tc4;JGdgZQ^#H?CxuAoUO4>uYaQ9t!?7Xy%GmuEpb z1nAWr^$-YJTDGshXnx6w5!pX>BwCq^$O=#d&iMotmEJdH#^-IXY*HT5WPuB?VT zb*p46IEtkLd~sH*C(H~YKpshVyI+*i6TD^2H!M7JRmTt~Z%7Umo}?-Y*1=T{R@jlo zg=;l|fYO!`@B5+%sc!$TmjgymiM4`?v`fru{>{wl3LWr=!!^kRK;80 z&9xq2?<4?2K81u=eqr9u;gHs@r~@J}139#m!c8i6{ddI!zitWC)w|aHVr5<|E2BDX zDD5ejdd>7!2mGRJ{K34Y5M4Ooy z*>_IH^n%WwtbOH~Zy{@NVgJ6L;G3hFUY*MsGh}UZR;1SOL#^ z*I*X0P;?hTQY2%F6FSPzxrF@@Ps%GAP9Lc;*Xc>O$jD+S_=a_UuOa^I3#cpHPXS^F z5>O^sj0y>I@$rI~W3iE{vl!-T-sBpjiVFnzkx5|Ut&3*hQYQSOfDXf4N_X+huwVC! 
zz;{m{3~jFu8W-zt{9tCH5Fdg?Y4a33jd==b*_$_S?w0`+&Am#AXL#sT>L-|Dkw7~m zQ-c|~RyqJW$-L_-?BQd?h#`4Wf}9LRuk1tcyZTxLz*^%3HAy50rpsCB)8ES1| z5#GA9TDHQIsJ*V`K{Ag%mF(J6%&k)z4zH-Zh%yEZ zb|9IMdyoIr_RHuILm7&go<^ZKelc|*WEGA|>qc&y)+=d(2#O7`&{1Meg&urs>I3!K z*_O)LbSE>EqR_`^Zjh{-sTX&5q7fcYe3qvuMv5oqY=FJ{Ir4h|5-d!vWo>Iex-rGI zD>PJpVtTs1prVtn<>6{^yv(8Q^w`bU5)vr5auc*nym}$kkAqTL-xGDYSV2rhf0wO? z@YJz9Vvo=+Eo`E~GpjqHprBHaVcR+%dF$k@$De;ST!o6C8zQ>YynCi`H+rUHJbzlD z#w?yH6sq!G%8+%-hA=Nov6cIrqo9t$Irap7)O(;V#_-j`!;$_Hk?=|yXF!ZJnj)LI zpG!7JWm$I~p8v>X`Z{lmVE~l}_=PJmJd`zwZ)rJ&yTVK+$SVe!es~<*ou$`sw(N_7 zedz|^9h_y+^&{=_#JQCGAe#T{6XaI{Gmkb!xz_o?rtGvdX+_UBfy}u%v}MDF!DG-% zJFBx$>yifaN#X7{l^qG5rUoa}SbKVm(xYeNi-Xw5WY9f`sW1kja=g zuhAQ@8KnZ0ac{r1GZ%UE!9_};Vt;5V19v+_ckyqA(QjFICkR-5+9@%1S#RV3A zMtS0dNV8eq9J#nL*wl`f2rsD87x|?v{kk^rij?S?6`UWHy!Hhog@HNet^rNdBzJk2 zyhUBN;C8yk_^a9l!uxu|G5y6i`310 z5a#5k=d#p|sVC4L6U3PfH3bZ6t*1AasK+k6Qk(=dNLOt3#V|}iolaW8F6kkHJ2SbK zBJzuHB!|zi2^VQWbNzgq%nZBi#*h=ltVCBh_qi%hE3PnR#Me}y;b2b+B-HcQa+>iZ zGaWMdJS6G@q3uwZ;cw&@wKUJpiwK7!z6ekl92-uwcD(81Ne!&1d@HV>Szp{%=V=A~?@lwB#|f*@MS3S-XqxDR^$HQWp|=t1T?pHW{x zQEr*F`ob;W!d$PaR#1SdCzZs+Y%OybpK#U|^ic9+{I!~Z(9zg%f7sNWYVl`#p{#XY z0q0Fjmm9CI)@Yi?Y2>wNo2Oc0q!b&p1oJIw9-w0|21L-{ohq?scAviQ4OGPpzzG-w zH6$aVhMGy`)uhiNLT)M`A_NKymIl>&6hY4j=$E$lIpD>w>Z?pq}q@!e-NdB2#} z_FVx0%bs~^HaX;wLk+hhVn2H>|5Gn-WwF+ zfkexk;t*h}4Oqm;>t~2|p6C-Wn64MlyfVb#>u18>8Y7<|fS|bmqM8{}Y{Vr8s1t}# z$VgdpSX{h;sjl;gHV#PZ8%W}ci<4$!S}H%bkQr&orT06tnNGlIb@IEF2F)}mH-nU6D3b2|N=vfgy^?=DC;h1BnsxU?lyjz{x$rL6X_M<^5Ti=fY5ryANTAs|k!>YXTfxY7MX+&K< z_1BRhCo5|LSfHI_Nyqbn$>7cA<+DLiWx2VB$tC&IZiug@mgCxHR@c@_?2`vxsLxv4 z6S-V&|1zx;^&;E^%*l$gTu1q$#V z(D^^h+VuC)gTKOIpt|~BfKYrsN5T~_yHU7P`Nk;)SBeMTNt*^F`t0PRgaKx5=bz6H>^CJ0al%Br6Nz6+3@VxX0~ z=hmL@Q$+!B#$~Fhm1fl!4~4F&vHE6H;g(AD;@|0@-zm^t8$y9F3|c3-6%{IAsr-I8 zE!ZExBa@l!+`StazRL#zys#dEUG(K1W#F|+> zYb9;lB{ksvf`I~|=kK%HCG}U)n|kRp1wwv35wrve=0W>v zgoA+fd8*`uKqzTSoJ2O(u0J*+LgZtkOQ5s!fz_222z_)IJ2_{4tVe(D@6tn8fWG8G 
z2)gr{4+!cH+^o)B70G7SG|`y!w#2Hm9|*3F)$aj=O8$`SUdWmGVyo*gZ;rKQ9*~w!vaEC+ zt6!PQ7WzF;2PvL(@5`>r6tTsOr^S0f%D>h2N=x;BNImX5-6wI&-EKIdKQF+I_$`7LL&0STOND&63< zW-6M0{2UCBhz4xhNI(|*7ghF+_H|hp%vp9zi?XCqz_j$Wnp(#b0;T{nnicS?v4yAS znk_K8?%sL(nyjjCxz!0`M%8*mRcFj*sm+{$_84e1hK*k zxBWgrA9{tEkEUgX&s77M0*kQ4_pFUL0RwX-jOh9Ki-6|7%!%QVW_=wmIe^!@+yel* z9`#MfV6dNoTAp;J*wQF~-kmw)3hpQ|0Ya?)l_umbXyMk&p?jJ>(Qa*Nct9eYa{|=Z zXJ(ktlKp0d3>7xhPs=wWp>KHZX+=z=a(EcB7YqtVr@p?(iS2;z&83Wtl=@g*sTlq& zTqs+s|Em;aNA6C}D(fy57|Tq@V<-&&Cwb92V4Me@H1A8o{*GiGP6Xh6!D z<4~b&AOM0{SHbqx4-QTtC+0qWJQ=Ne>C*v@K?eZM*MGHe0ZQ?(3G^xe@32^GAlb@B z!qEp{7L~vyT?xz~K>YTx(z)e!B?TFg4~!=o^EKnqpIq$=Vz7xNfQ}?Kzq--c59otL zJY>R8mbdX)^n=V;5IPdy?gWgXtx@?`j_ZjyJ?0ResLfw|Z4nT_(yW$+IeP+h@aTm) z2QwL`NIzhiUWj-1wwXz;GkdiSQxEC*u!qE0yKQ*1R(E>j%7#sbHUyN z>_s`#{DHNuV+E#8c$>5u(OU6<<}&2sGmd{D%WTgD2e9u+QZ(*_~Irch2_IC*FkfrKH) zyN;bS8OeygIXwS_l0Pr}(5vdw$zj~v=X0Sae6XV5qcHQSH4drg$`pq=mw@Ndb`kNC z{L`_lXAiY!iz=_T-d9D@iKIllf8g+RWvs>Apm&ft!Nb~kc&arbFH=cZ-Fvx5?%V`0 zVd^`|>Y{+|Q6fwV2D8Zga=WsbVHCiEbK{9gtb!i<#(lgIyo|7{G%VT+U%h!lSPZ&;D9i zSg^+>;%j=N$Lb{Op^-|+)g-V6`XLY7<4L19*+J4e^V)6@_nt=l|vfY{5TVX$5*s^)Im1F2nn#6bhNWcB>E4sB&&(k6J?T+)UQ;VD)H2&%bZRRbx zDkn3?GLGH|;tei}8Gy&46lV>WGq&O0|dlFb}wx zi{H<0ggtu!oxYy-)V?3UzL{47EYiq$+t(;wbMx>@c^0!UMKtfi2$6EIr&LaVtUu=$ z1@eztQ4r`nvcKHkdnWme7HX-W*u&NMHA*M9Sc~YLm7Hb_z&7QLUxxSlK?(A0V?}J6^fpcBg}f3hd0r@08DoSxKHIzk2)e zRPkJ0jp!S}v=Cc4-(}k0E<-rc6j&kV323h8TCG)LE*oHvRio|oZZ!j_*4`n-u6=s0 zsxai7_FOf*FjnSc*29tIA;yyklzx$whbt?0csuON80cz4ie zmS$M92X+7k=rGmZk3&+voAuUv&lvPbG}0U8;z!JVM~-;+pT->|$$gXywFEQ4A;2r| zbx-%MK91%2)DaGnBB;MK^=kx206L3m);g~A?uYD^va#_e(mmnpZ@O1!=E6cnkO8}X z!D9o=WEXUskm8|rb(k=2#jaG(?t4llkop+H7&1eRl9egh$em~CiNv-FZ>_JXBS|~3&*Hwtp{Uuxcv5mH*1S|a zg2Z0n7;sze%agDH)us(y(V=AlWhc%yeKq>~jeuPSJixvLiat2b$ZM1xsw!yimq{)Y z}g%?$PS{YWk&F9c2OFLLkpML z^s45e`$BP4)>?_Y@bPAw)q+jir1L;{HDQClfSDWt_a57{R)tf*GDUBzJSK$0JuN&I zPn0&8EKXvK?0=l4&AjO0L+V<2LX^CqL|rK^E@Wq|)TrsHne(!I1^KlIutNt4ID9;_ zBw@{#0B=`)M*`;m>*Evt%ro`O$>AOv4?}Grx%o7Gg^ 
ziCI}lCp_M4;Y~lZ(BOR|e6dDDy2f0ak(V|i4pZI*N~Xk9sf3{Ax5s2^Y$AC|9xu_3 zM3T0s+bNHP(^qoD``gwB6TLxbiIR|YA4!Z^D-CjubZtg|Ko%>b5-0}-bmd^Tii;rW zi?W=clfX9)r|XQTF5p)e0x2@M3-+0j&O{QI){)uOyZ$&xr?ED61A_xT4Ren3B-J3MS zi*7tA_)w;Qa!sr|-PLBzVl(W=j=r1?Z7uT1rTdfIWL)Hp^7ER$`KZCNBm8Y zzGJac`6Gy>^uTechWi8zFyI9WIrKuX;m)ZrJ@CMPNprsCxBVB;sVu$bZ5e-g$Wc_{ zo2S#waePhrlm1PPDWl%T;nK#5#x>5kNH9)}xf~<~ zdjwDg=M0)!iwrXKS~+IncwxqqPw$?Wnv$=BVDl;$doxM5=?e~CN5@{h&3ItuwwM#M z(z_Z`C@y-bI~LLBWC6@o(qJtS067=VZ5Bs7n(i7}2e`v?s2^L@{iO$N-ayj8FFly7 zJO}OLK#J1m+xxwB_0m!zQiD1NxOeVCdAWSvtS-b6$_;i6AF(UF59IGxz1_v(6OLuv z{7!r6*PVb)o}UWX1ltW#qjJo2cV;@+af9;p`_=pM*b7elNf{C?#VHF5tiuCnddcLg zQ=CWwZ@$J!8){lE>4B|Z{7^cdpPtDVVLPYABsG7o{Xq(`?p*F#3-HM?K+Jp04_A4N z)dxW;Oplh^n>>I?&xp6s;5!U9Ad07d0aMogPPzFTRkV=50=$G3wJ=z(rlbtmJnZjT zoBksc#^0Gq{=OpsU|Z>t5U9lzs(;4L+E;@LUjY-#bD&~|tWQ5A3?geji~U)*0l#Wl z`ZO(@bKz0v=czdTH#Q>KI|*=N82}<%soKj;J5}QP;Vd+1|AzHKllJcl)D&opIay(K z?B9j)s;#I2!S-jdDG=>t!>8a9T?S+bzyl600I}g38g$675iI~0_{{|Ncb?Q`3Rq;% zJL$#tKwXz+2I$!j4tHVAl}9#;4IC7o2xCo~mhQ#cRh=Ma>V!b>m-10BNeSphfSjcI zW5ld~c6=pb8Au1{8a}2ee@3WWcF@dH3G{CH@;h{ii?qz+ew3 zS3J#n9BbXe7oO{px~>@4dy*e8o=0FW$WzKL`@jwWM%Otl)i&)hf|p0h6O0VUE$D>4 z-_LlaAZdncTF3|tU^?y`Mu0^4LxB4&kLf#QSr!{7{OkBdyc&1MM~IiCzMX@Luv>4> zY}x$G^{LldoiNzZEU?CiUg|EwAK5nFExD=;x7D2k zy;L0Z-zS+(Zol&6(+$~P6CxOF2WUn~T^dZTx1U~Q_C%x(ZmW>kLCCk~Eo3$if+pf5 zbkUsYv6MpQsDNiUzca&E6h%~E==%uU?o>;_oS$o*`>I*h!i=^eq>Y=6*YlPQyLajy zcC4Dc7dX^GC~R4oIecCW_B|+|CKeQIQyExxi-h0aHl!!`e@r=6ngoi$3gTO;1Rf=6 z9p2(FNDJ6|0Ijpa^4tnje*0(Wc5~5d;0o>sz;3}<#22t?cPBa73BpuGE##TJ4+X@^O??> zRUceY3)*M?G^jL%z6mft9^aWgPLjd0J~NdCF)=X%BvP(dZe{4w^{BEo{BL+U7H8E* z0U06^NACSi$~&@KP0Fff>1SN=Na(8<_Dzim_WvJYFZNKxYR>|3ibciJa~vdg;k3Kk+owBN{n^-NA=#|Y>2TW>l_aURu*!HP zkF!>S!FIR(;c)x^2POI!RL1WPH%0cpcr*8atu?Mdj?QhS?U)V5T*q z_QX7xqZnXcv&@<4&+)%o?UVQfm?R-ESOdaxX(mT(9w3p(xz9!;`LiiTt-s}QFknC) z;~S+{Bm+|%!>a17ioy1w0Q@Z;`L^B7!Xk0W_DE3a1Bkcpqd=V`-^CJJV&>%JRAT6O zP*Z$jEf_Uj(g!)>tcHQ&e^cV#7yn}73`|Ps0dz_|Qpy^!)z9uK8>S zFekfcDFXuS|8ZFceaXMpVE&zs`;TAzgRL5V<3C2pYQ{ 
z_sL;z3_=Rpj0)`301nw~dHuhJ*Sv6 None: + parser = argparse.ArgumentParser(description="Analyze smoke workflow logs.") + parser.add_argument("--maze", default="V01_empty_room.json", help="Maze JSON filename used by smoke run.") + parser.add_argument("--tag", default="", help="Optional output tag suffix used at smoke run time.") + args = parser.parse_args() + + maze_stem = Path(args.maze).stem + suffix = f"_{args.tag}" if args.tag else "" + p = Path(__file__).resolve().parent / "results" / f"smoke_runner_matrix_{maze_stem}{suffix}" / "detailed_logs.json" + d = json.loads(p.read_text(encoding="utf-8")) + runs = d["runs"] + print("runs", len(runs)) + + issues: list[tuple] = [] + for r in runs: + label = r["label"] + cfg = r["config"] + queries = r["queries"] + transcript = r["transcript"] + system_prompt = r["system_prompt"] + + if r["summary"]["query_count"] != len(queries): + issues.append((label, "query_count_mismatch", r["summary"]["query_count"], len(queries))) + + if cfg["observation"] == "text_only": + if any(q["has_image"] for q in queries): + issues.append((label, "text_only_has_image")) + if any(q["user_content_type"] != "str" for q in queries): + issues.append((label, "text_only_content_type")) + else: + if any(not q["has_image"] for q in queries): + issues.append((label, "image_mode_missing_image")) + if any(q["user_content_type"] != "list" for q in queries): + issues.append((label, "image_mode_not_list")) + + has_initial = "Initial maze (fixed for this episode):" in system_prompt + if cfg["observation"] == "screenshot_only" and has_initial: + issues.append((label, "screenshot_has_initial_maze")) + if cfg["observation"] != "screenshot_only" and not has_initial: + issues.append((label, "non_screenshot_missing_initial_maze")) + + has_mechanism_list = "The environment may contain:" in system_prompt + has_rules = "RULES (domain logic):" in system_prompt + if cfg["prompting"] == "minimal" and has_mechanism_list: + issues.append((label, "minimal_has_mech_list")) + 
if cfg["prompting"] == "standard" and (not has_mechanism_list or has_rules): + issues.append((label, "standard_prompt_wrong")) + if cfg["prompting"] == "verbose" and not has_rules: + issues.append((label, "verbose_missing_rules")) + + if cfg["querying"] == "full_trajectory" and len(queries) != 1: + issues.append((label, "full_trajectory_query_count", len(queries))) + if cfg["querying"] == "step_by_step" and len(queries) < 2: + issues.append((label, "step_by_step_too_few_queries", len(queries))) + if cfg["querying"] == "subgoal": + if len(queries) < 2: + issues.append((label, "subgoal_too_few_queries", len(queries))) + if not any("subgoal" in t for t in transcript): + issues.append((label, "subgoal_metadata_missing")) + + if len(queries) >= 2: + second_text = queries[1]["user_text"] + has_recent = "Recent history (last 3 steps, oldest first):" in second_text + has_action_only = "Recent steps (oldest first, action only):" in second_text + if cfg["context_window"] == "current" and (has_recent or has_action_only): + issues.append((label, "current_has_history")) + if cfg["context_window"] == "last3": + if cfg["observation"] == "screenshot_only" and not has_action_only: + issues.append((label, "last3_screenshot_missing_action_history")) + if cfg["observation"] != "screenshot_only" and not has_recent: + issues.append((label, "last3_missing_history")) + + steps = [t["step"] for t in transcript] + if steps != sorted(steps): + issues.append((label, "transcript_steps_unsorted")) + + print("issues", len(issues)) + for issue in issues: + print("ISSUE", issue) + + for r in runs: + label = r["label"] + cfg = r["config"] + print( + f"{label:24} q={r['summary']['query_count']:2} " + f"steps={r['summary']['steps_used']:2} success={r['summary']['success']} " + f"obs={cfg['observation']} ctx={cfg['context_window']} qry={cfg['querying']}" + ) + + +if __name__ == "__main__": + main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py 
b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py new file mode 100644 index 0000000..97fe4d8 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -0,0 +1,397 @@ +from __future__ import annotations + +import json +import argparse +import base64 +import re +import sys +from dataclasses import replace +from pathlib import Path +from typing import Any + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +V2_ROOT = Path(__file__).resolve().parents[3] +if str(V2_ROOT) not in sys.path: + sys.path.insert(0, str(V2_ROOT)) + +from nlu_benchmark.config import ExperimentConfig +from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA +from nlu_benchmark.loader import load_maze +from nlu_benchmark.runner import ExperimentRunner +import nlu_benchmark.observation as observation_module +from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch +from automatic_maze_generation.mazegen.solver import solve_maze + + +_POS_RE = re.compile(r"Position:\s*\((\d+),\s*(\d+)\)") +_FACING_RE = re.compile(r"Facing:\s*([A-Z]+)") +_GOAL_RE = re.compile(r"Goal:\s*\((\d+),\s*(\d+)\)") + + +_ONE_BY_ONE_PNG = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO5nVxUAAAAASUVORK5CYII=" +) + + +def _extract_user_text(content: Any) -> str: + if isinstance(content, str): + return content + if isinstance(content, list): + texts = [blk.get("text", "") for blk in content if isinstance(blk, dict) and blk.get("type") == "text"] + return "\n".join(texts) + return "" + + +def _parse_prompt_state(user_text: str): + pm = _POS_RE.search(user_text) + fm = _FACING_RE.search(user_text) + gm = _GOAL_RE.search(user_text) + if not (pm and fm and gm): + return None + pos = (int(pm.group(1)), int(pm.group(2))) + facing = fm.group(1) + goal = (int(gm.group(1)), int(gm.group(2))) + return pos, facing, 
goal + + +def _turn_to_face(cur: str, target: str) -> list[str]: + ci = FACING_ORDER.index(cur) + ti = FACING_ORDER.index(target) + diff = (ti - ci) % 4 + if diff == 0: + return [] + if diff == 1: + return ["TURN_RIGHT"] + if diff == 2: + return ["TURN_RIGHT", "TURN_RIGHT"] + return ["TURN_LEFT"] + + +def _plan_to_goal_from_prompt(user_text: str, budget: int = 6) -> list[str]: + parsed = _parse_prompt_state(user_text) + if parsed is None: + return ["TURN_RIGHT"] + (r, c), facing, (gr, gc) = parsed + actions: list[str] = [] + if c != gc: + target = "EAST" if gc > c else "WEST" + actions.extend(_turn_to_face(facing, target)) + actions.extend(["MOVE_FORWARD"] * min(abs(gc - c), max(1, budget - len(actions)))) + elif r != gr: + target = "SOUTH" if gr > r else "NORTH" + actions.extend(_turn_to_face(facing, target)) + actions.extend(["MOVE_FORWARD"] * min(abs(gr - r), max(1, budget - len(actions)))) + else: + actions.append("DONE") + return actions[:budget] if actions else ["DONE"] + + +def _state_to_maze_instance(st) -> MazeInstance: + def rc_to_xy(pos): + r, c = pos + return (c - 1, r - 1) + + return MazeInstance( + width=st.cols, + height=st.rows, + walls={rc_to_xy(w) for w in st.walls}, + start=rc_to_xy(st.start), + goal=rc_to_xy(st.goal), + keys=[ + Key(id=k.get("id", f"key_{i}"), position=rc_to_xy(tuple(k["position"])), color=k["color"]) + for i, k in enumerate(st.keys) + ], + doors=[ + Door( + id=d.get("id", f"door_{i}"), + position=rc_to_xy(tuple(d["position"])), + requires_key=d["requires_key"], + initial_state=d.get("initial_state", "locked"), + ) + for i, d in enumerate(st.doors) + ], + switches=[ + Switch( + id=s.get("id", f"switch_{i}"), + position=rc_to_xy(tuple(s["position"])), + controls=list(s.get("controls", [])), + switch_type=s.get("switch_type", "toggle"), + initial_state=s.get("initial_state", "off"), + ) + for i, s in enumerate(st.switches) + ], + gates=[ + Gate( + id=g.get("id", f"gate_{i}"), + position=rc_to_xy(tuple(g["position"])), + 
initial_state=g.get("initial_state", "closed"), + ) + for i, g in enumerate(st.gates) + ], + ) + + +def _xy_path_to_rc(path_xy) -> list[tuple[int, int]]: + return [(y + 1, x + 1) for (x, y) in path_xy] + + +def _path_to_actions(path, start_facing: str = "NORTH") -> list[str]: + if not path or len(path) < 2: + return ["DONE"] + facing = start_facing + actions: list[str] = [] + for (r, c), (nr, nc) in zip(path, path[1:]): + dr, dc = nr - r, nc - c + target = next((f for f, d in FACING_TO_DELTA.items() if d == (dr, dc)), None) + if target is None: + continue + cur_idx = FACING_ORDER.index(facing) + tgt_idx = FACING_ORDER.index(target) + diff = (tgt_idx - cur_idx) % 4 + if diff == 1: + actions.append("TURN_RIGHT") + elif diff == 2: + actions.extend(["TURN_RIGHT", "TURN_RIGHT"]) + elif diff == 3: + actions.append("TURN_LEFT") + actions.append("MOVE_FORWARD") + facing = target + actions.append("DONE") + return actions + + +def _inject_pickups(actions: list[str], env, state) -> list[str]: + out: list[str] = [] + sim_state = state + for a in actions: + has_key_here = any(tuple(k["position"]) == sim_state.agent_pos for k in sim_state.keys) + if has_key_here and a != "PICKUP": + out.append("PICKUP") + sim_state, _ = env.step("PICKUP") + out.append(a) + sim_state, _ = env.step(a) + return out + + +def _full_trajectory_actions_for_maze(maze_path: Path) -> list[str]: + env = load_maze(maze_path) + state = env.reset() + maze_inst = _state_to_maze_instance(state) + solver_result = solve_maze(maze_inst) + if not solver_result.get("is_solvable"): + return ["DONE"] + path_rc = _xy_path_to_rc(solver_result.get("path", [])) + planned = _path_to_actions(path_rc, start_facing="NORTH") + return _inject_pickups(planned, env, state) + + +class ProbeAgent: + """Deterministic test agent that records message structure on each query.""" + + def __init__(self, full_trajectory_actions: list[str]) -> None: + self.calls: list[dict[str, Any]] = [] + self._full_trajectory_actions = 
full_trajectory_actions + + def __call__(self, messages: list[dict]) -> str: + system_text = str(messages[0]["content"]) + user_msg = messages[-1] + user_content = user_msg.get("content") + user_text = _extract_user_text(user_content) + has_image = isinstance(user_content, list) and any( + isinstance(blk, dict) and blk.get("type") == "image_url" for blk in user_content + ) + + full_mode = "You will not be queried again" in system_text + subgoal_mode = "SUB_GOAL:" in system_text and "ACTIONS:" in system_text + + reply: str + if full_mode: + reply = ( + "SUB_GOAL: Execute maze-aware end-to-end plan.\n" + f"ACTIONS: {', '.join(self._full_trajectory_actions)}" + ) + elif subgoal_mode: + chunk = _plan_to_goal_from_prompt(user_text, budget=4) + reply = f"SUB_GOAL: Advance toward goal.\nACTIONS: {', '.join(chunk)}" + else: + step = _plan_to_goal_from_prompt(user_text, budget=1)[0] + reply = f"FINAL_OUTPUT: {step}" + + self.calls.append( + { + "system": system_text, + "user_content_type": type(user_content).__name__, + "has_image": has_image, + "user_text": user_text, + "assistant_reply": reply, + } + ) + return reply + + +def _assert(cond: bool, msg: str, errors: list[str]) -> None: + if not cond: + errors.append(msg) + + +def _run_case(base: ExperimentConfig, maze_path: Path, label: str, full_trajectory_actions: list[str], max_steps: int): + runner = ExperimentRunner.from_json(str(maze_path), config=base) + runner.env.initial.max_steps = min(runner.env.initial.max_steps, max_steps) + agent = ProbeAgent(full_trajectory_actions) + result = runner.run(agent, verbose=False) + return label, base, result, agent + + +def _suite_cases(base: ExperimentConfig, suite: str): + all_cases = [ + (replace(base, prompting="minimal"), "prompting=minimal"), + (replace(base, prompting="standard"), "prompting=standard"), + (replace(base, prompting="verbose"), "prompting=verbose"), + (replace(base, context_window="current"), "context=current"), + (replace(base, context_window="last3"), 
"context=last3"), + (replace(base, observation="text_only", context_window="last3"), "obs=text_only"), + (replace(base, observation="image_text", context_window="last3"), "obs=image_text"), + (replace(base, observation="screenshot_only", context_window="last3"), "obs=screenshot_only"), + (replace(base, querying="step_by_step"), "query=step_by_step"), + (replace(base, querying="subgoal"), "query=subgoal"), + (replace(base, querying="full_trajectory"), "query=full_trajectory"), + ] + if suite == "all": + return all_cases + if suite == "prompting": + return [c for c in all_cases if c[1].startswith("prompting=")] + if suite == "observation": + return [c for c in all_cases if c[1].startswith("obs=") or c[1].startswith("context=")] + if suite == "querying": + return [c for c in all_cases if c[1].startswith("query=")] + raise ValueError(f"Unknown suite: {suite}") + + +def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all") -> tuple[Path, Path]: + maze_path = ROOT / "nlu_benchmark" / "sample mazes" / maze_name + maze_stem = Path(maze_name).stem + suffix = f"_{tag}" if tag else "" + full_trajectory_actions = _full_trajectory_actions_for_maze(maze_path) + # Smoke test already validated rendering elsewhere; use tiny static bytes for speed. 
+ observation_module.render_maze_image_png_bytes = lambda _state: _ONE_BY_ONE_PNG + base = ExperimentConfig(prompting="minimal", observation="text_only", context_window="last3", querying="step_by_step") + selected = _suite_cases(base, suite) + outputs = [ + _run_case(cfg, maze_path, label, full_trajectory_actions, max_steps) + for cfg, label in selected + ] + errors: list[str] = [] + summary_lines: list[str] = [] + detailed_runs: list[dict[str, Any]] = [] + + for label, cfg, result, agent in outputs: + calls = len(agent.calls) + first = agent.calls[0] + summary_lines.append( + f"{label:<24} success={result['success']!s:<5} steps={result['steps_used']:<3} queries={calls:<3}" + ) + + if cfg.prompting == "minimal": + _assert("The environment may contain:" not in first["system"], f"{label}: minimal has mechanism list", errors) + if cfg.prompting == "standard": + _assert("The environment may contain:" in first["system"], f"{label}: standard missing mechanism list", errors) + _assert("RULES (domain logic):" not in first["system"], f"{label}: standard unexpectedly has verbose rules", errors) + if cfg.prompting == "verbose": + _assert("RULES (domain logic):" in first["system"], f"{label}: verbose missing rules", errors) + + if cfg.observation == "text_only": + _assert(first["user_content_type"] == "str", f"{label}: text_only should send string content", errors) + _assert(not first["has_image"], f"{label}: text_only should not include image", errors) + else: + _assert(first["user_content_type"] == "list", f"{label}: image mode should send list content", errors) + _assert(first["has_image"], f"{label}: image mode should include image block", errors) + + if cfg.observation == "screenshot_only": + _assert("Initial maze (fixed for this episode):" not in first["system"], f"{label}: screenshot_only should omit initial NL map", errors) + else: + _assert("Initial maze (fixed for this episode):" in first["system"], f"{label}: non-screenshot should include initial NL map", errors) + 
+ if cfg.context_window == "current" and len(agent.calls) > 1: + second_text = agent.calls[1]["user_text"] + _assert("Recent history (last 3 steps" not in second_text, f"{label}: current unexpectedly includes history", errors) + _assert("Recent steps (oldest first, action only):" not in second_text, f"{label}: current unexpectedly includes action history", errors) + if cfg.context_window == "last3" and len(agent.calls) > 1: + second_text = agent.calls[1]["user_text"] + if cfg.observation == "screenshot_only": + _assert("Recent steps (oldest first, action only):" in second_text, f"{label}: last3 screenshot should include action-only history", errors) + else: + _assert("Recent history (last 3 steps, oldest first):" in second_text, f"{label}: last3 should include full history", errors) + + if cfg.querying == "full_trajectory": + _assert(calls == 1, f"{label}: full_trajectory should query once, got {calls}", errors) + if cfg.querying == "step_by_step": + _assert(calls >= 3, f"{label}: step_by_step should query repeatedly, got {calls}", errors) + if cfg.querying == "subgoal": + _assert(calls >= 2, f"{label}: subgoal should query at least twice, got {calls}", errors) + has_subgoal_meta = any("subgoal" in t for t in result["transcript"]) + _assert(has_subgoal_meta, f"{label}: transcript missing subgoal metadata", errors) + + detailed_runs.append( + { + "label": label, + "config": cfg.to_dict(), + "summary": { + "success": result["success"], + "steps_used": result["steps_used"], + "query_count": calls, + }, + "system_prompt": first["system"], + "queries": [ + { + "call_idx": i + 1, + "user_content_type": call["user_content_type"], + "has_image": call["has_image"], + "user_text": call["user_text"], + "assistant_reply": call["assistant_reply"], + } + for i, call in enumerate(agent.calls) + ], + "transcript": result["transcript"], + } + ) + + out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{suite}_{maze_stem}{suffix}" + out_dir.mkdir(parents=True, 
exist_ok=True) + report = out_dir / "report.txt" + details_json = out_dir / "detailed_logs.json" + body = ["Runner/Prompt/Observation/Querying smoke report", ""] + summary_lines + [""] + if errors: + body.append("FAILURES:") + body.extend(f"- {e}" for e in errors) + else: + body.append("All checks passed.") + report.write_text("\n".join(body), encoding="utf-8") + details_json.write_text(json.dumps({"maze": str(maze_path), "runs": detailed_runs}, indent=2), encoding="utf-8") + + print("\n".join(summary_lines)) + print("") + if errors: + print(f"FAILED checks: {len(errors)}") + for e in errors: + print(f"- {e}") + else: + print("All checks passed.") + print(f"report={report}") + print(f"details={details_json}") + return report, details_json + + +def main() -> None: + parser = argparse.ArgumentParser(description="Smoke test prompting/context/querying/observation workflow.") + parser.add_argument("--maze", default="V01_empty_room.json", help="Maze JSON filename under sample mazes/") + parser.add_argument("--tag", default="", help="Optional output tag suffix.") + parser.add_argument("--max-steps", type=int, default=40, help="Cap per-episode steps for faster smoke runs.") + parser.add_argument("--suite", choices=["all", "prompting", "observation", "querying"], default="all") + args = parser.parse_args() + run_smoke_suite(args.maze, args.tag, args.max_steps, args.suite) + + +if __name__ == "__main__": + main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py new file mode 100644 index 0000000..71e9c16 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +V2_ROOT = Path(__file__).resolve().parents[3] +if str(V2_ROOT) not 
in sys.path: + sys.path.insert(0, str(V2_ROOT)) + +from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA +from nlu_benchmark.loader import load_maze +from nlu_benchmark.renderer import render_maze_image_png_bytes +from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch +from automatic_maze_generation.mazegen.solver import solve_maze + + +def _state_to_maze_instance(st) -> MazeInstance: + def rc_to_xy(pos): + r, c = pos + # NLU grids are 1-based (row, col); mazegen solver uses 0-based (x, y). + return (c - 1, r - 1) + + return MazeInstance( + width=st.cols, + height=st.rows, + walls={rc_to_xy(w) for w in st.walls}, + start=rc_to_xy(st.start), + goal=rc_to_xy(st.goal), + keys=[ + Key(id=k.get("id", f"key_{i}"), position=rc_to_xy(tuple(k["position"])), color=k["color"]) + for i, k in enumerate(st.keys) + ], + doors=[ + Door( + id=d.get("id", f"door_{i}"), + position=rc_to_xy(tuple(d["position"])), + requires_key=d["requires_key"], + initial_state=d.get("initial_state", "locked"), + ) + for i, d in enumerate(st.doors) + ], + switches=[ + Switch( + id=s.get("id", f"switch_{i}"), + position=rc_to_xy(tuple(s["position"])), + controls=list(s.get("controls", [])), + switch_type=s.get("switch_type", "toggle"), + initial_state=s.get("initial_state", "off"), + ) + for i, s in enumerate(st.switches) + ], + gates=[ + Gate( + id=g.get("id", f"gate_{i}"), + position=rc_to_xy(tuple(g["position"])), + initial_state=g.get("initial_state", "closed"), + ) + for i, g in enumerate(st.gates) + ], + ) + + +def _path_to_actions(path, start_facing: str = "NORTH") -> list[str]: + if not path or len(path) < 2: + return ["DONE"] + facing = start_facing + actions: list[str] = [] + for (r, c), (nr, nc) in zip(path, path[1:]): + dr, dc = nr - r, nc - c + target = next((f for f, d in FACING_TO_DELTA.items() if d == (dr, dc)), None) + if target is None: + continue + cur_idx = FACING_ORDER.index(facing) + tgt_idx = FACING_ORDER.index(target) + diff = (tgt_idx 
- cur_idx) % 4 + if diff == 1: + actions.append("TURN_RIGHT") + elif diff == 2: + actions.extend(["TURN_RIGHT", "TURN_RIGHT"]) + elif diff == 3: + actions.append("TURN_LEFT") + actions.append("MOVE_FORWARD") + facing = target + actions.append("DONE") + return actions + + +def _xy_path_to_rc(path_xy) -> list[tuple[int, int]]: + return [(y + 1, x + 1) for (x, y) in path_xy] + + +def _inject_pickups(actions: list[str], env, state) -> list[str]: + """Nlu env needs explicit PICKUP; solver assumes pickup-on-entry.""" + out: list[str] = [] + sim_state = state + for a in actions: + has_key_here = any(tuple(k["position"]) == sim_state.agent_pos for k in sim_state.keys) + if has_key_here and a != "PICKUP": + out.append("PICKUP") + sim_state, _ = env.step("PICKUP") + out.append(a) + sim_state, _ = env.step(a) + return out + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run BFS-guided smoke test on an NLU sample maze.") + parser.add_argument("--maze", default="V04_single_key.json", help="Maze JSON filename under sample mazes/") + parser.add_argument("--tag", default="", help="Optional output tag suffix.") + args = parser.parse_args() + + maze_path = ROOT / "nlu_benchmark" / "sample mazes" / args.maze + maze_stem = Path(args.maze).stem + suffix = f"_{args.tag}" if args.tag else "" + out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_smart_manual{suffix}" + out_dir.mkdir(parents=True, exist_ok=True) + for p in out_dir.glob("*.png"): + p.unlink() + for p in out_dir.glob("*.txt"): + p.unlink() + + env_plan = load_maze(maze_path) + plan_state = env_plan.reset() + maze_inst = _state_to_maze_instance(plan_state) + solver_result = solve_maze(maze_inst) + if not solver_result.get("is_solvable"): + print("Solver reported unsolvable maze.") + return + path_rc = _xy_path_to_rc(solver_result.get("path", [])) + planned_actions = _path_to_actions(path_rc, start_facing="NORTH") + executable_actions = _inject_pickups(planned_actions, env_plan, 
plan_state) + + env = load_maze(maze_path) + state = env.reset() + + (out_dir / "step_000_reset.png").write_bytes(render_maze_image_png_bytes(state)) + lines = [f"000 RESET pos={state.agent_pos} facing={state.facing} inv={state.inventory}"] + + for step, action in enumerate(executable_actions, start=1): + before = state.agent_pos + state, event = env.step(action) + (out_dir / f"step_{step:03d}_{action}.png").write_bytes(render_maze_image_png_bytes(state)) + line = ( + f"{step:03d} {action:<12} {event.type:<10} from={before} " + f"to={state.agent_pos} facing={state.facing} inv={state.inventory}" + ) + print(line) + lines.append(line) + if event.type == "DONE": + break + + (out_dir / "run_log.txt").write_text("\n".join(lines), encoding="utf-8") + (out_dir / "plan.txt").write_text("\n".join(executable_actions), encoding="utf-8") + print(f"\nsuccess={state.agent_pos == state.goal}") + print(f"steps_used={state.step_count}") + print(f"out={out_dir}") + + +if __name__ == "__main__": + main() From 8591a0ea5e38a3003383ed85a503f9cf4a1a00e3 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Fri, 1 May 2026 20:20:00 -0400 Subject: [PATCH 02/14] refactor(smoke): rename smoke_smart_manual.py to smoke_bfs.py Results directory pattern is now smoke_{maze}_bfs (was smoke_{maze}_smart_manual). 
Co-authored-by: Cursor --- .../smoke_tests/{smoke_smart_manual.py => smoke_bfs.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/{smoke_smart_manual.py => smoke_bfs.py} (97%) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py similarity index 97% rename from src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py rename to src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py index 71e9c16..946db55 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_smart_manual.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py @@ -108,7 +108,7 @@ def _inject_pickups(actions: list[str], env, state) -> list[str]: def main() -> None: - parser = argparse.ArgumentParser(description="Run BFS-guided smoke test on an NLU sample maze.") + parser = argparse.ArgumentParser(description="Smoke test: mazegen solver plan replayed in NLU env (PNG trace under results/smoke_*_bfs/).") parser.add_argument("--maze", default="V04_single_key.json", help="Maze JSON filename under sample mazes/") parser.add_argument("--tag", default="", help="Optional output tag suffix.") args = parser.parse_args() @@ -116,7 +116,7 @@ def main() -> None: maze_path = ROOT / "nlu_benchmark" / "sample mazes" / args.maze maze_stem = Path(args.maze).stem suffix = f"_{args.tag}" if args.tag else "" - out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_smart_manual{suffix}" + out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_bfs{suffix}" out_dir.mkdir(parents=True, exist_ok=True) for p in out_dir.glob("*.png"): p.unlink() From cc677f6e418682b94b3c3d720e03c07370a6a203 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Fri, 1 May 2026 20:28:07 -0400 Subject: [PATCH 03/14] chore(nlu): keep only V01 and V04 sample mazes Drop other sample maze JSON/PNGs from the PR. 
Update run_llm and run_local_llm examples to use V01_empty_room.json. Co-authored-by: Cursor --- .../nlu_benchmark/examples/run_llm.py | 3 +- .../nlu_benchmark/examples/run_local_llm.py | 3 +- .../sample mazes/V02_winding_corridor.json | 258 ------------------ .../sample mazes/V03_multi_path.json | 255 ----------------- .../sample mazes/V05_single_switch.json | 99 ------- .../sample mazes/V06_chain_ks.json | 124 --------- .../sample mazes/V07_chain_sk.json | 124 --------- .../sample mazes/V08_chain_kk.json | 119 -------- .../sample mazes/V09_distractor_simple.json | 126 --------- .../sample mazes/V10_distractor_chain.json | 122 --------- .../pngs/V02_winding_corridor.png | Bin 17475 -> 0 bytes .../sample mazes/pngs/V03_multi_path.png | Bin 13946 -> 0 bytes .../sample mazes/pngs/V05_single_switch.png | Bin 17708 -> 0 bytes .../sample mazes/pngs/V06_chain_ks.png | Bin 19823 -> 0 bytes .../sample mazes/pngs/V07_chain_sk.png | Bin 19640 -> 0 bytes .../sample mazes/pngs/V08_chain_kk.png | Bin 21257 -> 0 bytes .../pngs/V09_distractor_simple.png | Bin 26139 -> 0 bytes .../pngs/V10_distractor_chain.png | Bin 22997 -> 0 bytes 18 files changed, 4 insertions(+), 1229 deletions(-) delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V02_winding_corridor.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V02_winding_corridor.png delete mode 
100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V03_multi_path.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V06_chain_ks.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V08_chain_kk.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V09_distractor_simple.png delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py index 8d4ac4f..dd96ed8 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py @@ -8,10 +8,11 @@ from nlu_benchmark.runner import EpisodeRunner from nlu_benchmark.agents import HuggingFaceLLMAgent, HFLLMConfig -runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V02_winding_corridor.json") +runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") # Uses HFLLMConfig defaults (small Qwen on HF Router). Override model=... if needed. 
agent = HuggingFaceLLMAgent(config=HFLLMConfig()) result = runner.run(agent) print(result["success"]) + diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py index 02f578c..037d528 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py @@ -1,7 +1,7 @@ from nlu_benchmark.runner import EpisodeRunner from nlu_benchmark.agents import LocalTransformersAgent, LocalLLMConfig -runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V02_winding_corridor.json") +runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") # Small local model (no HF inference credits required). agent = LocalTransformersAgent( @@ -13,3 +13,4 @@ result = runner.run(agent) print(result["success"]) + diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V02_winding_corridor.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V02_winding_corridor.json deleted file mode 100644 index fdd300c..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V02_winding_corridor.json +++ /dev/null @@ -1,258 +0,0 @@ -{ - "task_id": "validation_10_v02_winding_corridor", - "version": "2.0", - "seed": 102, - "difficulty_tier": 1, - "description": "Single-path serpentine corridor with repeated direction changes.", - "maze": { - "dimensions": [ - 20, - 8 - ], - "walls": [ - [ - 1, - 2 - ], - [ - 1, - 6 - ], - [ - 2, - 2 - ], - [ - 2, - 4 - ], - [ - 2, - 6 - ], - [ - 3, - 2 - ], - [ - 3, - 4 - ], - [ - 3, - 6 - ], - [ - 4, - 2 - ], - [ - 4, - 4 - ], - [ - 4, - 6 - ], - [ - 5, - 2 - ], - [ - 5, - 4 - ], - [ - 5, - 6 - ], - [ - 6, - 2 - ], - [ - 6, - 4 - ], - [ - 6, - 6 - ], - [ - 7, - 2 - ], - [ - 7, - 4 - ], - [ - 7, - 6 - ], - [ - 8, - 2 - ], - [ - 8, - 4 - ], - [ - 8, - 6 - ], - [ - 9, - 2 - ], - [ - 9, - 4 - ], - [ - 9, - 6 - ], - [ - 10, - 2 - ], - [ - 10, - 4 - ], - [ - 10, - 6 - ], - [ - 11, - 2 - ], - [ - 11, 
- 4 - ], - [ - 11, - 6 - ], - [ - 12, - 2 - ], - [ - 12, - 4 - ], - [ - 12, - 6 - ], - [ - 13, - 2 - ], - [ - 13, - 4 - ], - [ - 13, - 6 - ], - [ - 14, - 2 - ], - [ - 14, - 4 - ], - [ - 14, - 6 - ], - [ - 15, - 2 - ], - [ - 15, - 4 - ], - [ - 15, - 6 - ], - [ - 16, - 2 - ], - [ - 16, - 4 - ], - [ - 16, - 6 - ], - [ - 17, - 2 - ], - [ - 17, - 4 - ], - [ - 17, - 6 - ], - [ - 18, - 4 - ] - ], - "start": [ - 1, - 1 - ], - "goal": [ - 18, - 6 - ] - }, - "mechanisms": { - "keys": [], - "doors": [], - "switches": [], - "gates": [], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 18, - 6 - ], - "auxiliary_conditions": [] - }, - "metadata": { - "chain_pattern": "none", - "tiling": "square", - "wall_topology": "serpentine_corridor", - "turn_count": 5 - }, - "max_steps": 220 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json deleted file mode 100644 index 70f6119..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V03_multi_path.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "task_id": "validation_10_v03_multi_path", - "version": "2.0", - "seed": 103, - "difficulty_tier": 1, - "description": "Three structurally distinct routes connect the start room to the goal room.", - "maze": { - "dimensions": [ - 12, - 12 - ], - "walls": [ - [ - 1, - 1 - ], - [ - 1, - 2 - ], - [ - 1, - 3 - ], - [ - 1, - 9 - ], - [ - 1, - 10 - ], - [ - 2, - 1 - ], - [ - 2, - 2 - ], - [ - 2, - 3 - ], - [ - 2, - 9 - ], - [ - 2, - 10 - ], - [ - 3, - 5 - ], - [ - 3, - 7 - ], - [ - 3, - 9 - ], - [ - 3, - 10 - ], - [ - 4, - 2 - ], - [ - 4, - 3 - ], - [ - 4, - 4 - ], - [ - 4, - 5 - ], - [ - 4, - 7 - ], - [ - 5, - 2 - ], - [ - 5, - 3 - ], - [ - 5, - 4 - ], - [ - 5, - 7 - ], - [ - 5, - 8 - ], - [ - 5, - 9 
- ], - [ - 6, - 2 - ], - [ - 6, - 3 - ], - [ - 6, - 4 - ], - [ - 6, - 6 - ], - [ - 6, - 7 - ], - [ - 6, - 8 - ], - [ - 6, - 9 - ], - [ - 7, - 2 - ], - [ - 7, - 3 - ], - [ - 7, - 4 - ], - [ - 7, - 6 - ], - [ - 7, - 7 - ], - [ - 7, - 8 - ], - [ - 7, - 9 - ], - [ - 9, - 1 - ], - [ - 9, - 2 - ], - [ - 9, - 3 - ], - [ - 9, - 9 - ], - [ - 9, - 10 - ], - [ - 10, - 1 - ], - [ - 10, - 2 - ], - [ - 10, - 3 - ], - [ - 10, - 9 - ], - [ - 10, - 10 - ] - ], - "start": [ - 1, - 6 - ], - "goal": [ - 10, - 6 - ] - }, - "mechanisms": { - "keys": [], - "doors": [], - "switches": [], - "gates": [], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 10, - 6 - ], - "auxiliary_conditions": [] - }, - "metadata": { - "chain_pattern": "none", - "tiling": "square", - "wall_topology": "triple_route_maze", - "path_count": 3, - "path_lengths": [ - 11, - 15, - 19 - ] - }, - "max_steps": 140 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json deleted file mode 100644 index b520383..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V05_single_switch.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "task_id": "validation_10_v05_single_switch", - "version": "2.0", - "seed": 105, - "difficulty_tier": 2, - "description": "Trigger the switch in the lower vault to open the gate guarding the goal room.", - "maze": { - "dimensions": [ - 14, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], - [5, 4], [5, 5], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 10], - [8, 1], [8, 3], [8, 4], [8, 5], [8, 10], - [9, 4], 
[9, 5], [9, 6], [9, 7], [9, 8], [9, 9], [9, 10], - [10, 4], [10, 5], [10, 6], [10, 7], [10, 8], [10, 9], [10, 10], - [11, 4], [11, 5], [11, 6], [11, 7], [11, 8], [11, 9], [11, 10], - [12, 1], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10] - ], - "start": [ - 1, - 2 - ], - "goal": [ - 12, - 2 - ] - }, - "mechanisms": { - "keys": [], - "doors": [], - "switches": [ - { - "id": "s1", - "position": [ - 5, - 8 - ], - "controls": [ - "g1" - ], - "switch_type": "toggle", - "initial_state": "off" - } - ], - "gates": [ - { - "id": "g1", - "position": [ - 9, - 2 - ], - "initial_state": "closed" - } - ], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 12, - 2 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 1, - "sequence": [ - { - "step": 1, - "type": "switch-gate", - "element": "s1", - "unlocks": "g1" - } - ], - "notation": "s1 -> g1 -> G" - }, - "metadata": { - "chain_pattern": "switch_gate", - "tiling": "square", - "wall_topology": "room_chain_with_switch_branch" - }, - "max_steps": 140 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json deleted file mode 100644 index 3bb1fab..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V06_chain_ks.json +++ /dev/null @@ -1,124 +0,0 @@ -{ - "task_id": "validation_10_v06_chain_ks", - "version": "2.0", - "seed": 106, - "difficulty_tier": 3, - "description": "The red key opens the upper choke; the switch in the lower crypt opens the final gate to the goal chamber.", - "maze": { - "dimensions": [ - 14, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 
8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], - [5, 4], [5, 5], [5, 6], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 6], [7, 10], - [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], - [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], - [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], - [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], - [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] - ], - "start": [ - 1, - 2 - ], - "goal": [ - 12, - 8 - ] - }, - "mechanisms": { - "keys": [ - { - "id": "kR", - "position": [ - 2, - 3 - ], - "color": "red" - } - ], - "doors": [ - { - "id": "DR", - "position": [ - 5, - 2 - ], - "requires_key": "red", - "initial_state": "locked" - } - ], - "switches": [ - { - "id": "s1", - "position": [ - 6, - 8 - ], - "controls": [ - "g1" - ], - "switch_type": "toggle", - "initial_state": "off" - } - ], - "gates": [ - { - "id": "g1", - "position": [ - 11, - 8 - ], - "initial_state": "closed" - } - ], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 12, - 8 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 2, - "sequence": [ - { - "step": 1, - "type": "key-door", - "element": "kR", - "unlocks": "DR" - }, - { - "step": 2, - "type": "switch-gate", - "element": "s1", - "unlocks": "g1" - } - ], - "notation": "kR -> DR -> s1 -> g1 -> G" - }, - "metadata": { - "chain_pattern": "ks", - "tiling": "square", - "wall_topology": "shared_room_chain_layout" - }, - "max_steps": 180 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V07_chain_sk.json deleted file mode 100644 index 0ad9095..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample 
mazes/V07_chain_sk.json +++ /dev/null @@ -1,124 +0,0 @@ -{ - "task_id": "validation_10_v07_chain_sk", - "version": "2.0", - "seed": 107, - "difficulty_tier": 3, - "description": "The switch opens the upper choke; the red key waits in the lower crypt behind that first mechanism, and the final door guards the goal chamber.", - "maze": { - "dimensions": [ - 14, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], - [5, 4], [5, 5], [5, 6], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 6], [7, 10], - [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], - [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], - [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], - [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], - [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] - ], - "start": [ - 1, - 2 - ], - "goal": [ - 12, - 8 - ] - }, - "mechanisms": { - "keys": [ - { - "id": "kR", - "position": [ - 6, - 8 - ], - "color": "red" - } - ], - "doors": [ - { - "id": "DR", - "position": [ - 11, - 8 - ], - "requires_key": "red", - "initial_state": "locked" - } - ], - "switches": [ - { - "id": "s1", - "position": [ - 2, - 3 - ], - "controls": [ - "g1" - ], - "switch_type": "toggle", - "initial_state": "off" - } - ], - "gates": [ - { - "id": "g1", - "position": [ - 5, - 2 - ], - "initial_state": "closed" - } - ], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 12, - 8 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 2, - "sequence": [ - { - "step": 1, - "type": "switch-gate", - "element": "s1", - "unlocks": 
"g1" - }, - { - "step": 2, - "type": "key-door", - "element": "kR", - "unlocks": "DR" - } - ], - "notation": "s1 -> g1 -> kR -> DR -> G" - }, - "metadata": { - "chain_pattern": "sk", - "tiling": "square", - "wall_topology": "shared_room_chain_layout" - }, - "max_steps": 180 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json deleted file mode 100644 index 09ae2a1..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V08_chain_kk.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "task_id": "validation_10_v08_chain_kk", - "version": "2.0", - "seed": 108, - "difficulty_tier": 3, - "description": "Two key-door pairs occupy the same dungeon layout: red for the upper choke, blue for the final gate room choke.", - "maze": { - "dimensions": [ - 14, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 6], [4, 10], - [5, 4], [5, 5], [5, 6], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 6], [7, 10], - [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 10], - [9, 1], [9, 2], [9, 3], [9, 4], [9, 5], [9, 6], [9, 10], - [10, 1], [10, 2], [10, 3], [10, 4], [10, 5], [10, 6], [10, 10], - [11, 1], [11, 2], [11, 3], [11, 4], [11, 5], [11, 6], [11, 10], - [12, 1], [12, 2], [12, 3], [12, 4], [12, 5], [12, 6], [12, 7], [12, 9], [12, 10] - ], - "start": [ - 1, - 2 - ], - "goal": [ - 12, - 8 - ] - }, - "mechanisms": { - "keys": [ - { - "id": "kR", - "position": [ - 2, - 3 - ], - "color": "red" - }, - { - "id": "kB", - "position": [ - 6, - 8 - ], - "color": "blue" - } - ], - "doors": [ - { - "id": "DR", - "position": [ - 5, - 2 - ], - "requires_key": "red", - "initial_state": "locked" - }, - { - "id": "DB", - "position": [ - 11, - 8 - ], - "requires_key": "blue", - "initial_state": "locked" - } - ], - "switches": [], 
- "gates": [], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 12, - 8 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 2, - "sequence": [ - { - "step": 1, - "type": "key-door", - "element": "kR", - "unlocks": "DR" - }, - { - "step": 2, - "type": "key-door", - "element": "kB", - "unlocks": "DB" - } - ], - "notation": "kR -> DR -> kB -> DB -> G" - }, - "metadata": { - "chain_pattern": "kk", - "tiling": "square", - "wall_topology": "shared_room_chain_layout" - }, - "max_steps": 180 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json deleted file mode 100644 index b2e6fc8..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V09_distractor_simple.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "task_id": "validation_10_v09_distractor_simple", - "version": "2.0", - "seed": 109, - "difficulty_tier": 3, - "description": "The red key-door chain is critical, but two wrong-color keys sit in dead-end side rooms off the main dungeon route.", - "maze": { - "dimensions": [ - 16, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], - [5, 4], [5, 5], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 10], - [8, 1], [8, 3], [8, 4], [8, 5], - [9, 6], [9, 7], [9, 8], - [10, 6], [10, 7], [10, 8], - [11, 6], [11, 7], [11, 8], - [12, 1], [12, 3], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10], - [13, 1], [13, 3], [13, 4], [13, 5], [13, 6], [13, 7], [13, 8], [13, 9], [13, 10], - [14, 1], [14, 3], [14, 4], [14, 5], [14, 6], [14, 7], [14, 8], [14, 9], [14, 10] - ], - 
"start": [ - 1, - 2 - ], - "goal": [ - 14, - 2 - ] - }, - "mechanisms": { - "keys": [ - { - "id": "kR", - "position": [ - 5, - 8 - ], - "color": "red" - }, - { - "id": "kY", - "position": [ - 11, - 4 - ], - "color": "yellow" - }, - { - "id": "kB", - "position": [ - 10, - 10 - ], - "color": "blue" - } - ], - "doors": [ - { - "id": "DR", - "position": [ - 9, - 2 - ], - "requires_key": "red", - "initial_state": "locked" - } - ], - "switches": [], - "gates": [], - "blocks": [], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 14, - 2 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 1, - "sequence": [ - { - "step": 1, - "type": "key-door", - "element": "kR", - "unlocks": "DR" - } - ], - "notation": "kR -> DR -> G" - }, - "distractors": [ - { - "type": "wrong_color_key", - "element_id": "kY", - "description": "Yellow key in an upper dead-end chamber." - }, - { - "type": "wrong_color_key", - "element_id": "kB", - "description": "Blue key in a lower dead-end chamber." 
- } - ], - "metadata": { - "chain_pattern": "key_door_with_dead_end_distractors", - "tiling": "square", - "wall_topology": "room_chain_with_dead_end_branches" - }, - "max_steps": 220 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json deleted file mode 100644 index 88e274c..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V10_distractor_chain.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "task_id": "validation_10_v10_distractor_chain", - "version": "2.0", - "seed": 110, - "difficulty_tier": 3, - "description": "The red path reaches the goal, but a green key-door chain opens a dead-end upper spur that looks like progress.", - "maze": { - "dimensions": [ - 16, - 12 - ], - "walls": [ - [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9], [1, 10], - [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [2, 9], [2, 10], - [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9], [3, 10], - [4, 1], [4, 3], [4, 4], [4, 5], [4, 10], - [5, 4], [5, 5], [5, 10], - [6, 10], - [7, 4], [7, 5], [7, 10], - [8, 1], [8, 3], [8, 4], [8, 5], [8, 10], - [9, 6], [9, 7], [9, 8], [9, 9], [9, 10], - [10, 6], [10, 7], [10, 8], [10, 9], [10, 10], - [11, 6], [11, 7], [11, 8], [11, 9], [11, 10], - [12, 1], [12, 6], [12, 7], [12, 8], [12, 9], [12, 10], - [13, 1], [13, 3], [13, 6], [13, 7], [13, 8], [13, 9], [13, 10], - [14, 1], [14, 3], [14, 4], [14, 5], [14, 6], [14, 7], [14, 8], [14, 9], [14, 10] - ], - "start": [ - 1, - 2 - ], - "goal": [ - 14, - 2 - ] - }, - "mechanisms": { - "keys": [ - { - "id": "kR", - "position": [ - 5, - 8 - ], - "color": "red" - }, - { - "id": "kG", - "position": [ - 11, - 4 - ], - "color": "green" - } - ], - "doors": [ - { - "id": "DR", - "position": [ - 9, - 2 - ], - "requires_key": "red", - "initial_state": "locked" - }, - { - "id": "DG", - "position": [ - 12, - 4 - ], - "requires_key": "green", - "initial_state": "locked" - } - ], - "switches": [], - "gates": [], - "blocks": 
[], - "teleporters": [], - "hazards": [] - }, - "rules": { - "key_consumption": true, - "switch_type": "toggle", - "hidden_mechanisms": [], - "observability": "full", - "view_size": 7 - }, - "goal": { - "type": "reach_position", - "target": [ - 14, - 2 - ], - "auxiliary_conditions": [] - }, - "dependency_chain": { - "depth": 1, - "sequence": [ - { - "step": 1, - "type": "key-door", - "element": "kR", - "unlocks": "DR" - } - ], - "notation": "kR -> DR -> G" - }, - "distractors": [ - { - "type": "distractor_chain", - "element_id": "branch_G", - "description": "Green key and green door open an upper spur that dead-ends." - } - ], - "metadata": { - "chain_pattern": "key_door_with_distractor_chain", - "tiling": "square", - "wall_topology": "room_chain_with_chain_distractor" - }, - "max_steps": 220 -} diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V02_winding_corridor.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V02_winding_corridor.png deleted file mode 100644 index 47c7d77c89c5173521465926d925ca211b3f6218..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17475 zcmeHP3s{nQx_4^JDL0#@DJxUE%{Y^(HKwMebhl;J|GeXO9E|rzoQTI9 zjyrC#=WzV#Q;7IeCyy*cACE(xM8Mr#)~$8fux43QeEeyox2r4cr+2s@;v!uiLs9L( zr_4IND;Q~EVe_f^@1w|qSHQWpk#}$35|EH57tK0->SUPdO{ZJ-qes^dFF4-v$)a0p z%d2NibNP7M5sThqi)SC3_vLT4&z!NP>di+RguS!~|^7aoe+9+?-rvs0$#gDfZeIu@ZP4~vW% z8iH(E%t>h8RpIcd5^vHA5}^@=?_LBoR5KMZ;i?>)$#9=6DOTOnKk<^Cnd4b_FyF9e zSGB9bAU37wF9hQ4KO4?Nf>k`cfyXX%Fuaun(&z_}&%X`-`sy`WgPO#4HTJuDfHX%z z+|d3{v!g0dq!@QX!qb&G9Gs?xJtj#}Vk6vqd_+@EAyeCtV`&lfbdB`sI<3cwRys^R zlZcISasQCsfK$Cp^?jJwV_efyG!oFYz!>-jtnL_(u&4r?Nf6=kofOoe%Y@srv3I-^qO z6Vt22X&G!w(8ObnjAI<-@V(+&p?%d}_T6b0a%e4uNo%@cWi1O^QsEU{^=FAE(pGIt z)YKG+;Iqm=+Pgta=>fSM-*(i#a}9MdV>M56;0x{PHqC~0SdpSHgOl6WGN z**fpOuA`;!ARp=m-*{w^WX|aN<+JopK_(T*6NyB!%OVU?$ru5jj682Ed|i@Fx~^*k zO*N|5mXT5@?$v^vv35*D&E%MaX-qPu;vjHJEsngnC@Drq!|Q1RmUsk=i4GsXnf-7q 
zNJ^6Pmw36mqwW+4-tHqL1u!xxPwLPgmCz{P8X71&G@x-tWw$%ysj8euX|tQD$0v{@fF({9-F2iy-IRYpEfE@MsngCtB4+< zL4iS3V3;yWYkvHz-E!ptL~!7G80M3$Oj4MX0|z}Op%;O2&cmV$9y9vX73|E4_@Wc1 zPL&&Kj0Ox|i}CYuLEu!m0vTO69J)IBvKAR*iqCU$k z-Zb=;%_L3^ItZP30qq;l^7t~fu3|~)BXxi`jPErPFA=U?7*NK#`#Z*&8aWsb!h;a}?g_aQ>I=lt=08fEpWF+Yla?+MD220Oi zCk_)aJww)*N;Y_tOEhyIv|WbdM-$`LSigpbhr?-sJY9{w$vxdYA7yKNJ=b_hKzi?> z?`g@g^m)65?OqXk4UHdwgM`B%({s8eBd%n3Wz* z=CJ_oV&IrsfZGVtYMJcioo$BJFh!iaKx7J=>bFs{CfSIFmtQ+fJhIdU(O+fivTL7y z&Mznh$;7rSX`~0CwX?wZJ}uPr>eYUeueOe(Q+Lbz67(_|KrRvZ#Kg(Uf$_I~prn+^ z)=>~-iFcB#B~_39afwyCUQ-{iCIH1V;M_VmR9ZnUe$$rn#GdC5F1CwYieo?W#Q@vGLXMam~_e>?!%l!GpB}J5j=Kob|m67M84b{2e=Nm z#FOn9>eTJ4kKq@`0SAH`OffsO{Wh#x(``=jeG?ErOAM^c%uEk!MSSr_N^9ukiy(yw zVC&bxIS2o;DFjKw>uwjSMgkkAL9&s`cG-Mgn+0+*4TVu)_-c)RQ4#8D3Nkwoh zsTL>a;YeYM{n_*YlT*axi_Nr{!p4h(^9v@Ps;iRuo%vK>AnIvodX{PD zzqPK(8D1slbQvN*NFYkqr0jcDCks>ZjV3n0WR2l13&;sgnqhmutF7Y&(fR0(fDZ3+ z7djj;Q1~Er{`~oJt@H}x$Xlpb$GzBK0^)FdYhv>H<-vFSWmj`ztgNhL+QUs)oAvHw zPyI7L-)iGIxrW9{G5Hth0`w;Jxe*i@8>YV{x2K)lb)S$H3DWn<#N>nr$G-_{uE|%I ztFQA*ua~zH^YTJjY*%b=D_rPUc`Tz^g9QQ9sgD3$m*ZdregDYP_jIdFPF+*vWPn@2X0$A$O-5}UZ;ddH>oP|C^T&U1 z&X!ZBb!Ci8oULqW_qp>M1K~-Xxl}RPk+U!L7$yQn={{Ac*j=;&yU1bcwIvTBKtmSs z>e^9!B}iQg5yi#4dH??XVe-IEPx?MR63`(&mH;Y}2UiM)q(%^+5_OFa9A7GC?6&b8 z0;2JLy=g&)7SIv`z_XnZbuX($8Sl*yJ-coEw2GC>;5BF{uH(#&fYZpe(Nnaf!@*G{_xB&hTc$^7ZV0zUi!=F%qOOk?BH!a1 z--whr47PG>uj2LhZk$au2I78HfE?T>xfYEt^d6_dBSh__O@PT@+RO)qXJR7Aq)>Tu zPz+?t!^{ZdPz2mG_0-X~V#a$9WfefK>8Ya(4<+KtCc+5kD>+)rnb@UuB^g~ zp2=6%cGYc>MNNV2Cc`aZD3&Quf0OIAGe-RA@G3%DNO7_fD~>UR>!aaAS0Ks#>G}Ql z+gn4T3q}E**z8C7PNaaONd094tnP+Z3_~E|5`L^O9HZ5A`##F;HeQF|#xruCnj?#b z!%sB~;}pX#5k**+=S1 zS9t=S>hfNd+dVFjug3ubzRpl%r8s}z>ZvzqNHU#FuPs0SW%RIKw3k3g1tPdy>@+HU zeU?}`cINW6J3R(wMVVC-sUgC$W!3Pp^63Yu*ijUcd}`C%`{2PoySJ~J&X5o!Z zF7%8x-odWj*7*4c6ga zH|YeKMecTt=y^(*M+>S)!?wl|>ew)NXT^5e6%p+l=v4Rwvo*(n`q z;awHGWKUleObt2;3U@g9G6cdIePEH~FdVN{L4B%V#raX+z&e&v6ll26o06%}r|OwA zXFb1q#d`s#ne@WlUX*oHaj~N8jRoFD{ZKtIqC>4Bb?P!F-p 
zqc!iHit&%L|@0GZ@DZqi%`McFezt{my3$4UO|?`21k;zR4E+ zREx8C_Ea{Ida0e$DGSJV%ll@M7i=M|k(IS?JeVH;O}=P9qy!amH)#PBG0cM!MOO+L zf>1}1Z$4ZV&h_!)IW{hJX^T$NHWxVTB=!wbX-t@2d(DxI1IVU383>Q3AOpw(z0i?P zvSs1#2}0%tGtgbo*Xk@7a|VyN4OtuyMM-yr!iFBuevl206cf>WUB+7pjg~s+xgnYc zwsMnpa+bMwc8d1FUFQ`VNZuM5x9_vRH8AUJBSylg-DH4r(SJTMI^qU(E=h)d+t zMw(o4u?*K%iKU`BNWAe5Hf|H&QO}MpXiTC&hY96^2*kw$2bZ=A>bvEjR|1JhtJOJ4 z;imh&y}dB}i2?2EGw3)Mo}Vk%>f$xm>(atSjQQ6PD5$JyCvC^E8vb|(?6J2HMuJdF zTXx8?LGyz3mWrobE<4(uH%r9WE1aNe0-??^rk8N|9KFZ*GLQCwMjtfceXZ}pXi*@Y zmP=5>WD4>tA}#*zEQayXa&9G(Bc)W~Ldv+? z?Y9*ic^|hg0UBd<b#$o_^j#Frseic&o7~FDVl(r|xI4mdbZOB;3<9jrLW`C4D~v);CvPlZg78r z^20zg^z`r^UW0512>i-7fD1A=TIE`9|_I_hm zlvE3G*wvMhB0nFTKiL)upLA2BvGNRRrow|Kcn57d#5B~3ir4=DH$7L7dbWrFE1WuC zPrcdgI1M&rE{QOj?)b)*;BcU=|B7eMj%xO9XqN;)E%7;#|Ag1>aLalOi866Vv z075B+lzJ^$5F~P3+1-*3r*zk&x9QAP7h^v@fF9--?Jz{*7E^E)gil57K)FJ!#$2Z+ zc@{ot5Re}99e&G|jk*Gq_h7+h?L4H&??m^Dn6i}%8UFf!7tf5-C57*b|_Kq+}Zr(T&J5r;mVKf@Hm*p^flFjJ!5MH zLZ%_uuRkBz?loO(OxlkZ4%pM|oy;7{D|~1y7n7imxh+X@NnoKopo=RwDdIH~ss$4! 
zHDz+;+toUTe^bC(?z~pfrczq4%%Vb9+c5hX(K{zmB@j?{R9aW^H?U~?5ydD7SbZT> zZ$PUG`!1C^(!6ux2RN_*S33Q2`Sh4{b=EVm%|*%%61uen^RWlVP+3oPm}HhvVsh!d|KjVS^SUpGOl$g!!7L&1k^yT>9%JLN{>-Q zv%+L$zUsqH>MNh2jtJ$xf#u-O1jGdGE{7NoAPt^BTY79T7j&f3m-HsecSV)Hr$JE$ zz9e#nIy&Qk>PfA9_YGNRBpu>WN_Gl;Gh5E-4kG?m9|c-b~soVZ_>t4nCo=~vp1T{ePCAmmblwJ3pznr7MpT&^Ti zdu6v81=PC^wL*rx`RXTcniBy+`m)b}vmHJT_}_rhYyt50@Pg=0!*?kwAejCM#kfa7 zydes1gH3h5iS22oRmou0mOxn)Y7AwnLmX8kQZx$Opu_lFl3l1pMH9`Xtd_x&I{F?p z`gltBtdba~Fz;LuomcB36o z*7)2Aly#XR?*mYq7%_wYX+Q*&8Ai+GuLxvVcL_Yy(G2PtZWciYl&83~28zKhdXx6N z_cs*qqfi!t7PIx*0p_(S8Otb>SIkpuy1->&AA2+~gP>=k8JwNAvWE`(tOPMjl)sO z=XA7NiF2I$TOdrkMMr%jmj#)EWH}1rc_KeLr1-=G8pT|> ztQRe~gVTo0?Hb}WP$)BK8-WO?##82^NZGPvx;sdw;R-8gug%Gu@>%a(biY{WCb?8nu7nSTMpZQV6Wvxd{(gSnO5DTiK*ag)OtW@x z0C%O!fqI6|%#izFhy9j@?_V@SOdCR4#^+y2FY=C!$K0JNJYX5nb#~!s9$#ApcJsG< z)aAIS`(y=B=i5y1uC36d2@@5Z}EGL;yCIz+8<8nx9_(jm5GRt3LYOA+%}JO>-5vr>AGFP3$sfXJ>b2IHzZ^CsMItxGH+It&Z6^ zfT~q3DnRp3G8}F$xRH^O(HQ#lY}bt1DmS=9IL1EO-E$j>O>K#R<8Z#=T&Uo0X@>mG zU6fz(!ZW~59Mblz_$zuwzrWig1CCYT%wG?by7rKH-`apG+r4Gwl00PlVl}&`2JEBm zlC-&TmI6=w>8W)sWwCC<=?MF>W_W-K9v!_pMDucOP$=Y^qeqXLH#1zEdkcj^V*&#MpM1NawtV*AzoRZ^4&3=CM+4J4du9+xlX))EQCj;z#^41&B>2{E5N^;K1B}yWZ z=Wnt6j9F*2n1}sb%`Wo$GyoD?ec|4mq?%N+=2p!J-cptsoLCjB-EZGJy-0u zvq+z7P5tPPxbd$r-k*}{PYX8#t>8-RsT+UHxIex42NM3bIrQfk3-laphQIqgSkj*x zstc}NyGBv5^K8HOifosAAlvh7yX$jva;|3itZ!Su%h>O;W!*?m!`Tc)_&sLA2KV_N5b!J76K4tU&3+i)Eq3G)h|eZdB*VK+==g$wj^l#3|&25s@|BFiKKM1Hl0_(p>toRi%#sBL|{g77tNlov6 z+pB+u$p425l@GGvUnc}{$knibMEwa*}i`&SU;o{zvg1bFUlgnYAE~?y6NxY-M{D-{X>J7DWv@pJiEm^$v zPdXUpuf4H%I}(q*0BYMF=9!4xrluW8_-5ccT3qDWtIuxVUaW#W`?2eb^X4(J{o5`c z6Ycx2X|461o{^*9=X%ln?92YEd4ezOn7eNQIUY zMhCYOjJC%fABMgD;xGLFet65Q zasRSk3b+x@5-TIqJlLb7r+^;{c-1tw9RBPY(LBs$YLW8!AvM+2UrTeVhrb7ACS)6A zb&vC6n->GkiSLlV+fmf`BGE>Dr(^qVM7zP5$Y#UQHIhtn@{oCy2g=*CXU~pS`z4aA zF*Ei#Sg$Dhznk{7 ouvi+}$DjFsZ0JONYMKkqjI_cm2d?tb z`L6;K^z@8B*8S*55C(w%9DRtcaZKxnoa$5-S z-aff~u@4G7>jmUQE(3nGdjSl{7eRN7^z<&&AF=`daPFEv@Hp@>FgSWYZx7Pf)BDP+ 
z{sTR|AM*dv36FB)$J6+E{zO0Z@N2YCJ+$t-i2b}ROg?3%mcDm#{>$a2HRc$83MNA8 zP}MfuT5A|1Bd>aS{E*ET@z`HiV2@vkTxvNv!=+H#wQ4cQ7@B%!pY)fL^bMY7C(M0m z!G@OVH=gtaf{HU2%az4W{`zDc2$$9i@3Da`_RMvIFg0fr29G)CNhFfFtDgJsm?aOL znLL3@_e~q7L;W?gVoe;A&q2$_CLvfUgucipz`R|jf1Z2l39e0_{n67RP&PXjY~TFy z(p(DzJ*%&F-JSJePVDql3Bum3ldDy8Auw^)k52)+eagv8n@dVy;NAUt$1y4qGq_7jMyq;8xf)sR;Ng0f z(C=1Jb@7is{wOFgbI1>N4}y2NY7{)Wk9NTaA)C-!=sjxlNSm@aSpad&knsH%Oeqpm z<{SuDSQ#as9h>~fC-tDypu&r?;58{FVbOQ_u(ZiMug>)8vI74N#s~+n4_a`N4vdpF zub}<)_Q_ng+0RQvDOh<*!6m2gi&`N?LtJ8Lmwbv!ODi#7BVRtcxN`#0A!VuD<%RHE zYma$HkCW=qZJ(xKFystTG|VO;Az>1snL_l`5!5p~U&2c@&ojaGAHl|03(tHowLy^3 zDbovmRzdn-@pYeq{>v}Ei;q@-k3;3H>^v*5N&{ts{-Yxs zDjylldT8f8=$9=PeiH0?Y8uykrjK^Sw~I#>wvnT!wV}x~=HgV(q#v|HG;rg7;NDi< z`7C^DVqwO<*#*pN2HxoTY{S-NehydBR$((*bxI|U4d zT&&fIQ{3$>7oU1QWF*bh(EQoO;OVb@c8W3bHu}Uw={x2JOBDh=cY|XP|aKcWOU?-g{)ejGNJr@wMc=(F7eC>#stTzq$aim*xw=oL?winMjy{s0}#OV?%tk01lonV%VbFeKIBt-9eD-=nN2#cPfeLM+DCT^Y%WgD3#Oa z%xz!Ls$?RfMtuKnZj`0V`JZ?R;#Of*Sy;zZakPmYw?jTYidnd88RG8T*phP`M3=H* zT+A8EmX4!H%%?!E=@&kf!l&wUjJDzPphR3 zb*m2cPIamL4eKVvD6Z;Z^ikeMPHGmP?fl{O=qT+3NOLEqar5B%a%NTdqRO=|KE)>d zpit~{tEauVS#oj#+WWFQTi6qpGc0F@7x4ON4s9$^EtP*w>gZjQE4mGy|7kj@Gjnc) zO>S(RhMaOkG2G?8h?=$Cy&N4?L4*UirKh-?VqnRvBKHs3!;_sF%M?LyRaMgi46&&+ z9d&J3m^V6*l0i#ZP~ZCsxie+PV-|o#lqxIsO)LTx+z>*QM>jYcgr>poxl>(a1<2$t zW^6*KF8x7^-LR!@RcXV=u#>&H-z!G^F&)y?8h;M;qIqQB&&%H2P|dcHiXE>wJ%K(uI{?^kkK&DJ7#9ZmAj)sEuJ6k^@@oz}HrOS`RRoJjG8P zxPkX|pFIppfAJ8PHfaM(dtMQs-sD*7FxY%TfHSR20;mikmTSE1Th2~9Slv2*kT(2e zD0f1w!IQ(=KWp?FlE6^0K#38h#PO zL?bn<*QQ(R@iHv*;&RC4_? 
z7-9!CZrW7WRs*e+6|NT24bC;FJvsWWRUhSw_B67gGIM0gG_xv8#PT1eUM%&j|DXhO z{XE*_*LY+m+hvXYK^tiC*|OlcmaD^81v=* zyw3H`zJYXL(s|q@EIlLGTH#YerLMOm#l_OyG%e!w?vx;nco8pt07h6LS58=$-->_E zgIwp+YLh>9zVcBLN4zH~n^mq4f@H>U>TTl8SRiCVt&7Ofo1Z=CshweHDq0VG8)Jq1 zCcYi94K0>q5Hv0BJe1sDAuV)KCzc09Yd4IBb!gwztGlPWBm>9F@##_0tV_*qi5=pr zrq7wqF+t+g5$eT?LpIgt@Ur@Hzm$zRmzP|I#ECy?b#yWL<}MEW2JNA$^9VNojy9Hv zpr~bYeF#k5^(vS3xlPU6A_5p8X9}`}22XTrf|I2&=tOOCv)h}mtxA2X$V!2rmDRkr z62B#@$#sWwR1v#wNeX;Mw;iM%Ywvv7`%7G7`aa$#7(1zot!ynm6pdanGFdn2U?COW zWl?@k`y}5fJe(k{i14Fuu5G?PJf4G5k2CYjpvJ9fM|*8&H)f7#1acBb6S~|9vrnGj zAx~gRpGdN97v|+em%_Ol*hXwtn|5gDs7v+yRsYk2wv^6+xxt;h9S5*;R@ebaVo#y_ z!-#L;$-6(;<-I-EphdZSo1_*0>Nx1j`7f@O40L%(y6A_ytY2(czhT6|=37t#NAcO< zZbX>0LJX+fmheBZob`y76bt0#vT_ggw3jpS0eZraa6cSez^#5TU2BiU4u<70P_M2S z`(tr~-YJ5k3V)*$SlMs{L>R@nU@yzkswmpv(j69cq#E(nA1a5jdZT%f%l7)2mkkaM zdaZyV3q}>O7=ZRy0RCfLdWL(n5xc9~{5zg~wT-y7^+ovsYW9&E)dAL38*-xiCf3%} zEFo7yyAF92v<%K!3Vz`!)ly#5LuLOXX=$2UpoVPd?Q-Tz_T)OO;t3P9Jzu+fDvg1H z(u5wuupC%SMY4NnZ#$EO8ll(P6JDtRVG&SEj93sj%zQR`sGXVErVSvgyKSS}sZle$ zi9cT(vaGtbM#Lu@e6s4r%9w}ERGX^7Aj}1rc8lX`x{uT9y1F_h!3Q&U!wu73cy8!T z!P)yHX*NR*($0a*Ee-1|v8pEQs00nn3NLI|Qbp;KP6t0EP%z%4y3gq(zYx)BkMQ_> zn5s3;T{X;ruIyA!LjA9AdA`0pka|vFx;6X-&tzsZFV(tS$zXb~fs!j65@gdp*z~@z zHH@_W1QQUzszQpr8okZ?%50Mh6T6r^_;q``ABfdvS4j&};S?IS3YU(mBp6N@rxHt{ z90q#D7n|-F4JAtT(Y@7F;c=E@Rq5IVp6hevy(6Wk68fB0nE#CU?!zqQ7p~BgH8)(s z(i+%TDhu{`hs`Gbs*vEY?VODHJ|ae=OlLEp4B2t$!X0BjiKQb1S%>!?*T|Y6<$<(l zFqPL-3wXhu{h<}4=85a74iBmW>&ALmGb5UjYq0!clq)^Q(8G+N)yZj7O+j2wBZk<+>QTjJuYaOKd1}Hs>hN3pt+#T|8#YR(4OOcg26-V&Pz#@@#v7FBVtg(Jg%dEOx7- zZ9Ow)^PIDTT6#CUMxxK4L(bT$bSveYPNw#SRrzeqDG}&Qv}BI9x-S~08Y$a#z3m6q ziW*5oMl1xHHaXyRbN|E!+~*~BAGlU6tK@!4j-qQl#wUVaO->lEmV6A}IZ^l?8Lq+#!S*Bh z9&s%JDfX>z?nF%?7pLnb((&_8)=bt9dfNa7P3x&8PQjj>7vq|5CZaErS(FA)L)gL9r@eya{Z8pyd;-dNwW8z@gHcYGbH$5%JSt8~R@6OH={#Wr1JD;`3{wIK zEUpS^dx#|@u>zyP9^;fHp_}r}-2yyn?wM5+cOEbI#d-jSS}f;~(4uHjru)npF9sSA zv_bCn<8gq`Eu(d_O~sUj0WY!KVQOk>1eEs7Yqn`g+T1-l96<=lLEJz-2m!P@(H;vD 
z;Mjem<|!N`g%1L}2g5_VR7UFx5D(WyOmjWWkcU)&Bnd#>wX?OY$GOcmU(QD?$|Id5 z?!jbln(~Ae1$cImyhDsOCqS?aq4?Z>5Sb)#5NIvU%wi}!bMa6$sYcW6JeGKpCa1~a zmEvPp`7GS@Ydb_5{Q!J3w{3H#LRA!jAD-We=?2-R+R>E8=#`ywE`ju9BG@of9`1+X{&6RGw1x1VxGD; z0-P|d_B2oWWq_@sQ|?!CY8FDxDrRV3uJ8x| z;+hmc(u?$=Gnsyl+jVY-K3M>Rc6i6Ovu2_~r+y`A1f++7O@&T5(c5`bwvt9xUsQfp z5!prU8n@EOIpJE-SmER~4<1*SJ`qVUvFNfa1P z;^48#MFtj=R2tbk#Tp$jkj%Lu(=nWEyYS<1iqU9TMu^VE6NeX@cchm=Is|@`I>0y) zO;pzc`U9LND*haRgL~BG4=ta_71U6&2m6F1w)K-o{sY(J$7X;$6cDM#A8&YtRL=#m zs|>1L9sed1WXIK_JEaQ^ao@od zS77(Yi&J@~+<*@KYRny@3u~M`JYYKGaD?#yMo6sN6Ea>IV}-6X-3qm8o-SmP?97}) zuRH6d^JhDCapYw2-^(UXqsXuOfCOlZnHi_KY?um6uy2UW-B2oBnRh|-A~ldQSM6>B z5<)B*J$X!V!4tPv-x*pcB6Cigo12F)GKU-lkDPT!H!Q~BbSGv0}P_Z1G+P zq{aWz(DeU0Wqp6bKa)lO@hDaLw~fWgm9L*Ydo~c&us8WyW(GTWJB3J zQn(q`$r7ZhnmxQq8Wr-wq|?>awN-b?qsNZ<>F)*nrS5`V`rMhB3dwFLI+*f(W~PW@ zGDDdWm$=0JH zCvNIYm*zmW;J-|jCO}Vuv2=cpp&l`Q+YVr?{yTC19}n@rvQT*cAO0TH8%BUmhi&EK z?VayWY^d?7Ki0hq5DNKMu3YJZ-n2V&{(QBGqYi})m~Jv%Lv)kkKH@P;fp~#*^ytx> z+vH|}M|{jgY3^gz_Yfuaat_*O-{j4cJxT14rs1p`co6{heHsB?2UDLtTnieN`VA<# zAnB?>p&}d^sf*?xT{H+wPG+4?I&=1HSE|$L_F`Ot8d9e&kVM%ZoSARRAZJF*9tDEY zT;d-ZX5iO#Ea1T>mr{&eVrH}hARhfI zh!Njr)Vb6HF?ww@yK48IJ!hL+%}5t7>hqY}dBDLll;Rxr(ozP{!JKm$a2tI3^s5H# z+ncI@c+_19_wE~FL{NKrmhGBecIA2m_LnD!ZQo3p!nNc!KGI`1XO z|7%(OKRwH%8b(FQBp>{(Teg+{(eQ%El82eYsarro_EeqwUQ1GUcXw@d_49J^Q`v48 zmjIDaByNMNYcbLrQsSc02p*2*s#A5UfmK;kQ!|h?7G^$CvXN`MHa0GM^j2}e zrGm!*Hl+sC)FfJ}ljZNg3><|Q@Fw~2I(3JXI4p})M>S%Ao|3qF%^IgvR=>qtq@Qk& z!Hs3XRnSVCplt!3o}TRat;yLB9$533=i5JhL>K2d#W>|1#MmH@){CyvE6q0n=$U!OhK5M


XUA1ZzT1}2Ien+%n>Yf^RDtTN(cKc62XlbMYP{rfhQ0@d5%qO&U z5q|;eTe=J+LX3|bIg$-Apb{5Tf5+pnMcZjCH(QOK7a@V#zqhw{$fC`Rq_|Qg5n$9N zn>U}&;BHr`!b7=CV$$L0KMCKwd6N$A%5`vZ3TrpXg)ChFG=rGy9A*j>1ouDSrfwl& zDq%puD3gcOQV>)Pc~KULMYJosHRLV8b?%jydy!5J{_D2Ayob-fUoTW9w~ft8p8*AH z88a$&=Hu9t(p{#cSz7d4Rj?pc)rPEIyVf5L1VTCuIg~PtO``nJEv(7=Qv` zqHye?p@8Y`+1j;8JwqGk%)QRe5df;#7Y$bQ0H%vcT(xc6HYyR21%M_Hmxw4+d_En} zsZV|!FZw+sP@^4l#U(2hVgQ3k0x_pjca^&Y1GMwtF_&;XJ18k(B} zHf`FZRHBFNft@2n2S+HC2opkP+GZ%Na4nz%YXLc_4pm2ip)&bcLIyXZo%+sr<+=LD z{as4cAxHzrhp^xH4p5akImuU$ivY1O@JH;)ervh@EBe$%kuk$qfKY2Nx|~8ayN`?r zPfu@M=yCd+DDWR{4}1^6eG_ z?|BWs+bXFbzJ2-f<=K$SQhKV>sD(&elB3HAB(?4)?{J0UF+ub5^X?ug|3?P)&OAlx zWaf}XPJz^B-ETP^5NhEG!oQmazqd&Lt1Z&IoF0&xPh^qp$_@+=yh;~1W5UI*4W|Ji zDqLLeM&)o$6#+q#qoZR<$LGO1ClnYE09X|wLdbsf=zU#FHiFz#tK{-_ z%&ad^x7q<)B}Zqvzhdap9y4qb<89uFz`^10GUGP7YnT4f-&3b}VDIy8H@E2K)?K@I zhqjySg6asBI1Grvq_;)EtDt89g5fJidEY1K;C2mw8y_wQ^rshzWmLJL0;m{&n*%`O z1*BBgJRVOs2M(J3dibU3Z&_G$YU+@N*r@@Cy2ahoJkXmXjCg&U(}->5sB=&~&!7Jg zJ0NR*1#}QR1t357;opoeEF=!!1orv6W}WVkD2b~XTle!h?3=-0c^dCej~;Tae)KQ+ zp1^w{@b4D_e`}Z=db6^!!p@vIqa(b{W@Z)C-nZ7dMqH~Ec&p)$QBLdtjli-tJ>n~jIG>9;XvE8#B`^@`dsjlWcS>6E!m1) zlD>)OKo#2q?48hI?$tX%P>>BYRKKa+OuBsZg~&de?Msdt8XQ}Q?E`=et?H^*44N`kPkGv z*}5us`)Sl3D2ufJuN}U05J>X(y)F9zTYUi&pS}yYpa1=DI{hISemX1F>K$n1M|x1m SiNFzhUmx~9RCVz5rT+lh=bViI diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V05_single_switch.png deleted file mode 100644 index 4c279e9fb536c20c56e45b2b82532aa7a307a26d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17708 zcmeHv2UL^kx^=LO1r-qmrH(Qdq=`tCqK?9hf{664ARsNF_as=bQ8JD+X;G2hgb1M} z5e1?kMugB32oeJbgh&YiVv_qtXU>_q=iIygb7$^4cl~Qvi-jcS{n}IZv-f_veeSI3 z=8e)DArQ#sAAdM`9s*fi2Z5{!UjGevC5h><4*bv$J!Kzi1@j7xxESmSF~1lZ=no6^ z_i_I=+%q`D2L{zt(>SE2tMcvT(9pmTeRcJKzx@w2Sg^Nxvs2g`u*!zO9~?p;5UFj# zZxL_wJ8-O%89$ypZXKD;91;zE>Dp1@-GOmi|?BCbqXyR{74F)ZDRMlVn#W 
z_4q3JYOj;3qiCdZzpttM>GKk4#m%Z!mwt~YL)%nAVrmu zDbgCT^g@xu)UMC7s)3enb z6*KiDICOa~oj_e&Oftzc;0}<~q}*bI*xXDlzxD$2tP;zH2>g5wLxKNZq21JYrE6}& z)VLw^+WO6v(d5z+wBk>TY<8+(rBZNEu!IuW^FN~b#ZAlrn6h_vP}?JH6%D~n9UdNz zQl~`F;2pk$O;I6ikrzhXX-o<8f)DAMc;FQ8uva4F(zhe4d|U#X*OL1xCV4C*@|F8g zLm1@LM>sbRlVKM zjXOB-Y-e7cIQ08OGhPSb^O^b%mzncW0&g*RrhYt)916}cEgd)QNhYD9d-B>TH2&g2 z>Wz#?vU%9mt5;K;#UK;1B2xC%f@QQoDQ3FLRBOllJ@n@fkNT1z+xi;E4Ox0zEckst zgI$S)?^r(*RvD9;B%@<1vd`qRD>&b@s;VkaC+EEdiAJBUh^fS|a1mNVb9@c~sm0JL zXi*y@2)Kj}r{TG@xqhNiEtNaWsi<`!AmCvH_er1Y_3@G}w?DqxjfWxq8B~w=nGTl@ znk>uDXAo1ai5b;pzLBQUIVj|3x_KYNLxF40CM?pBg312=Jd{y`b70YM@ z$vmS1Odq%N_&Pc3vj(#*a}UMfmkYUwQev+0xE`rEx6hq+hdU=jn3Z8fAjTuwm0f4= zOwKtj4Aoh5r7aI`CFb^VVj?3WZ+3X3XsgA1x~9<7J>DEi9fh$MK4b(v;e~X?CWN-q z!&g2Q+L1znkdFv0^P#UF-4d&klpww$vPb!S#K-8UvHRp6cwDkCB=p&j`*XIlUP@cU zj!H;1y?OoGj1JPjv?7APJWk^Cu1lOP;!C>WaJZz^TXY{(d39zE5SSQ}QIc|HQ}8gw z03>wDR13#eEul6WvGNfk$SW)?6sB7zpJruYSW`?%t_+*%)%xomWE01h3S{i9{jbYlj?8g&XA%szVN*|+X{te~5z{1SzNbX!p;cGtOGKacI?6X3z z^t65W@ZnLI-c%XJ3rhpJ?2S#T^fsEmpTr2BZIvSN(K)w?dnzJY#7$Kqb1LCXKMa;L zKtj|q!%w-;s}d;QkJ@N49LSBtu-y6Q_%KOC`1gFt1y_H^fj%&QsET%mZ zMT&W|$lffY?T12wki?EE6M;bTFY zFd73^q6`SHdAt;>v&AdT=Uu`FJZ&gblpzn4VR%1rY=)vU97*>_ue^!CV>$y`5e8U< zluKyW76I$M7&0^C4%t$iN((as*2|+815*JTp zd0b-nSj0H8Lsa&#F=(EDc-A_D5fSoB7&Uz>r~>tFK{;M66BbW3e3zw=vr=RMs@9Y_ zTD>*TGHBq0`%|6^E7~qh#XoVh)z05d9kHDVaRZm=SwqgV(Wr^o9ipBETWE67$h0#y z(#VP4T~%arseb3H#Jf}`MLKUqG|7>;@vX*bET$F@ifcuM6Y3Cun8kCgLHiFi)P$a1 zU}x+W=g?_Wo5;ak1!gHEuPE1@XrGS(Lh+bI>Vvwgky0 zh%y_yBOqbd4ogD1Q@JqfnxS=|4J0Rym^e*YHh3G*cg^!@(ceKIQlQ}{ zxmuM`=4S$O{T_-_cQT7ikGb;a(n*cX1h>zl1D1Jc-n92TUl4CH^1O;D@8p7L+vR=4 zHf9YLQRcevNJ=#t7acmiS3T<0&x7G?vK^^b)kI=E1T@vshR>;$u?%gb@mPjO2|;Yj z7PS@QWTTad$!Lqn=Mv#{{#?uIowJ!->=I4b=LROO$I!ULn7s1x%U)|Z53X$~F)77H zbZYgzb{z*@U~sIupE$K^2Y4Q>TPSV#Mo{c)O#+44Y==ZKVfBo(C|2Lwx98XdkH?Yt zS{=Wc1H45p&0W|JZ7JEYbFV0$c{Tu`O!Hk)YiLp9Ihb_NnA~#ZlnC_XqnQZ2gMZo& zs*6{my>E>;(?I;i5*n??%{DNnJBV>*QThFIPTJE^$1xBw<|LBk~QO7bm_j?ta< 
z@g{ip?-Xs*H;0(QBKDayPu{(v>s2M8);L}5`(d}r(*s?4FC(w2bbgEVr!hJdt;?Fgy4(o0sWVp;TIK#Me$*(Z!Bx1P$!?F@=2&>WL_pHORw3FfwOro0H$l8|c6QeHh*5GFN+3t{P&;DvXb1b_ z4FrE4oqje!d~m1ukZa=kUS$^;jWB9$vD>5-Yn*oEBwEmI%IrHcw`biZ`?tS8wwV5+Ncr)RSGxZ_ZoE<&$&!!06WW;tc5!^38#pLJBp zq}JoHVKTbHZXL`f5z@!bQ0_gnUZccNU%%l)?W#2*QWHx@7|{pkoU7giG$|$M6b0$0 zGhLz>spLHes&<;I9MX!Yx<^7o zOeDJ-XoMXaent*RCPY+s&esS4@McY?v@T1!>d(~CYwP)o?%c~$v0L;${4^QEJN|5@ zU`qwt`a}KXTbbjEl46rj*Ke%W;W05>f|*GSF8YtPn`hceW+3xgPwXHug( ztZL#vngnmEPt*w&f9oFKy>!PW^~HHxVvce$c5_4{Wk?d|di_p(6T2K%ZkvzA_hbg*nOEXG> z&wz%f)`_U5pbCmJsv_)6R)l9*zr%Fs9&wXNrN)H#Jg?SKWA^p|d;d~N#q6rBebm+> z$ABqn2gS$F?EB5auF!G5HD%m>dfb>#H!iVkTW_HO*OxpV{X%B=oOvIdE<>}o9Xz2a zI;a)Vz!JhCUjlWY&%JQmx9#CgdB>z5Owm!zqORvU*WVsb%y_?T{8;jU;=Jl;Ej+HA z(1%13jZMzeucfb#4~hZ31x0bs8ylLlhp6a)fGE;`);4R+E;`Y4C0WP4YMX!J)=nGL zjaH5LsuTWYHyU}9820)Tg+0Wz+A=3qQ?`D&XBzhkraH>grw9A-SztJ=twD+oMj}y##uFFuJ)kW1AetcC6%@ z@9WdI8jv6?ijs-RRx+JB?M~{4$Zr-tc;zNcT5V|c&sw=A9|zqxvP#a5u#CD((It8F zZp7lxovZX}c)Pw!I3OmQwNj0)w`kyZCnsfDm}R+6G&P20(UU&L!Zvwq{NN6IK1C0q zxUAKAqqJ-HcfqJjP=(^r*Xd`PQ3fdTyGjTq+ioXZmXtFMV53 z3lH$nC|>!#*xp{n)(nNv;zWmKB!w*YDqO0O2+WF7yVELoq|owkpKyTh5UjqCzX7Cp zYt?NHPo3edtyy`2V2F6#crXaQ@xl6daqX#7_eVF|U4JSZ+UL)%b-5E6va8T}L)8-- zSqqtfa#LuY+$;g>w#kpHZ5nAT98hSROewV4iPZ1 zx&4^k2Zuv@Vdv;ERIGi=z9s-T)go|HE*-uJ7q$^AdfloH*aghc=*r1f*~&(Vx4Pxy z*H_OL5N7rVwP)Mcshn-y)~u~I^*A~+II*GaxqAqg6T4@94>85`)*X8L6kpuR&%%)1 zGufjXQmJF7vb0@Unt1a>h-rsACF6v%0!Icu@&!q;+^)|Gr)9DrOC}t2CWW*@L#7Y; zfV*L5_}&&?f)7P}!Q*(ja7M&f;XIm4?b-E19|ac3mF|ug3j9x#we;`r_2Cz5C}#1= zMmzEzR$0&h%VYnJcdLtQ@4_>OMO$gjKbYyBCD5yNZ7gwHot`FHp8amk1V@)5qCN^9E4^haAZ}HH^T9^uokZxG03mptPeN7ZHYMDxWT3HFp5+Ik} z-_$WNG0_hej15D10uXDJ0fYPk9h}ci<|DTj#%!HhXcmCkQc|A@32dft`bcV|GSo0w z%y@3h8>8OY`9dRh@mht1iK7YZJSP~gW;TOp;FZOdV!&rDH?mz75GB%XqhBcjKx z-ckgOm=WMc?lV-i-E)rH7* z`{6fA(gb2m?#_p|tUk4DZ(>U2OkTfiWp*^FQWtKTINA2Y53;Lw=3Q6#JhDz|$Vl?e zG6ghw--W#e!EFbuvK<*$Mb(N5Y}+QI;tjK``$`*rh)OxnuE=Cm#?`a{!)``;v z-`?5Sb7+^dx8gEm1E?b|K~sVr`1mn)!k_vtUb&(D@uYS~Ul#ePsE 
z{}7N^CLcL+h5Umz6{VY|ZEKl)cPKi4W;r<~X*@OhZsD>#t3I!RnJa%_B_z=i9o()& zUgf57FCU?HXZ(eu!8+om8yWHnid;)<22=rM{*(y_mGJrxKc5-&U45*OeX^aoRK5zL za?O7Fz}3swTh}K!7`2Ez&`caz1!=s#WQKROS2Z~l?*ysc4_T@z?1Kkv*RP9XZm5Gy zTW1e`cb4+*j{SlCl8uh4Rk{|XgNAE6eT8vl2=m3bA1}5)RwC2t@d;qwM(N_DkPEa( zdJ=g(RORW8gu^-T#Q{bgJ=y7QZOaKLTd6*CM}i%oQBHOA?t@5Tq5IQUAG@EOtpX^Z z@6V(bkpvBnjsdKSgSMZbY<$})^Pz)A8Mo%;`^Jr2%{79lz5yOKp%7b?_!lAI$;<6P z4^UbU1QEz%KqW&UKV6Lzg+PvNT%{&_L*?U<8#0oRG0!cKl7va&?|hgmvUJ2b48RvFk>}zO%0#V&!@IP^ZCb)b@rGUPpK`Mv)>UL zP`vY<3z(TpG;2)iA$|S9#oB@cK$!UbLI2CM`Rk2;ctHQxZiyj~L%L?#Uc7jb4|r9K z>gh7InoKR$b5V7bKERq*8}7RwFi@SJ9r8x6bSTl~D%=XUP&MO0u-4?29Uagi`{%C= zJbQM`iBjRzZk+s}mqKYw$?7i$?WxEiVGni7=FO_4)jv~#%rcEuozA*<@1Cc>zkifM z%6HnZVdsK^f}S%Vz|=fw!UbbE=By4!30B8o&Eq(k_q7B9Qc0)EY1gh@mKg1TL2s*% z#;4u<+9tH}7`09?<*B|(EVA|q%%o0ER@NVzFk?IVec|kE5c@n>tD|3s4B~hgj;X99 zajPX0lateIHq<>izlVxvR$~uFO5{e24lpQo2&Zxgq@QVj2`F!WL{|F~6}rEc`U5g& zv@{6mp`@EPZ=SKRu<&+Dv3K#+@*5UPo`;VdvDRi{iWd~f1b>wIzP!G~Gzt_~E*n7& zV&DK;@zwE8AJ74SslBJC$5+QQ{Vj#!L^i|yQW3SfNevOSM2_I_vH4MW8;E-!Vng+s z*>77{n}qD!x38znT)ZC-XJEr>B+DEyZTx}fcs)Zymo_4iUA51DxXA(M_}Hu5N>j^e z!^-{4a%HYpr*4z;Z8e1v3ZrG; zzpn(;fYPB9mlH4;%trG|MQr@FB5Do`Iv|}1oLxYg&6k^M-_~+%yV%Z7psSq2NESOx z^_1Z<%`Gg_0sE?5JIwnMx55lKv1tA^15$n;wSTYDUydJ2la-NCzIN@J@c7S#{Po4S z=wBd+Fi=_efxf=|IyDzUL!UnF(D<*Q%>T7c`4wRObyoi^q&Z=c*LC@6fT%Hl;jU(YHDa$?%Th=qrX4nXDy%ku};skU22u7f0>Md}+pXJpU{ykV2yogg6Cw z9<1MogJN2fRo$^uTKXUy4*z9who*BB9kO42q?#YA{xx@#mJ@WvAx3K+QLE*;+EGVe z-*J$!^UrtZ{8du_50%tY#lOJuNPHF#xFAPjgalU_9IO2%k?Ss0rlK@#}ZW`l_I z9pm}?HGT86j|75vu)jJb%yYx&sr|o%ac_l-Q*WhP00oEZ zee(w$WWpG|IOMAX0COt&@{#u5awn^F_=_8x!3^PDlOCUf%oZVd%gnq@-n}Mri`V9I}OCJ@YOUvH@%b7dJ;2Q#w zn~x{C)tcAA-+q7g08u`IRt-gkF;-@04==DQ(-nAu)~#1OcZ1RWt%e(JvV8!p@ae(2 zRI%74#iP#S>u7}9%<4^VeZH&|0+{1B{MZ*r+BqoxV?nh*WSxIVPK+G>)Zdoi7>&Lh zKzkgVnB~hzg%}|00sYJM7620Ab9X-a04aKLS3V3453h?IO+MOA zBh1S&p7-G$$ybskfH85R2);Ok&E{nGrNR&#;|`)R?|$%1shbsobCF;d9UbjeIger? 
z-R|e++D1Ld6Rw@s+7LF?-bH|$>G2ZBQxXL4uQAkJ*#gsm!h(W!qze+)zFN-nt`fNG zV?U!1kIgL1Q?2B6?0aGTPdYsTeMueT05gpf<4W>2;TdGiPE{yN@j7MN{oZgBf(3Mi zb3zN4C%wSjbGl)weZMi{<1{;2H~7NM?sXz6sem#$MjQeRE@KYKjb+#1aEp#^uIvU; zYq<$s5W8{7%P%z1Rx4u0P0nGh=^Za42u{$ZS!8>s&X9F)nS;}oddg7ytXS*?Jvu7a z4l)1a#os=8+iG=A=$sN+>e*?`ox?&>=1<2APlV=2$g|0ThA_X(IqwGbV=-UyrXP-y zwrB2xgeEGrg9AUlutu1Mn2RrN1XBXAsnZ*;dBC{ z;bC3fx*r{eKO^~G!R^78UslAHe&|LtEL{@>Q&f9Ih7r$m7jppJxt(iw#* zqs22{$DQAdQzl@XV)Na;xm=FkylK-@8@Py8AI@kOCcA#oA!8w*XxVfSGUP}r)yAUdT${f%KU_nMsPL&zF=?feD z$CCMX#PVND{kL<^|L(nd+kN?nG!trIVBieesXpaV%jEge`$Dz{3j>bY6ky;YL)+CJ zf`(mkKf1;i6FI~AOf#sw|CMTT)pQ!ydVoV{A3F55EiJ!H`OH@Jp4qsr?Ay0*9|nc7 zt*tGk?8ht}m>Q_2T_nc`=$bxKL1AIX(o!8Lj%nqns7`-gy&1rf^&h>N0!h?;FiWLIU!vKh$1_6rbUBxTIAtc}T;)`87T0~DEU26|{5t`VC?o(-AH6$4 z7)*Iv>TnC9Ri#Lt<)yUTNWLA0@fclWr^)hmqW|Z()&C`KtHYUt)d*hnUcZNcMDP=K zeaZLXv9V1_y#Eh$K#?%7TpD0) zSrg68&BC<0c=>YCM(lqgD?n&ddxRY8|15BS9u`{kwV_mdIP1m~wsA8A@}w^8XuWKB zloJ+Oj&ESBFrh$bQ#t!=Z~o3*<*PqXSsK0#njLUneC@!Ki{IA+Yg*QzJWwv~#+^AW zn)Fkx=DUXMK3D_*h8Ij%sVTKyo`@9*o}o!qQ~TU5XL(@(b{X40^7A`tF?kdkU zFDWnrypW>CVN-^{)++mKn=hl;?|P&Vtp97iNat7*_Dem33)c>bMc(wI)ere{Q5ZV4E!Mz*SsarFP`w zI^(6|QPUm@P8GPt!2mvDzEj7}EhuUY%?((AJ;6{q3B(>{vuE-9T7FqhfK z!$nG@nnu<#gu6aRzWkh5H);nV~W5SSGGz-Ds%gI9d0bsuU)v!EBIr6dfMH)cYWg-;1QLA%?rC3MR~~w z!FDPWz1bECGSInguf&O}_SXe@M7j7! 
z#+sI~O7_a(RUepG8ooOqo%{C+g0nUK;rG8wK^*ht*w1zWJD<29Rflh{GaGzugq0<{)LbKHzSW zv2y6x@73h#sVN`=18>b=a>Adx&H3L>v;0$q7Bl7&`97WjjFT?4#P2YwPPHk(S_4S) zs*dR_O-)TtUteFKMV*;oGRu7nelP5|agn~b;>UmFwaYu9&g0{!Zx&N5M{04G;fwUqeykdQL~~x{3&&%gHpmTsMOkTbn-UP z8YXL;7c{;)8h-U3fI4g6{%z;atJKO;)??Fgtn!V+*Ci~DHaY`%QMv;#ka7uT-~s*R zBl}~8@84Q=zaIP_Dc^+iq`z|#bm7kXT|ijnPUDJYYx0pbS(r0*V}o76Q|d5f;gaNu1G7X0nBXZ)O_b#6Tf1%Hi?87x03x=}CnhE)*Eril!H(W!v9*pL;@ADxN zPxNce1~m|bV>octn46iU7O&U%8)OZy4$B z-7U5o0)gzke(gt72xMz51hOq)=ML~M@e9_1;D=g}_RSzOU)P|}TLCT*!&^aq-o8QJ z9=DH%xC8`x`1&ZypFbym>FiPWpdi0MH3bFOzkEX8H^5EdwQX=WILa=+Yu14fi0F6x z{{-AHJ>Xmh*(UM#RBFf zPj0^+wR|t~x+{*GPb;Ol@>RzHE+fpD~e*JmhywKOiXo?7_&%YSoFxFh@^?)M}bsDOSm@g9| z+bPCX_927$VKgFw7U)rLxW?MnY;fY^macsi?rKfkJ2bvqWWYYuezVIBn&iMl59Hky zQai1rq9QDq=NZY&q!<`S43T=k(H|X@w@VKh2_9JPu)C5uWEe?WYUAy4kyVon zxH6ZdAtWUImp8jc#G?q4PA{i(A`UvbJ(lp+;%yMI6A#tpHRr5}V$2ZL-pN(pnvoEX z>drg!X?V5^VND^ebH+s>!g_PwP0N-&{O;YO)U(#64@xJT{Z0n6vcY z;{HkDbEC|hwF8vtZz(-JkA0d}`j@E+C{@IQm!m#rvlrX5>r~dcDGt49ZFD(nTH_RF zy-iCJ`M{+!HJ?RmP_PdqD7Bst9A)2nZPz3@B#HXt*W2?z#MrXD-;U+gF zA{SqKEDTj?A7ytsavv9v2k_jLF~3aRG&+t&TVBKSG*?zv4@$XzBP8VGzpc8wTtSn1 z#`w%{Fv+LfSu4EFHnK&rW&X*H}M5f3Cjs;jG$d>R%~ksxgH z7HTNS^?JLO?9frwrg4$-^k?{;5KrfAqUlw%`nj@;C@%Z97?pn_xlv`oN0AL9p%lb= zD7<$8#FEp#ovR<56Dq@&#&2(W2+8?IPKc8_Qe;oTmeMp_mi-eV?1nsxOJipqCcwL# zW6zYs_2yG2&#nAw9QnN_GlOW55ca!tEKA;|^7(oLqnAXZ(FimqxS?n6#yL4z)bf;x z8?tGUnVttTI@xhkc4uc&T8W@$1|p^sqM90bdYNt-rX3yxuOFr{vXd@To2U79hu4t--^V zs*S^b+aF7wDkO=(#a0O_6b)Bf<^*n~)K?CNpM0#ZEbiZZi#s933tK`+&PX2iIYpM@ zt+$OInrR%NE*$f{yRr42bAo2|^SRVb{(@$VE=H>yknV=h`q5%GknrT-v#O1F&VsLT z0*XBm_b!K?hP|b=`Z7UoRM@iI!JhQJnRHG5u}k}5 zIOW49D9DKNPIX$XGkol~`>im>0Gs(%A4ez%R;4DJDMxT|I2gtSJ5 zQR^*ea>^Xqv6FqtEeAf8Uc&-eX}X*d+2F_?7j407&EzG;Qeahmmekj;Uz^Y)rn5tZ zYF0=X?y}B6HmE(Vbf(w&e$J_u87u~8LTN`R4&0ig)9R*k;w<`1U@HhjK4==T%s=Hbhi62Jka^>-(b(Xn<@agP0@+9(N{{)gM>v)D=5L!^i z$cKq^MSeQZnl{Ka8AmR~Ii4kAwD7gu1z!|PX8@Fo)KE~82-Rb7=G0!EJYEpeWvSzL 
z$9kbv0qMWA5FF;X8qQ;ynPh6H05l^JRJmmbKa5hi${(g+Ums-eaS*X+kJa4;Hc5csIn9d3`I&Ye;`y>;gTo4260vCZdP^$Nq^gBJw0$D!F6kw4JrGDz{%U-0kvN9LaM9Jl~m|7 z5o8^fV#hbH3aS1yoRF#ea&^!RGmeB`%#)O^tt(A2sTXbfq@4@(bvf=+(k4m&WfcU$ zXwi^H&rRvtn&E)XMfWB3(WQqbbk>v#zOELUAj$96xlrCsX{SLhi|%NL`fb;#WNgH? z8iq~NjG^z)_=1e|J70LG?aAKiPWcA!TG)Uiiu1f?5EQG}ISQyYuw@o!m0Z4#(A8}_ zh;gPM))5;geoNfa8}ZKlRi1rCw=3%| zcpW2pA4*7+uI)@yP$;)=2&o`Pt~F4O;0z^8OLS-x@Z*NHWLC6|5~OLUF;<-x}-6*f?$Jf=LbB(YpW zCebR5p!0AP`bcAUs;>~U}S)O5)+W0ThvqMvfjHO3#^V2m7 zHGvm5YFi>{eRuBM89zjRufZM@R?x}NP10q&ic~|-SRO9zY7_LmPTl5?g;oZ7$Nu5;C)etbmU60V?Mv>w=R{|;iuleYjn3jjzolq_n7D&Q5xI# zc334qoN;~`ejGo<{LYQP;;|F9Dl5xat)@d)2n>T5PmSfL!gAefnatVzGzYY2qOLpB zeWPG?G=e^UbFBRn1%5!8_;LM{u9l5NC=10~L$xSwQBFxv>1OOyzWn1B`EOm(0WLC( zYw&!sMzAV9YnoK&EmM9nQ`D(F?)79VOh>+P*^#&6*b4Khqo&qZDwf}{-4!M++8}mD zL2TtR_Ez2IY%!_Wku!VyO1cK(qVZTsr791X1CuP}2ug-IEj8x0?@^%6lt}yH3nO_i zw-ByMG9G~bYG;5$26vqasZd!vmR8{x{9w^StfHi zbQHeTV|l`4S3+I+KsfG`UCc|Py%ruUcg|^EyyHfPRjjZXLlwiMs01_+rH&i?BraBL zpVWtE3U1%HTN>$p+2JQyg-ffGr?lW*JRiQIF zIsr%DzILj-@hG98_<^1IWQsY{{{h@$Y_1%SyENb?))e?FzVDfZiSsSSMHSY!0R=nrlV)#=dqL(%0Y$GdO09XF)$H@s? zMPsagsppLD2Aw#)NyyFKsO3aH{3sE|)EiBKolcN3{z)7?%gs(W(%shMy5ZZ=X5@Pv ziD1Sc>h^BjT-O-Srie3l2IA!LqFcILx5%HHi?`G8ob2sxylI$Iq%xG;-FV2*<(_gv z_|%JZtm1THO2=>dzTu?I*g2_By_#llwUo%sXN^~LurAYy1ri<1cWSWY z!cfRuY0X1;rbN0-T&9JO);{5fJ-BD7t^iykz0r@}t*Ck)s7Kf#|? zgMlm}u7)T8Rd|)6JI;LiopE?iQf%QI{OPO4%YC2&r9c^p2?i!}0<*@=+kbez?L*`P zCn(V+HFTVDT05sKUH6Pm(SvBs1rK9cOVjS+F`xQPZh6MMk~EH?xCjL@gfs`^b3q$Em}_ zFdcuRzL!I2t!?dypFr(s`VOfK^U9$?B~#w1ypKW?>ueI9*p+sKVD8#;Uz|2J8C~|x z=z_Ulm=xA#)P*wL!=Tk`V3r+EuQ;XC(I$qU)}){5hG%N!wdXd&b@XXk@Z%~{y9LgP z@2b7L6aLf8le8#T$Riwwmp~@n?8e0DR(wBmK^Ml)R?*%Cv7o=K7w2y6Sr4kOtaS>Ry{Jw zr&u1;FS4Lkn%9*ZP;cuBSB^{Y*-bbG3O2=C?1v}8OgSlBqXD(tY52Q+~61aLwgL%6XPSuvP0_h*tmX$BHXLHSjxy5+Tf(1FDk`SI4f5Lfamh6=4>U)<{6SXf&6l@(Eq;~7LOPzFiy?IUd_lEx@=>a2?_rqCrynOxa7o6{Ixt#! 
zr(my5Ra@%#1gbLved`bP7r?s5I7;Pk!lGVD-GW347w==P9}EPxAr{;82=|l$FDb z6ug7=vQ5&~!0`yYAu%AmPy(NAb*&qW#m76x)Yi)ND1_H%j2AHO?@B7AQAbm>4Bocg zd#F*Zns9RJi6c=2DNDZLuun~)J9%qJs+A)@gdDR>OBly`y_2za(~mOUDkoc?F$ASD zK}*rv32##$HHm~=x-dVLIM=vtS%#nq??=Q76cB6a64iZ28^@=?$P*H`MjS|Bvr^@3l_M#fH42^$lK9y=5&(qJd0eq&sSFrU zR$JCa07{M}Q#)PK?3E)M*lSaB#sJ0;By-El%h`Z@aI09gnoU(8MvO9JH!aQ5Vu=9D zJ6GcmrnHF@0c)|*>AEH*CMJWQJfUzHXIQqNK8p$LR<&*9=>qgH#!aT<3CiT;X@DKrhxl!JqpX2eM5b)q z5sq}q#Z|b!e>SeF%cbQ+&<^7e$oT5zaEnQ9l5dhDFVUxdrXYS6?R9~%(zP&X%9EP& zujp{L1GU?^+DjiqjvUMwAUiKOQFHBVnJ)|`S8i7qP!aNQ7%3`mS8IZA=RlmQ0Wop! zLV#L;pDGIbLfg($}9TTrd()X0raM5q zu4L2VJUUI+nfWpso`~ttdo_`$`z`YqTZcs%h2a$42TXs=h8I^gA(o1N8c>unD6epN zIfEEkupS|6))iRFIQ;=&LB{POFNgKQQHJ?qW4sRzY4DjGX1^D%PHls5R2{~t*63~%N(*f*b^N)vL|A}~X z&!X*sG~bkOTJ8zzHPN-Ocyd=5T6;6gyyXy~xNE;?RFW3joHOt$SqFuvD; z`%9bp50y5$+bVTmns_jtYyx>;)V$s4O^594C>UTYMz8_gff-X+gj)q@)D5$Em2x&g zYci#)xYW_5&l#fQCgQ@UL;b#y2S1MO1dK4`-qDX+Adqi=j@ko(T;93m^DhiOUA!_f zGExZsAWL_Z0p=@3?}U<>BF8)WC9Eh7s?7W#-)Eqenc=rrD&Ii+tm@6ersd-p~Q2HU?DHNM6&)L^(&C1|Q~=Mjlwd zt}b-g{LtmxU=Cu+^%bmW(0rw0b#)a@BVS59)qSP3u+Zas;q%z#m6by4lz`fW`T49Y z(c`tz(L2a)B4k(4J#6X%sFS$US1PNk3+~(zSTX$?of$~wz#2oAOJ6(7r3!tcnJ`ve zS_9VpibD5M`^0Q`{e9MX@b?*9vnVXxN&8wj6o@6Yn z$074tc-hSLV!C~{(4~)2%XLJAPkpAMS2f}3)2A2DojW&Tm?45jqhry9XVYwos z0XD@!9d=?fkU9|&5k=b0=qUF0RM7`ZGjnrh6=KxpkiJ%{8BYs$clQ?v%a<@AA)$iN zQ3r&#gpI9jU$LI2rKXZuMd1@nA;=uXxYT*!jrD%R%%^Q@OTT@fh!)A-TL3NUmT6@eE3&s^7DF&u!7l4QIj|*!5`edK_*Na1b4`eOW zU}ei{pVivIsVS|*xq))yt5>gj0QT7fFv%W($ubm_@Vh*O;&IHpyu3W!7vbU%$bRXM zTY~*Tp~>e%cSE3N`P2hCOKNddi#14$##6%QN=bVh7Jm*J|Me7K8Vb_}3gNF&dko%E z(;Rd7%`}L+F^%PNx#OYmvmt9I2p*XHI1*GpEiYSC!C;U0RKDq%eD2<>Gu=3D&_a_} zPv6=ohU-T`2Z1>&Pm!omQ7aMh;nqx5IRXX--CWZ%6rec1x!%0p^OY+Bz1$s&Q$(;Q zq@=hbxv6s3+vx0%LBHRv(T@|Kj-T`25gpxA&I|F~WE<9Cr6_eWoN-b7X?bIKhgt0V zk6?@)l-&ktuH9LCx$iS+m4;i+pNSUtIgO-+y zI=9&)>VwnFMohqFuv#3Cp$K{tiO+W~Gt%(G?x4>Lzb%q)u1)ErE`z!ANxDu}%fi)z z%K7CQ;pNNioYj&u!#=Z5;|}J9o~4}NEbmIt6ZC1asONE6Bw0ZF+^%QT2hTMEvDZ5> 
zYR?1pVcY`DSD&{e#WRWIv191z@@hf&<=>A~mql#1O*^YcQ47Na&Bof3evXb_27I}? zCLp^NwkSX#544-Nzd0dD*08@6K?EPrl^jg9nKB+V5n}4)FL#VcqH&w)O+Am_z-Rah zWcZXK#yZPqCj_Dc9=q@-Gt8#B8UPisbYn_`Qq%347$fUGp`j3u%}wVa(m0&i!%UyYo9h7~ai9mx zR4P;$1mZNc-|x7CxQik=Zg*CX$#KVLYaK!2MCLo*^l*)lN zU;r4Ng2;ztefhe)bV+t1VyJ*)u31jz+$wBx@wch+v`}06eZQf_4v3R9_-Ik%?WOip z7`sRwO{5_&Mnxg(Mm~BKslL~@v-;ZF0&+2=vcc4IUy%gFAiOdqx-A#r2QQIUT=Tcx*g(_9<|5&q*N%m zuP@~w!Rs0k`9UbS^u|PYLjlZ)^`p%y9b2%`K+iZR$@slUDi<;HIH(ycBoJwWfcVm1 zg#hc}utokwMx;#6ytVKkR4$uxb6~d|EzH5wlcKVBbu`(si6!STVk}=Wiz&_kyPE3*xx&V@2st2v))id52 zH}^pg0&$g=u5H#~R8oq|arNs+OuqiCLv{~)gLjD*GTOLHv-|Er`D^ND(;c&lYkI&s zPALnBez+#N&mNCN3>Eo13t=KXJNc{h5YS;Qp!);!Y8%1u8UaN znPB?`Am=i7f%B9#tqzUPImpGGa#@+w(1dfSX$5d~=p`8E;aY*VvHGj2wvISM7eJ01 z9Q5z`I&s&9!;u69gYv9FXqJ2@{-QE>BCvcfHDE-BR7)!J z;N~o}t{En%ol`nXu9r`+^_XC9Bq)J)BJ^H2D&1WKYY!+)q}DnSBReo~n-l#sXoTaX zvYYz#@-y_D8X0unC6^ne(X=M)x*9$>As$HqACI9~TQHqNDK%<)Q#(Bx7OM66+R>?A zBM9U+$T9-Iw6_lUj)sTI7&|FZSJF5#DJtIAY>%LBdk(Qo3M-$h%Biv6R(@*dJ>KDq zSK2%5hae;%Udx3F*HoFyz(oWg{KI^9Xb=d5)&n0YW%cD#-@$6DR7$zygC3*R zc$ql57K*vBzvvx$5AHDy&Qmv|h~N#kqBa6;);ckH7QP4zvGo=uC9SFLHKW;}H>;u_2V%rHF0Z3t}P?gfvyt3C=BxG7{B-MwffB&V-#9) z^+im+R=Go34tz#wrk>Sr5KgsR5zd=X<2{mZobcsC>76xvpFq=X(DC#@#{)D}$n`Y4 zfa^nuwGlIVJ#M*DABpMyG-G)8YEnf#LzuR1t2UozRp2O-;FF1ksOMQmcz*fE;|?as4;ZSkkgxAbobZHx7l#K@3*S zuxfZ!H@_{ip$4c1j|Mu1FIWsMw#=wKz4Nbyk$#&)1GrSnGP{g!C~H2b zTXi~5omMZ4m?|L0_*T4ldQ7tfer2iZskt8F>eZRD70yh~o!9Y_vgMB==U=uJ06wY@ z%p8y?Pf)?{fziQe20@Q-RNI(LBCX-pn#M1UXjH8i6_@cltGuPd=sW@Y^85nuD0%QW zYYJNy-s?(bSC53)mIDq$35*8y)-Rl{@VVYtedUW6les`4%m=Os=~72L6!zi8F*)Ff zxfG(JAeo{I6M#H%0MD*x^8T|_1xE*9Pjh-@tqlf~ztpStckwtM&%()5z&|2(=ukSX zpldhqjA#>j_4$aTwfZwfGosAo;=ehEglpwCHEH%eJ8yUynaJ4lX8j8k?=1=Uw6rDc z5EL}?_4VDmmJ`9pBm4s#e*4D(s0FjjYin2=!_$Am2-P<<^cnC!Ru3z)tGfW0e}hmn z*ZbYw-Qz8g&*0q^P)nb|J0UgoU|k^}c@$bc{4;a*Ptxa4Wa9reOY*-YEb{?us|=v( zoe;*InT+re_Id2ZG&$v~s zRM@HT!|otI?BKj|BbT&po`F)P!I$W$M)m_Tr$y7u(_sH2%4O!4@|q7!WBJ-~2hUiG z01Bok7t?21A>jk)A+OrCVbahL8dP0SYEL#{z!4vUkTt6q@|;3l4sOI)l;iPQeV#uE 
zsKk(~tE+$2_&uvoYIN28Dzi9J6fd^?t>&*4K~YJdpW7 ze4c~uvsC0M7Ek>D4BUIWN}qkBxYB6tHBe?~a#2M^9eWJ4=etq*msE%4tzRA2ei^

*SXZJ(HYGHyA0o>@I>q7Ki4?2bo=rTZ+RK!NZ>t90Y| zN`yJ_z-Oi6e?VpX%uEIT;OuO(UVExwdxGZi6tSs%T(LT7GY*;DG1qSx+6iL)8v)Ui z)*fTeDpw+)kI|O%n(T)Af>P*%T1-`sK6M97v?Zs{3-DdRPO%pZUY#JKJofo?QW-jU z-^jNuW}U^9XIu#6#wkFxz0x+;e70^QlcL<<^ZJe;HrmBD0?eyVgaGYqU}#u)tKHJs z!QRvT+Ft$bV0_>^-A|gKP(yxe+2G%4bNb@k@U#JNs@coU_pet^MNm%T9%}5UVQ}gj zA2ZN>4&Yfl-wM7oI7E-sEafb%si^HxK~X}f^d8PM@&^tWQH-)q?mS6;ccY%ZU&_D> zAAz)4OuuSc`11UUeK1^XeWJ=!7~IP7kG~x_+_>>(9B|$SLT1T-iOK$7Il=#`U;o~l z86Sv2L6x|2SH~7&f5=uybI^BwyI@~}4IJNK-jA`0N>`a|KFFw;3&19r7(?Rp#T<9&~q zBcWNef-SH43(?!><>3Fr<`~bu_C0VOyIJ_xEk$c#G3-w<9{8<7fCH%C4|bwF&%|&h z+r;maujcr<5AmY5noY3Z1nK+w&ay}7J$q3!6MrYu&&MO=r`c(+Ij>_R9^=AB2UHpe zm@<4>`m1kl`0`gdxZ+kz^cbDqIMH1>>x#4f>Fjlcks6vk1en?-Q}FUtdWP zoCUxb;$#Zk=miL=D#!5lUFFv>oF1#5cKQY{w;FWXnSNUz6Zn&|vlq65Yqs;_4v;=J zU*4>73(nIf5X9{Vh8x3jF*QS;vR{w(#3$Srr<%P)kusudQYV+w?+O`pN-h0T8P@)| zk};J@3dOGw#>k}~iV+ngK>>qg(0(qD0IlRChmF-bBcBPdzezZeWo#DwgvOV(W~cHM zP}-j4Kw?mPIWnWZyh(Wkv1aelN8zT5fzA*EOrbMR+BD^}7d!To!0aRfFOqJI_^chqhe*5!ya6{)}j?@~nf>Z>a_0}XwwuP9LrVN5}3gMTwAhU{@1>JdE`GTeSe$if5%1ex253+{st|tra&tz zE149%6_|-rifQrRaR`hk#{jaXxw)BNklZ~zWmb7EQL~%Ca~@&1;sd57$2|7W=b;D$ z!qf7EQZavUsrWMfcZkt6*MJy&P+fU8-!*B3C?+O0?`Hm*g<9zaL)aHAFbX~tN+C`b z%uaG%{ks5l>P7$i_viV-otnCO+!65=(~5SQ8z_xndUMjs{fvOw#FB?=6|*bL%Xv1c zJO28uX8(8>{0H{A{oj4^)1X-el!nq~XMr`X5GS|0#MJY1^PGZZerf4W{7L$vl9Gs$ znSxU0JTjH%xQS6kg7Dz?>n>i`Y#1MZR@KxD_z{!ENZTe+Y8%^~BSG~v{bF6ne;m)( zFJJC2b)ER!jgT!*X^n87;tUW7bEsv9SU1Ux&FxDNzFh39F)yiL`9W z$;mO7ZTqw3=Wmns-*|!k`HMfRCXXY&b=S6a53bUtJS{u?n`hAH2QumkOQ?hO|BzP?f@ zpQ#l~rQ*kE3u8jB*Q=So4bdb5*L&sP!=TxGmiBLgbm>kMx{y!v0SB3P(dJhSx~4dv zX`xFfazj;zj}nUpfIRC7gtGs;>(KuI?E7yQ~*$+;EtDY?IgifDj1?CAZ{?Unf5UU=I-i|D^^4E>*0`Ti3V z=zrAI{#iu-G%%j_2EKVd71BD;UJLvyg6Be&j6wl>lzzCs%-+`St1DQ;-F1V#4F6!blWd5 zJ|X(y!K1od~TR7h)krW*rZF=?YulFu<;BEpC zrw$!Ifs;x5K#Mxry16n(;&0In;_pHSt`lI!qoa&%0bei#wjA_;-GJE+Tt*tUMOaG% z7^%x4H$*?9_ch?lI`etsJl`X$gLeeJApiD(>5maWX2hP*PYv51RnPAN&xT&-OT$0WRM!zGR4XZCC z1K@IlCgN3ucQSA}O2Tard<%dGbo)N_{vOeS7>&te9vH;x^;21XKd7-6TmJac 
z^_L)wmcgqFUe=oPt&1L@!6{w5m5Sd^w!d zMleWr;V_D)uVeYNJ7DU-%OTckbAK$jeNYRwXt4g zJd#FfrGEb6A$p*|7d9a#K5I4;3z@#0v2_wyv?LdCeWS>toUQ2%eEJ5Eiy(<0&pmeg z$(coC$5o^1^a1_z^N^sxTWRHHqzoO2Z1-N7>*ZzHvk;7r0b`$FOCq> zMdpc?M3yy-Qv%r4=!k;e^pi(JaXbl#sV>Y}{oz=r<$g1NfcTHk^R z2KaG>$oYKtFp2+`h)DbTzQ>is0G8W?KOtCiW|U>0w?e3`Ci51*7s7E zfd0M(a;{?1k2OKWsPb<}NQ+iAGu~N_rny3bf8yuEKlf1lB_r;C>!|*pny>l$EyDYX z9a6xsOX?|2%|oQt7={MA-uEh-yS_GxSL44yW@f&Em_wWD+RYX0wXDd>m@HW8WbuGP zQFt)kV+$iL`71j7DyUy;FM4i$_3B-;#TC9{!Ow#8N|}19FYF^XEX+2uD(sz|ojpA~ zGH9+{yMfqIoH1;k4OqK>;WHwCxa0u#8-1np04621^?J*Ea_h6jMb-)5FRla@>SwNk zM}i(9hWuZ_ER6}DAz}f4@HnTK1sq7wYvqqW3M}3H2^dHJQX|cO74-bKM#TUAC;vlg zZ)o7Z9SC82;HIOaBVdj{A$>s_O@2Qvee8Z69pmLwL4Bv4CthipYTft}n={oSA j=^OtER1^D=C!qIj+WY&W{dwR#kn7q;KbHOA^z;7%|G0z9 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V07_chain_sk.png deleted file mode 100644 index d58b71634afbdd8e6fa9c51ba51a4b6b18576ec8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19640 zcmeHvXIPVIyKStfD2!ua6agI-6ckXJbVo)|T2z|USm`134zYbQ3Wy*@YDP*RRF&RQ z1f+xrp$CZ4VrY>h5JE_Do)>5K{$|hY^XyL27~Rmc;Uw@FxZ9)7;K~Gmd)Tdk#m*;;6F9*^H;r1Q1;$_*FA5;jIMjTyP~{Z zoo*cTz3u7cgmP1mJtZ%D=EOlqZ*O-mH90xtzx{%# zzjY3nZtz)#M=t*OgQB;XChcjy`|(XFKUySYmcXwyu;79QD zH}~!c!eD2&td|A9JG2EX1?<6}c;V9h!%-TiXKTTSB7UA1rf_{$5Jc~ zkZ1czqOK@1miI}5cc^yWr^uk5GeYaz7pxmpO-T#tn;~z6W%QnzKDzF^xC1AAhdo-x zLQl88S*qvF)OYXHI}mNR@Yo`vVpy5vYEptG8mSnWjJ#?QPYz|bBl>DWD{YKX!Q-Mt zHU)zSCV%+w)Si%*I$y_`XHLqYRYS%_$rnTr!5)EB14NSK{nd5NVcM&o#%2!uXl-TH zjo{G}m}EYeOeK+dzG|#tFXE||1I5jOUQ-igO$rJ{>|BR&?n<+Nam`u$JJQL@_1 z4?0vDnjbVhKQF8G;WLdUx1PV)kEn01!f=Ku-CPz~du*=H8YOmMu~XuyVY2c``rwbP7LNcA-5%4lc3sA?%9gWOOg9mKM^~&u7B zMn~(0JbNoX-V=O$Cq>6iyleBFR{e%e*n9zrV0j~qR_y?l_eMDMW#4TtSZli`Bav`m z)l%F?eI~#7@!6M+Lh6~8#rAvFNk%Q#j*HPQ7$nH1zI-##Km?x)1pA(rwwUj*XQnenwRpLj{vunoBU14cTuVc>odws-_WYcw znvr-*8uM&Lj!&0sx?W&pWYiSG;{}m9qd7D55n5>ZvMpl@FC)YrMwX8}j(&Id?p;N@ 
z=F{EJ`SDU|{^`prT(I{0qfoC~C7XV7^ld$wu=@wU6ZJCvlKu2gVG?#TujTyT2=;AB zV$09A1-xo3Rnn#fjWW{4W)R$I3$v&*6vp#nut(@pq@%{_QhB|Ro|up6LQSQ_L}M;P z*`L{^Cn9kv-$+Y9O|E>g&ssPQwTe}##hp13bfywz8?x9-MWTYN1LwUk)+aD7&$PF{ zM_co*{ajR3G<^|EKQt59Lhl#W_;NOoLn7OyTQLVAzz?4ZD56yb+-McvWlS zsgACV{k3a9*Kn6eBJHW5UfkCvqxsx$%%ru6iAex&l|A|J;lno=e4;{j@LXxX5GO_N zfX9!o4C{)fUJb`*JW+H0;uou*MVCx=J-r+xtUmklvZi01lCp9<1)pp&Y!O=4r7Ic- z)q%pY<8-hCY>yvP(O4Qx{x=Ko+i@yq#v1U;@|li6oPWL=!EL4FYI{8|%Xog#@kt z{M?+ai|rF_-twD8@Pz6WZgsVvcA$>IV;KZ(^cPmXLFh@kr=W=J0gps7Z>jt~!LMnL zMbB`dP5-LZ6h)OYTdZd@Usyday<%uLFH*nU^pOD9Vo!`Yh@xsdD(maf13nV|AMd^? z3k8K^zYnRfaw_S~V%AVZ`MIuhLENi z^o1r~CFWY%Yd#5OjWeX$5#s}Zuo99wv1tmb4h zwB?epuKV5Nb30J}Iv)H+3Es;yWcM>5;fum|=4nVTtn}B1exU@slaKOTU5G*(%2*Vh zknEyTsmFC=={as%{qzqy!Y9Q=SK?Zu=&B9coI%2s z+Q}q)_J`5v?IEreU5u`_jGyBAr6i0sV>EDPot7kIx$UVKPEXm?Wp1=?0b~1zWMr9T zFA^kPz1o=@++2%V>Bxj(gchGo5J6Bb52(*q96o7Xmj4-E37@#wMfW`uGaSD`=*&Ux zCt>u3C~cnqse5T0d-1y+Ty#bBY_PHKsIjL+M~F()2MQ}P7*pO}P&OoO8#Gr+jWsdG zv1x6zQvtgdL4`Uo6wAlv6;|awPmw8SyxRpgycwtW$t#GF>A*e~+eVH-BSz!t@BKn0 z-gBK5wb-rjy|k==cT2?>uB#fOL!o6@_f8egAb<0BQeamO9Js4Wt6@B}ox-C+y%v^1 z6SMC9+$~!@dnG?XPr3Q#MwRl?)A}rELkGy#`yPo>EA`aVLsqES7QIt1boal(AHETH z`8BJa&uuK^6#CWX`D;XN9PuNEH1c@C?T2xVgEylzAGbuQPCfUm)pM6A>QyJRJ4THhscIW{dXVx^uILK9sK)X{TCFw;fiEDEejn@>aO5#Hj^nPNFV zGn1EVt1{|8{_%q6sGZBcH?}XYy?J@KL3{a)@a6<6Xn0XgmJX?A8icz7;#YeE;UU8B z^a(pf)?`-nq%)R#Jf6*+wFDqQ2j@v?wRRE(H&d; z-&4vbyKt_V7atizsg26*eIp>DQHT_m#yBMD`FnCa)x#K2hdLv28q@Ne@Jh^B3voJ1 zc83&3{rQM535gq(rjQF~V&vmmWZL_sF}srVc)HW=D#PwN9wl$wLjux=R=`#jKR>P} zN@&fZ2Yh4)4+PJ<+@7LX>E2;%nKGGV5Q~E!=vY`-m=P)g+Sz+}B;W`$-r5a^v!#J0 z4`+;Fq%ktq`!3`4u0@>J>Q`=8xJ`M~=-fpjSx4BTrxY;9b!*Y9;}XK+=V9`3TPw~! 
zZ;X`*`(8NCp$Z>%aOzUL9EEPmc7Lal*miG|;Nz{>hsHHbkmfa4r$&QlVhO&!-rn0c z*F;NBw?5OI1Hj28BrUi#9&E7e-0ODPrUx1UQuWIA(=$GO?BugD>f0kY#%x7cxmctaSL}99gT)(ZK ztp`6Qp(b-m*xOUAuP;n@T2FOgvNcmk<4ePh!PUDRALg_$%JT6@$4uYJ#tfOTpQ1|9 zp!&+mPWv+OD%qgp6#1v-CZ0KgZiv?2-k!Z{Vrwq_$ZfXjy6+wH{Widwok1L)w3orF z`>TGpU6Wt@?KZom*9kkG1e&IJcVE1JsO+?J@&>Ehu}3lL33{H*r`6{##fed}wYgLm zTUo-aS`5Ij)z4E)PZQlo4-sCRp=Da=7D!VLKPh)6AuM?~x?|S`&n@r!l%LJ5#E=s2 zU%0)|>quNn|B|2CE_cSFFT=1VV7FFSg1)9~)#vnHtq%dt)?sD0HHMP(x@07-n07{i zEU2Nmf+|(OR@Ge*l@4*N2#w`TX!FLk3zhFB-5<5P#~2UAJAwtgdgpAR){@hBYoOb!w2SI4+M$bQ-z zyzi;TA$S%(U<}*ZGlk1P{q33VqYlz{U!ATT{1Dw}HQAaW{X!XDX66BU=I&<#J5nQZ zhsuOPf)$YkhO%+OE}eIsG$MD`NLr0z8D(k0k(~)zD>DebRq(M`9pr7%+#dVaC>{2f zv0$ehjs2;dAGJ;&;ft|jBP?ph_B7p&qKDZwwnj}(%1GEvEW*;ialiYG^g3mX7b}^( zid|MCZiQvXwBX1cx^K z93gB)%O}2P!3{((qW?uQl&)~IxP37)3cH0JKHpTID*?mq z8}SF%B8?pN>9-9_wUOdb;b^rnT2a2q%=?}XUM`HIu@9f=DTJlv zsmDz+bYxzf3Jd!*5#4(NE*&czCn2nQX{yoQE0XakBaRa-1Q#Fb3wFCbx!M;!lr+hJ z_e%=AX$wdnSFJQXKl$2}ec`eGPZ&p40MPm`kV3Rh1c#J5d_SR+KsYlV@n$E>h8(Ri zzK?M1oSRTJ`}JkH;7@{5%nhgSES_)J^fqU9l@d*=X*A#0Dki&of?+PROL>Qs#1add z1Z_wOIv2EBRMfdlVwkO@gtBJ0kDm;(GwiC6$Q8NzRr-xEpCfUXE20ZUYGm0}8SMM_ z4#4Q8Dq)OQQ6c%?ag((|Rx0ciO2t9{F-?Idx@Zpj{NN?#rQJ#=tGV>$d$4QSK7}0{ zQ-nfZf+9ldG9HuPo>kpF_0lr&l4Mx?fo@}sg?HD5H;IMU5G@m7 zG6>0|7F$|FCQ293|DyP7Q$XzM-kM>-p>tO-Bvl zO-Fbdm8NMQbH7dZl*c`o494)w;K5_NnpiY2S9XI&k|?S3&HJzvbGLx8mycg)6Uy7( z+3sOC{uDfzaYA4)DtO}XeeMxNp%mOEzVWP38T-|Osj8uBRMk|YQ`i#i)5OcGs@WPC zzchkO5i=_IJqMa04m?)#u8f{&9g^0QGko_MIr&pKtBzIM)L*H|*7)!uf!RvZ;r%R) zelKrFKS71YH35zTPgJHTK&wYXf|nYl^<9y$v}8Ag2|Bi6GkWX~J5|AL#-BF^tVXV^-v1)S0oL#$E*@@Hg|!3#1|RE3sP3 zW=)dSkwYb8958OW7R*pu!gwKGluj`lKLA^fp+5;Cj@6BVPTzl(6|Ew_F^)bu?R*|e|{HFe}q2wrBPUJ+SK9Kx|4(m zZ(~GG`GOu;@QT;aaF(5u@npY^WPc!-Qty)iuuqVyo_$UCpP8AF0nEZwUMcFci(<@b z(vCS|CEBzO2*q!YjEiOu zwCrzg_hVlh+J)vkdGdswZ&|!qg6x5qg=-UljS&%~jLtCU3XLM+S^E&6 z8lxGX0*Zo0W**wgN}LOtjI-FA@sypE&gRtct5~B!*$`~YsmQ`$at5dv`w%Y*&y=-T7d;hx54AaA{ zhhzF}yo^pICFtE`RHg>RV1h;G0>T0|o{Gw!+*(9`XB+VD42G3RW3br6_*IpOK;5Up 
zPWa-vsrquyp8^Wav<@sAp#n`ja$LmOqA2#IFTwNv=fMMYA^MKOrM(w>EkuexPhG5| zpCGVHJ1MuObe8&kDlr=A0dF>m>_$*_PCrUHb^?V6@K(dHk{mX69)85WG+iS$by$x6 z*+cxr0XW<7i3Z1CoZFGGxebld+79-DU>iE^I`hm-%W8~);Rm03n_BcKm17`Yi!ZX6 z&#oj)5%=Woi|j1cqxjH|_y#<=qY+cBCADtDcXC-No-K2Ofam+!IO6vVpkiP3I+%Qn z#sUG%No~2?n1C-TdOn+LS2l1XJh#7+&*KnHOAc`hb{%5P4Y%Y)bA0E=VcX#io33?F z&&?S@;bS&qTFAGO>n-z>COzf#J$lM-FvZG;_G2tN(cjAsNy00H?8Ad6tIbKkhH zgTc1^(69*xJGgP(+6&Fzi9dICcJ}o2D25hnEzHiAiqk);WTL=zjd_L4P0&(i56Jc! zYA2_-COemDO&FwuR%<+?{c*~j)YqpRpFQxisn$1_6qJYrNf8n@kb3i3)Z7(3v$(aj z3}-(B0b~4(+K>DGulMku-rV0W{BFPgKUk7u5Wig@BWG802C&Zy$jjH_uH+4VP7Rn! zbZU(`x@#%v$Fgg0?s?N&rTVM)ncPE~)G}YSw)o=gR|Y3;kxYvhdkI4%l8aTj$BFtg z0;O0i_LWA{=B(^&J$!C#BT`SVqjGt@&)2&Nl_QbJn*on^=sP%MC-pimr3~VbA|Xh{Dz_Q^ zc5&-?`_2?6PW}y2$0y&e^z)^@T|4x7A8^&x*0nt#$Zvrl$6|~|k;PeAO`(>1s;a84 z*shn&ms1^13z{6F`Q?E(qf{kt4-6)H;F%%VpTC&a{|gm@zc2M)lv&QCs=szByskgMeo=!wKlA~u(3 zQ-+C1uWQ#1g23#pK5lXjFsPRm7O#K`>Io`nVyJRJIlhK3w;{rAl;DU$!Al(tZ$3q2 zuHHMPrm3mvY@Tsoz^!h{-nL-djvYJ72-0PE9*@^EGExu~dCtWpFVO5FJ~!JQ6yMJd z&SiBgiNmI;qobpyd8alFA8;cLMdU`BL&0w5^yc2C^OurUz3fZ#++N1ZA?$iwF(FH@ zLd;(wWo@ezPXe{(1|sDk{W5@$u>Wv_@$q{(x6M9JPwV?^t&r~me9A3}ZKkaP$6+wn z9K+L~VfcdsqX4v9o*#zR_Ju>GK>MJW%LG=h!*#Nt<_+!IAc9;s47dsG)qXCh3 z0-vOmH;RYBu4#JwwARJGd$XxKvizj15GC^T>DqqTxkWdiA^HII?0f~%&G(ob3}%w7 z6Z;ob%l$JWu!Ds_F7RGBPu5QeWsP~a#As(uw#R2hs<>TD?nq9s!mvAf>KL-rm;H6J znn(tIX2cW*TMx$EtzN6iQ5P*%mtXgWE`833+FhP=2OFxn*N~|3j!Hqu@?M_8cUkQQ1n!z+__|r_N&)mm;0?CieTnw)( zZa$5!xp~p!Jy??nfpsF!zX4Eq|LV$Jj>Ls1jaBy~6*SMeefH(}`d$Ipwc#xl4G;YL z-llG@?+p8%R@~y|R6fGGP(52^?J+vXU~ty++|sf~;=ya@PN+@vI<;)#tDGZ| zNXc%?_(f_SB{lg;5V*I|>bVrHz@e}!aCnpjXyqP8=f;dIlo50m{3of8!t zB3pvef=_cP2?PU&Q08Gfup7<)08jB;)r*2Pf%BuhiF5UEa`h~o`64PM&Cb6e+yrO3 z-)5>_Znda~Os1KE9q-s^bL}k<60jp_wLxmr&sDM+^$U0=O@dc{nN(L-$4m&KElR^+ z^7fL6>m}bE6+ufavKF(@Ax-Iu#gzf2ElJ4^1xsAO-Om1cxpUrRFZjxEwsxYi_9B_+ zwG?1y^(EgS%d*%ZJBu~^Ec8pQfk1+Fg$ZYWg7A9S9V=+ztm=XTRtDik(r$32o6Q}} zPy92DGO%CwkC{B0GUPjaiaVWzz;=>QaA8g6wZMr_VJ13_B_+K?OxjWa(6~;a0$hgN zICHf?{f!WWHjLeUR-k?s5iSaY 
z9GojPsD@T=QuQk)i}?6@g*AVL9KO0ViP>n^)pFR0ACQ(&xcQs#2$Z=)ZjL{s5@|{TY8jb^rY$KDbh8Bimm^A8T4GM&!Ml%#W8Q z;XvDFm*GJh=;`mbpkWQS0)_cJzQ+KH*CsI803`gxv`9o+l zU9kBxpb;`bhc5A^UO6h7)z{b8^WnqK7_%R=^9nGFELQgXyc?L5jJAa2mn9hiT_2yK z>b2f3x`M`Mqp?Z2!5>-0tLw7)iM_R$R-@k5M4b7}Z567+Wu>K&lgE{hrKYByudoHq zi`wMVx4pf+GePg>=5JfEL~>8?Gs=*;{XcfKIOl<+nI`k+lEL+gF_$%I4!tv=yO(+j z{Wb2BKMx50g!Sd`5C0X|UXo5|pR}^DtMT^$HG7V=IlyR5*?R|7$&)c&cgy1M#dsfq?rHE{Lyh}s_4w;U+kmNAx^N_hqL zJOR@dj^!d?_Dl+zce{I4AYEKU+f7_A0jJ7Z$-h|jDAG({F0o?0?~qs^Mp)1;;gMZvrcY9GGEqlZQ!#gO z9qf+M)`|y;-G>@V59n(|xA#{G!C)^X*CjlqJAQUw=lPkSce<-Rs-)AgA3)}ZQXjwV z{-6`WIMe`WdRS#Jo9U_U>k2NQSUzP?q8`D!T8 zPP%+*$g6Nu3!S=kJLy2CAEoAR6Sqi z4Y=4mDHyEh(H+5zFTmC0S?V4%9;t!_%y8W@&##_iI|O_dM8ICh+jl&VRvvOZth5Ps zN9`N;>$~d~3s5^kxoNB#2l-*G-nah{?A-;P5KI+5SIb;2R{(3LvCLg3a6o=}5y4;S z_lA^6&q^D^6Qwtr9+%*iNxM}5^yGf)eq9XmZuQn-)cxi^_6J}sXAd&Kv!2|pQeC|CsdWD#gYm{~?7r>_q zkY>{@C)(uiVOA%kGIH_2e795rmbzzn*v3Y33;0~&!++NP`0qUc;!)*y0{dVw^ciC_ zFIdI)cq>ZPuG-ZLGm0uI*+Lw?K>jPr=iKypXgwP`0l$4At51)Zf&t2dU{NI3_;yZ z=UDR4RP@y6K|)Si5~%BZg}ia7*7uE$+BGH}SHhlp&-eZXl;l{(_2dUSn_J^SeSnxS z%f=y5``Z2c_XAubS%VoQTq=Hq#q^pNee zfFCpmg2cV!56F(R`HJx;V|9SPR#H;R0@ElK053(90rx^$d2vXw^z?Dkx-bWVJ|{Si+E5bWP6<}Be~hFyJiEqbbD^~=9*C-%M@Oq zq<9%utN! zy7NFbE8cA0UbOj=0wCV+U=d#e3f0tpkz`yG0%Jx6{;(cS{Y7} zKKxM<#tj{8Sv5y%ztWNsm7OOMtjL{GsXy)p8A*wd-;oJYR$WPX$Y@h8B^8hkuJ5 zEaXrenbb`pl^NG-UL!CQZAn)Q#!I}R>g@pRUepc+!hr=1yE~tt#mr|sornsVy-5Y? z8azsC#nBm+7*{xeDrXjom0iRWv5^Lg`$hGsFAG%Q^#f=Yb1WMf1CRufL+10m-|f`P zlUfh7S4Rq-m;FQr@2T~uMFm5?OrT=cC|yXz*L{S z+^o|5B1~L*VtvN<07uBDM*+_bHA8~yxUh&5EQd=3_boXEHolub7q6eitU{JwpwQU) zC6{${s+ipdL{ajxZkWu5nIq79^Ye=8)KY;NrKvyr#DJ_ACnCG3U%&PiBY;{n;&(#V zQw|7o-b;fHL0NZ7zXj9c)ke@pE=MHU7m@74z27SEU&xxNIfk>ThXu>!>N*up@PX)` zYoLAy2T3#6cF@4Sc54D=qAacQvf3PagMICR%8~dcC0~m|o5Uwz(ZZxR9A3LJUJlJ} z2fp19-0}*sx-82wa$Ld0vtYk|R=OCTxWBu!-!8cmT&k)};CT&{a^_xrmJWMZMI-i% z5*pU4#U>T2nU-uXu@K)gP^E(>yz*g0Fz;?6?LnZ&c400+1d~_w9e?Ppo1u0M<-xH! 
ztva9J&gv1z{t@D=j$Hy4)vt!7u*;Fk@MJ^O38B@P*EDI+rZ-R_+sGgcSsSphs7gsuIE37QRfdw}914Y60Ukv2 z;E*0)>nA~G`^!hP@ap4&PV+%<10tsKX3{HKcy8zu71!$X9vHnu8pv=D)M5G3tsv5N zy8$C66NH&a*t+^WcHqjM#%Y<+XO>-2-xNkhxm2X7d0)E!zbMHwciG9pX57bA~r$Ua$w72?P9D8;b&G*(=T z%Vj4-RxNA~6yJG~Kk+^-?4hDGZziYue|)7E&Y>2l=Pxmu`P|TtU3$)eqh^zyRr8-( z%w6Qp0!9=pB5;oxL0E5EE`fM{L^uK`4YxxZNf%kgDQnRSa84A5?Cy>!MMgtHR-zm) zyNBOjpIN6)%K>KMg^w9jIhmkt;JuC!I`a`$bl~NF=w5$l5%aV0K!4IipVT#QDV>%Z zaT@HePr>5Jig?Zdg*ZiKUrh2E<6NmMlG9uojLI?{&-zKWf>PAPy{sn|2d11_&j#nU zL>CME{G#czBdaZqSuDV($d6l{te;4xcJY_bPFGUPakN71tho9bX-#_CO%pN=l}BaA z7y6&J<<=tuXROK7W*hoGR7^#|U;~!`xuGBy!tA1&U{*|524bp-ykxwQ(T>unN6gi! zp)qA~V}7IU@u=gF8=IO)$Pd|Te^3@ehxt>P&y4jU>#r64-D75A;rzwRsS?AiODkOn z3FFAfNNRp@XRe>uor`PjT?uG(UY)8<{fK2^Ew!&ciu?M9n+xS>qfB0t-(Ca;G+Mxa z`{ua>-Dx#gE3;t-JTC;|UF=K@6@A{G^vl!>YVjTX*=lje_RR8;?l^Jk92`t0E-MWJ zT9*=hVJ6_{46Na0rxs^+4NGgEv`(}O(s8irBiGx>#5FFGP*ilUN7brC3I%x}1-nUv@FAro00A>axku?fKb~_LM=V?08FD zmWS1lqm|CI*s*M=(8NfYXOhIN&4=gh7SMP7=Z7n)K&&wa2Z2aL=oC_az3}rYXbp4Y zH&Bmfr^FbmQ8T>ZpnP*~!A!yZ&{5VC&sivwEDd5W9JU zWGWZpN`Gbhi-P&@ROmkFunk~7FWG9lpio7)(f@>y2O~qgtgRK8G134OztN)mbzafY zxzCM|_U9Kt^U5!ihAsG`c@Cs()*#G(6*h;O>AOW^J3EcPx>4McRr7&_`EP`gnCRk_ z;z#oifYw-2$i1eWu{PPP67rw0il=>ie6p-m|AqAh+N3}9#Qgoj{|eRqn@RT%g7IH0 z`~T`}{9D5Qh7?f$Ap+78jYfk5dJw7p-~&Xeb0el%3O$DSK0R%QcCi{5jT01pPjZI; zT;|x74&3R+Ox#}oy6&aNF1Xz1yaK`0kU>jj<&qSoC=Jy zRZ}u4esl|67C1_9o^b%Gr&506m7DwTuWnBIs(S*>dre1L)0fPZ@?w(A;P@Vwbn4C0 z-VVU$bM}EMz6<|LZ2gh+sc`bEB?PLcr=ej-cq0gFpl&q+pVmwsrnIDFfRN*<-B@So z16ZRahdc^5Kk$Et#`%+l;lE!>{(c8U#Qk3Sj5pAfmw&plvH}U=88{p+C;2y}t0wBY z_D!aAwKo)9t!vTspXf%@G#Y$lWJF(C9}tMn=0}zC(voofKNA1BhDs)J2d4wH^X$Nw zpZg!(dk$8=yFY{^+8Lc@T)w9i7@#mVUv=JI6d!;s?X9LvR7f;j<5PLN@*Am#pF zdCdOrI{0Vn;Gg{3gWnrRY^HyUM$eD@rgTk)Z0GFDuo%oTQBX?0@U?RNh7m{o%-(d< zJ1{Wt%GdU8{`#^r5@}W}jk8N9(HXS$h=NXkM|sZ zJT)~nX4TN}5E@|z&Q7|k2QAI+Zi40ua6DjoWc$xgAeIKW^>hLEaiP5a7Oh7EITV}s z?y*^3cc;b3C7M0#*#n4!*y)xmnr+vx>ASW=ZU4_<$C^73)?i|UsPIZsMiG0^R(9f8mwTg?~kL|5~)Nn-VBOq{(EAq$RI1LROJuWiH>UL&Dl3Wv% 
zhd2Q~NC|m1#Qqs>Bl`LG4M~taJ_Wi6$^(4}`rQVZ_@c!H`r|z_1t%MN)~Mj)8Uc8C zIH!$Wt`jG)x%*kaoo$%;Iujsbf?NI&WT0miusC_2G9pwun~X5Xo3AgDom-C;THDiZ z|5-1G%uy2RqJr>hLXSK;)XIlk>(q3cY@k`sHLFeDY`6}hm!9cWrkLAt$H5$U@-J~> z{Uy*aa?uS0&0_hD-dx2!M!mipcIkkKJUAEJyCWB77$Ef3+6M7BqB$UDOI!zE@21k$ zDZ7G4YS^4>Dh=#+_Py2nVB^jIBK-cz;&6&1p7HG09u?~vo0OFFxC%G;`JeNauJw=` z(WMR!UlU`F8aU@D3+_E}K?)Q6`dq}lul=R~Q(9hL#Bbj?iB|hB6aq+mfc)nl_dGft zMgj+M0#$D6TUC1Ao}Ql0^;5uZq&bxK8oK2ghw_Vl*Toj10)fvQ((I!7mnr{@vA~R7 z!kMoYVRhje z|KOrqbFBNvol==DsG3UPlx>c_hy~m6-_jcY20?$Z8u^#N*cU)3X)i5=zJ9?}I@;&}32PO3eC<@w)ULN()3dXtzqC_4Xs1~Fnft)T zvq^C8@}&xW@clD#&SeBG&Oo-47%!l?LKdPtTR%-agD?FM7`IXQr$Hu@!0PHrYeQ3~AKv2B> k7cJs{IH5oNewn|K=k4$Csr)i{KG?Q7Ka6$OuXmLXjF20qMP$=s3a%3Mx$qkWoquRRls$ zm|*|`Lxj*nhk&6cY8Y(0|ITl~|2$o^76M})Cumg%lEo<01PH_ z5c<2t71IrVms!w_AAc|p&0HMW6>9IC1^>*>cN@y}8tP6p`qlB5KC`1zZ@$|(sUk7u zxMj!im6Jz~WgU;@?D>WIB(D1D)cgByB&r|AO||^8vuREr_CWWyU#|R~aQ~6Fbp{tT z`cTzm_l>rO8yVC6?%B`y{Y}fvu6t5o0n}JqTlF z!{PJAbOYpu*#?&=;8u)9bMx}^^B-h!-6Pp`V@9Q+%`)A6wmw-dotf#ugU)HIYgI%o z-h?pfdJ^e~mECj8qFc}LRPQ<^rd!a{>` zSF*e=L0iBfV@snJJ?PXiesDNV2AO{D;pUbWB7dHE6gm6t*zj=XQq64kDytNwWgj~6 zh@KhHc>~1`L8(r^OLGW!Q=j}rqI<5lP>A1m(qFKOh3CnX*1xNcS_;4zzp?ESMA_hPJ;;)MQ*&pL>%N=Zl7quXiOz zC-;>ik+OmTr^GzLdZOUG`{3xPLw!r?9HT=)p)X48w2&j?pzoHQbV)ep#({RiMsI0; zNW;9n8+#-~eC1uD_Qt)9&#`dLZ;vrQ-4kmdF?NRRe*jL}%ck7LC|tlD!<_4m*(Vx@ zYaT@MdXYyYiuW9qJx?Gbl9`_t-#K@vgpWGHHz$PS711a5MPCdZ>D_8@Ql9IH99!XcxgFVK`t9*-n*mx=JG zwQ`J6?5W^e;hh252wCdDz^zUDusOH=0Vb)~)S0#E@fS31yWg7>+h}8D$@KQtTs~=R z(`d*A?PaE;qvMAm3NQi==0HD9V~k_GQimUMFPt10h#a1E)n=ymjE!Mh+AG{B?hYyf zxkNZWbcF@%bS=4cmW+WbW`uQJ6FKrjh!Rnz|6D&~B368KIsm!)TI2|sguu!$losli zYCQ(sro9wV_kCBp^hAP2%u%7^4}{`AXC*%FM8pQbKXlMc`zAISX+%S8roX=W=f-W6 z$>#WY+6)Pu=J5F^k;BVEDN;kd*>~MP)&v(=Owvtr)z?m&-i z&O8`1l}>4A&XUE~8}!7ZR)2BO7@M1)@38f%wD3c62A>?i)IP@P3{kO}afNfO!_Z!M zDRQP-d8jdgJSI$_&1%-ji?25dWiz;Sb#)zjr-FJKmpc~4hWU{iGcTRmwY0R*MIDgu z3CqHLM(ZCqGZ&3~!VzKhnIWTL+laID>7eJBD&V5GG0Z|F&wZRFUo-t8gPJm*(eE6c 
zRoYAK(ak8LM;S}`fYO++S32+)-(Xq02iGx0oQ=&rA*?M~g^F+Q3kIyD5=*ltL z0vXnKnCA_|dP%J|GgAj!{VSvs36=@;53SWRrk^4=od~o86wQ=X?wEl8wqy4BP^8@_ zxvF8a3Yo+;no6l%&BX>jt5ku|DfzsXJsKgD+SVF3^L%Wx>Mlnc1z%6aTspW63_ng~ z#)hyQwJZ~j@{TC6kCx@ms1mI;RWdb)_>UAKUtK<@VzRBXsy>o7t;abwE&xf);E-Iu zQU@X;_R@OUD_ydlbmCjfK9g)?t;|TX!puCz^*ds1QU12fH+%`2IU+Y!v35)EtOuc)8H6MY>Na)nlZBgoi zPR*_99xqISm6Pk`s_^)Ha1tfQw8#>rDTf?sQ!$Xi?yngxbYK;{7M?YoueVzwhP4M} zr##b3Kyj&K33HdHG*4vpyDl}Q1oJlfiSiD0Sgd5jqL+^-#d^2N8aT~&p~w#s_%Qpd zu$goT8+q}PVuvZ{rq;=n>7`_*Suq!yUA)XJyBJBY71+>26qg>T{C8w$z=BiCBD|a1ZrKN4%hb z^l+c*PZ1yj2|qcC2_ACOk>`>Ho8;!Hu+>ka5zD6-Y+=={sd7j{edy*ge`&e>kk(JO z)>g~m{dFpw`u3fxZIT$x0z40-rDOM~UR5J1(*fD6BG)5{vt2#Et$|xi&dOL84NNZ& zfM^#td&=*R114b$GiV?As)`CvC3WN=p3$!mMK|cMvJbKdlhtIrmuS8h;M{jLdr$PT2%n)pjOA*F@+7n~v{pP^1t#?gcJ*Zj^!y~%l9nASmj6id4ZHwXe64i~# zDT$Z}l83{kh4)rsT_0T2NZ%xmYG-NgPSx|1X_ZZQqC-l9hbTZXSZQ^4RZE8QW(u(;BsofHMUSp}+=|a0^vgV7lkw_-V?X^V$k<=>W zqfc9DXL_V0ZXrp$XkyVccfWKM-P@bi`nou*M_#;x`!21aXk9z;;*9m)lv3vPyV{qS zx86rdRGk*r!RRpx8|3|h8EujxtyS8CNC68eBJsvLn||}DtLD^kH?kI6R=16L-9?VE z(@RWFynw*DHL>z(EbQ)O^7uNaYV1#jp!LROE^W>3RPMJ!oJ`CteGs`m5i7Eckbk({ zFlej%w|6r9ouwI$oF}DWbN>6CIsWoGi^DXj^e|}$?HbbfA%&>TDRG7Dq>#;)tG2e( zD~+O&?+SloB+wYN4pol2>PR3$ot~T2Ef%q0YB#H7GEr2aCWw@!YMOJQJU7YSe-;yp z^MCQWor#fEfZP2#7|mUb=e7%NvCYu;EjGL+R&+3&xvPzFqpQV+pH+jL`7R5K9|adR zAEur>$gH4lG9?l<{SD<&7P}H_O8di&M(ko=O}q7^;0AP9IVbywz|aqi=T94)p*_o|GRoS>Q+vas@IMtn+ zp<_=AQXs5P2EIQX>lv8ow-mzh>=)no7#lYz6qc(Xj<6|=91ck(-?1yS*)ZjEnJX<; zc7Dx`WX4*y0N?KXoS{zc=&jx7wZzX#JXP|3_Dm>9VH%UpGX0M_w{SoeB37=l#ZU=y z3YGO~(}&pLnWWxnT)2)A{exr;mO#`817 zGp1C{ELEh9l#dRl;q|+H7P^kuc9p=7KL{NuzxponHQqh{IdNj2&-o=L_bws$;+4Z& zo>AXUM6;k7F|5C7H3ue`f`%LSY9||&{ZbK&?(Q~ZuGj& z`P)Uo^sc3>wsue8Kd!#+fw%I<9`T&Dyz7f1K;af3T|n~Y{VG$Ng%a2f4o zI8MmylxXY=tOydtW9evHx&>(O>hw)Bu33235yRu9IJ=)pKN0EaMVr z!Wf^d;{?j@XXG}CLTj-D9}u+Gt72K?)+e(kW~Hzx+=+ZiVQ&v@v4H9%35%f&+LV0k zoG5yCW>}1lb47RsrA#I*uRZE4vcxqYtaee`WS^i(oc}C811hVpy(isPMQ)uNe<&>G zcT-S>;#N%_bml~zM%KzA4+3vI?j0%RKhGRe*>~2YpF1Tk 
zxSYS(ar%4lDcj>}$Mv&Sw1$I1WV|{lxY*q|IDaKEK5KZw+<33@FzY8w=K;Nwrja+F z=^bGzo5uRL9CtnZ!+CM?OAI`v_*U8Lo&Gg_I`#%i{bd)6FA0jvT}UUA#?bxD+p(uq z6pd;wzCb4)D$YC`@y4wnWy(6%-6Om08TPcWwoBC82KH!OHCepG-1yz|C5!nAhW1eZOz~njWu1zYem|jtag>f7*?eWVzea-h`8#no)Mi(&t0#I| z?GJ*zEZA=M-OrJpce3B16R{nxx}B-=_UhBtk7d$e?XMJLN$tCw{qMD%Ud&pKoPCD) zK~Ex)VPSlq1-j^(f#)AUz8{_mo*v^(C*9VnA*t2RmJeBW{*ghA23_z`*Xj{-v<&4I zGd;(rfz?+)#hA#anD|=kG7ZC4A53LN@u{FuHQAe4pWjzLm5_JxJ)^8J#a7$zR`WP! z)|U2%{Si*nFOh>evO_OKph%($DcKukZ|0jZX+ zDQ=REMO$v@Wiz6o28LoK1owG6jA59MXf~(`aW_w!(tb9tQK|8Os163LVNOjf5TEx^Vw!;@vBDHK&`dE zCQP>jR3Vm;pH~87e>BgG-k@(2Pnb4P+}CRY%YU+~VYfo#G4;*QP0X7g8y9mx9sZ0w zPp3;k#r!t1Oo~2@!<w*$bdSf$@z!-)jvsfhy!?{*@;8QXjSnXxEVbq zI(GSU1arBA7&27h*35?bm~(05L}5M`v|Xxi7eGer!0;Aa<4Z*p95$1k#lxnbGhSbd z|0dPG+V}QKeIyqkBNsl`Z?b1v;h!G}Ytoc!K{Es47?^Ip{s?37rBi~`oUM_)bf?C0 z{cS2TzJ31bba3mb5KU(e)Pj9!rDmsX$XOa8<6HpcFq%b>c48h44GlCe{GdzD+CB%p zrnY%cUrZ-2hY2>yY6R1>(=7NvR>)|LseORaH>Kp^Y$KLn!a=pL zz6GE}7d-1!SVO%-J!^p=;wM8pAd(1*y<`my4L!RilW0cLwnWslg!u6wjVt0#wBBanKAu6GOE*v18QmqyBsCtHdW+_^NH1q0Fr91yul2*&#a7$P14IR zA#BXuQBp{3nQ_u!UPw#1)Ryq0*bg5hyg5HxasIFm`{S}WJAIkS4l|b(>`O5yF$@Yb zXF_<{3fnUO<*W^j-slrGZyS#*;rwCZBxkaN+?rrjfyW=^(dg4oQVPUP#_F6EvG#JC zmDn!Qyn3SXj1-q6Th|murH)#HrtR_v%^{3lK9j-miH0x?Ljy8pim9B_+78+?%gs)^ zj`qq|5#ix+L{$N=2EH$ZM6;P@r6zKtI&4QT`ktdl)&~#*KSF!>TI;|xQBJ(%naTCE zL^fHXMndbA6+mv5FQk}`lx1hZ-l`@0w~xpYA(*n|`GLQ~C4b(4kjuTaS-2n!@`ys| zPCj=TnVEU-wI@hM7778Nam3>8jYYAIpqg7QU?iwN4+12Ihm=yL4HRfec#Ek^7>=P| z+j&@wTF=LiiowOZO6uw~QuNO%BQEhgUzB+jq-m>g2IPAU;pj|H zv^%~={zx?hTJi6ro4hE<%AUs(bPs+~D?in0ZBgmZn zB{ek_pb3Qz(48)VI{`ae#=8-V)We8HmN~nt)rRqSe8S9G{Q1SjMR&`b!2nlmpVn&S z^H0?^62%Uc>dGoAL)wJ{Dgo8@Wp}O>+Xu$KfCtXa&6z1c_c{)2mhPGqLiP0a#v?5s zB_t#$o!%m%l6KOYT(c^>L;S%ZBtIc=N?Enw2n=R>QZgMJ+<%6D0M`GHqB8!v)qk81 z`q`|;PQJhPb9;&m0%v}LdUS`SRAt7~VE|924Gj%-FVvJH=EaJq@ZM+We;U$aIa!;& z$s}Vw%^0S8l@p0@*4mnJAR_ch%EY2zyW@q3A zXMV+if972M5kcYM;bz6UM|=aSy}XnQRLs5@dmiw)!#@<4m6Z)$tGeaUS0v?*2L#lY zJA%r95_d$VC4{#9xvRyr@U|WU(rS5IGad&mb3_G=MlYsWyvoUuFuLCOXeAgRu^Tsk 
z)z`kh1?Ff#`yTNR4%|24UHf2@HGt9TX7wYH!`{^sF_T=@d_NWxT}lBnACv&3TmgWl zyr{K7NdS&%R#sHx)~aKww!jqCL7RC_Eb{Yjs}Q(Ik4J9a*#JGtdIcF&2OUC$Y&a@9 zoHvie7;CQl0Z1Y`iu9TvM!Fvn+-ons7BCq+a4w^!fxY z8A`DS4*EXGD6%d;*D$NKd;^di0=3WOvz_Ry1lxLh^}xDEZkD`?0}aMLDAdMaQ(<@7 za=n86MxzA1U8Ypl8$#&fyT+U2MQn8d+4;-;J?4vZg{8)=;;6`4-WoN2*~*i6zF{$) zifx!7;WjGs`+M^qijWk%*+uil$eMY=DhED_2AsjW3hq)J7OG(A#XeDmU7#+>PA#v6 zNm$CKQ9$KsYADe;5zI~BM;9ei<+2*sD)xdT*tPWQ!Z>rH(TE$I# z0IV8MZwE+dc|Hs_@ckB%V}8|9fXd7=hVxb@=ld6{jcO-wp{_OV$nH~{%}mnX=t?|_#MDF>gi|kJ$_5MY zV?Wi3$^Q@KPQ?K%`MXsGoy_2ed??P=5I7$e*y=L^_QGZ|CWe1iJw>mVY4L~xz42WZUCdb07m#D zB4rC~0G#MvDNNZq*b(Fki<`ET8SI&%R-msei~1oWYJrM~>L_=Qei}&{CNHpx%#qSZ zTrE5Mr*^>Z?*@zN?kB8OR)n+WdM*}TXt<5_(zDMy93XikeL|ZRqTtv8I1I zRdxI_2jnJmVDE_O^1~S5K+XNFi}m&eqOrKPOUTt172_17VO~pqL7G}IKDnvjVQiWiRJ!i+anmku%PHG6}UUOSo-MCQvvFJ!^Zj5t89-%~oBC!lt zE9TOhl^Z(IeE*u)`nF&fJ=t~-z6D`6jte$il-a+8aeIS)H_LNLkwod+Q8fj;GTHx}Z3t#-en~m+OYO{rW1|<%3~>gCMoy(?V(9ajv*# zLH`g_Vaw*me5rU&9|$-pyQ&*?y=!QxlhIW()>Hm#)0$dgydby<1wSZHf-maeR*sYw za4H#X`R~v^gs{D~J7JCHv}1OT5)g&yN3>{K^}~IL;Vv5$68U3pEd(ysI#xWX56Piq zAPc)&A{!h)NenHH-ZuE8b|q`PD{6!sPrYY8RP8-dd%+zXC?Z{|y-{~=9WKVKi9y+8Rjhv}BCsI0dq8EcuZcce57|e5UJ6N{HVg2$P!IyZeCv%eh3OCa31^Bo7FrP_!!>S8ZOh`UdBKE&R}u7E<A!j^yVI}c^AE|un}+|_!Ql(7@ z&bhfC)|hGp;>7EN$9&eAhtl>ISO^OA;<j4QG7nB$H!{+Xu`cD;57VeR4 z$UM;Z-e{;Avt)W81KB#;yyQ~!`_8~%Tjf8S`-C=$njsK~3p>`W5;ZLY9;*yfQY=v{ zrrXd@kK76`@h^EXi!r%1%aI|{?6rIgfuF8EkEh(QGzT1MF+xn1pgr;Pp`xoW*lu0l z2iTXYJ`Uj{^A1*#JX+n7sKui`yVa+wpY%qPYD)zca}86%ZX*_Oqr}aQT{w1?YX4@2 z$O!YM%}CX0t$Ay*e(XvA_iu)TU{_AE4vm0-rM~jVAqs?W2ChU2V>SAV3^d5@MV!i7 zRG5*U=$;Xf^hEff%U48=Oum8r`uk3K#aw{9X*4W2D;PA;{KNZ=PT^38TPG9pOhN+l zsNnPQN9FVZU^4upDmb+pKQ``&2?7{czIC}v9#WfeYoj|W2VJ0fG@3cvO0ljQvj;eW z+U#KjZVT+!tKj0w4*9xVlgPG^Cknxcr>5v5!X&B08imA;%g9W%{UNn9*QfakF(6vu7Hjm&-x1aKFt2 zxB>5+sZ|;Q5TsbBf2hH0G|Z&QP@?8lknc&7De5aR)NW^C?F>kiBVYUJb%pQ5@1!;E z7T8O55E9)W;F$idL*77R;jPX_(_9e!EoLm!JIa$(ZMF0~do<9K>O*N}!!u8uI1%R? z0dxa4P9SI9XxyQD7f6PTA--%VsNWvZU%Rv#=YdMnrgWujY_!fUw8II4iNMbms=0+? 
z8v*X_?)oCgNIytj;|8*eqAZB}WuPFyLI@AtqBPuMj;?^c$x zTe=}R;%lo;SPY@RS{j8^F@YrI{@gUeYF^)A7z_>e%wiDek|=DVfVY}b8ktvZNG26p zV$F_}>h z@@n85X6-o4#7sJ+TXbx_<&HTE(vEb85SUqki%o@bRQ#nA-7;0S&(<umVCBd{j+K228_d;6hRZn}O*E9tx?K7Vz5BYX9p&zNp$3i(+i zsL`-V#_Bi-Rk$e!_OLQ5ct|R0y-C~{=(M=q!l?D8$;%^}mFq2{K8A%>rShHio5`ve zpeTY0Ct63E$b+?CBoeNJ=<`+u278l!f6tTjJ`7WFxln{3=>Ip0Ts;L;$qgPe8XM)_U*ey`d_MjCZD)h(LW6C1e$Q%QI^|b zhJL%uljroMCChSYoHeVb2%o`XvGRZ{>O|@&qoShXO&Zh}Kl4IBT|=E$yHPKlPQOuk z!lIEhfv>C#Qc@_p=H}yLbqVUC>G;9H!E_*HyU=Bneh^CR|60F6%a;xe49vAr``_SG z|4_Z(Urq9Vyh;8)lUbfm>7e-+?|~#KDf)S3UZ##IW`*gH_4rU1mX}Qca(-weS?U$| z^CmqUmmaNB1Mne$9@bM`Mg?hGbmf;_Wo_}Yy|1;OUUj$;_UOXJk&zJ`kZ}YB2TKp` z{L%>4s`H9=Z&-Wrib_ir08BKf((7UO$*Ho}6A%TsvNFGueblBe_Mrazb$n(i1r>1}IktH(TOn+&keGs7jf%I>^IUv#dhtjwF4aryJ^vVt7E zS3!ShSF3wrI$JO|CRjw{21Cm+rdrFcz4MVerCYjGQgLDCR7p zz$E9{*cIk(r@MfAq5y3p$Q3y+EKD!)iqyevlshKyg@uJNc~zPK?7@|mnl$uy4s#)M zTgO;yDTAByh0I&I)~vq;eY{SQ3_MMTsMKCV5Rq@xdIbju=O2-=G=u;s0D+zvE=bFG z8UptKI3~t)2lsd{qI}4G2IUe|kFh8PFi&s$g?}53S1c$$-*mUZyKkt{198~M$f(3^ zCk!U>-S|P{{Z&8|Y;0QO(4Y=lF^|`1AjM9~mTHGUBFF7%AHBl{nP%Wg-3qMhZtD#3 zu*B?7E-+YQ@|p32Hv6lLSO^7(1L(bU?f5@j(ET3^@Na+`rSa%1f;K5~BA)U@3(dUzyD zWt0BHJb2cY`(R0)Bw-?ASXld;Dh8`I@*EA=3@A2^Z37=FuN)It+CqeKRsL zAniFKmk6dMLc_PpPbBN-DFKA^u{QvH-lnI&-urKoQF;C~b|pa*DKb_^U5lLWeqlQh z<#RaTz4KpFnzC#I%?$(x_k0I;l~JCofVc_|n64LCnUFy05Ty66L|fXzZSkIiGepCi zd~A}{F$u~Sy2I@2eb6diHtwFDMYrj4B9VZlI1#IS z5Mf{t&n}l{t*++UJe3}w@udz8sb{1OysoIK`kH7#nbxpc*47&QC%F43_6EGbLYH1zWojV?O#ZO{EB#6mF$;^XG|3 zE`<0RYq5G(e-f$xE-SG623@%{%qT-GAir`;k0F>N`Bhr&eKZq2W{AZ^hu4C!520x7 z*+Fg|z^n4}vw{tb;?weCYHZ=og9PQm?xB*kikXrWHC{d!yCjZ0O79D0bh zE6Iptu%_T`z3hb22M}=;MaK$tNo(_SRP!%O*F@}-JK=)OFL?GhZ{nbSLh+1%73TaryQJHYaU$<8>Q-n`lLarh-tZib8xeX zvZ)q4cdKFxd@|}5e(j-9l7OnAB5p4qbmO_Hd<17 z@$Fr^f<8qQwPJn@by_Z(7mFxBrvgZ`G%qJXUH|u=w9B%;_zm*aT<{T#HPbFx| zE&8t4Ax0|bQ#NI0xa6a!Ld}O*KbIqLgwqfPZR8WyVQXmhO?E0ElN+DO1Nc9%T?%^` z33qBH=6WH+H(iQ*>b3UYkn`b@?|jdeL2~q=F_|JM$0Vq$$esicw}P%w`6D5*6nsQ8 
zRWetfxfXe2h^}m6nAquhiw`7Jil-N0`QLomqEHC;UIKd9p0|#%6;lVWM)w{aJ+-++^<1gd7|VsQ?#r2Ca$F9p5Dll{4WhU&|J7) zrEW8HfJksr&OocS<*kyF^nfSHUTCw?=l=cXHO3b7uNqmifl6KC!~sMe-h- zM4s^)cze5NaqHv($c=oti&aue;aiwJ7 zVt#I-ij})S#c&u@A>9zPFgaf=9L;-amFHX-6S!2E{al;T?L`SVN-|Ukn^`D8I(5Y- zNG&nXGXXSoVa98Fz6!L5zXQ?w3o$^M+&S@i*jXqs%wqKI>}Caf?I}G$FQQvRfuPD8 z$F~+nKTVH6iXE(5LOi)Rn{<22-gBU0xOvaRLuScU&LI_K=azcu#1NnB0zNag`@$IA zkXRq#MJm1IF{`EGu5jfMu!t)Y0KrvQLI>NZ$R9~}u+lF-F=rNqt9A0h6uDtX{^_zt z-3NYs$gR`2&}t3x4N;gKbX&c}|EB8zSo}Ce=s)-hOf|9wFVE7c}3O2WD}6KTvlW+CvZ= zAQ@s$9+OF%IxZXn??Y?%uU%Jn#OANIXw729L(EYxa7E@;RuyH$2rnBV*x;%|FKZ!- z+MPltT&u?ic=?Gw?XHj<8lX}&bc?s10p$kR+7Ia_PlngntS8pjY-Ut*N2C&QeL>CWIiOPru)x;6jj2h2w8qJ%o=z0+5@x2$5la#QgH8 zo+Dw_9Sf81u3&h*m~Ixk#K0rGgC0?(of;iTb)sAEam{s=dlPjy#A~+w>Cm2=r=d>| zfW)A(`&mvIX@TC%D;CzekWxL#7KZypF5}BR;Lm%9h}%EwM{JS);)^E^*oUyaaO{;> zJ&P2*el0)UnX~aOo~z9E_luZ5?>F?I4xVWD6j9KtySU>GlwZ!vI@DiYtXn4FhR4$Q ztIXvm1=3cdW#_y;DPTjiQMYXfQBE1k4ap7-xoSL8Ihrz37)~ih4Tatf$+fHmNk^9b>JR(M7OZee_0x+bZrmU=N9nh!?J5lg z6Z|O{3UPCquo_>YOlJ#+U=7s8OnmsGBXm`EX(^pmE9g=Fm=6TlT{hdYU$5l=aC0ZN ztn9)m)aD8uf!Q#xosGcglMIBf2eB$KE9(Us{fDc{eAyxJY_pn72R13p>%v|r#hC6qYA~1fWpo-oE$NF>7PUN>8zkWYc~)MP3D&cPIK!E zw%swTLRF7EhIQt$rXYFs7Pr@J=0iF}+?(v%S1B&M_YWYef+;ST(+m_El)Qi%&yAF& zilu5td0u!?tG3E!<-(&^?CHxIi2(Cw=^1-90jdbQe|r%a^|kl*U>wUPqMt+$ z5sxskiQ?`9$%-XBni4IbtLEwXtovnXM}^mZ{(TPx%0C{vLT#XUkFJR1FalY~?TuvMmV}nBhh(x9=rff4h9Ls7q#0W$ z%fjLva3KmWE&B8bgP)e0P|o*u>{){bJ=5%it%t&9vx<-ZD$VNEIt7TJkyo)|8K6O> zpm_bLya2M6DyVAnxF`l^QYC6h1?RTn?bc`9VK%j&e1mdgHn*0(4_E(6)$0~frMQmG zcJp$Lu%XuY=UmYQQh>nCl~(MRNk>p_P=4k|eIXiQH!DwGHvv2}l=XAt6$qEJ0NY8& z!g+3U_N*&5K>-Bb*e{98TF|%zP*`SxKA>oqqdDY5?qlgir6$lIqn~XVN0uqD_p;&>7 zYL+3{2_xG2pHwG*2SCic#ouX;gI4gdug`K}03gQyDg5Ctnc#mbf#koJj%caF&;)6G z*Mv*}mqCp*6JD-9N_L-j?MRg?yxo>OM>#)@QvcWB-@_VR?kfI}5r4JMG|=w0Jkkc= zFSwc$F|SF&w}e^ zRQcQ+Yt{nUk_@0Xb#-;K;*$^mTb;;X3R{q7|Ba}e+)e|tD37d4?9%n~x(D#*-`dRq z^%;eqsm^IZ&V*fQbWr zm8*braJT#_FzpYeKs*x9iNyNxft+Y`^GgUW2(kR;2E3sl_etWky>z06Dd} 
zOe?6j|I+{ee>3^;FW7j#8BPf=w_7%?_Fu%%ParY0_y1%X4DA@+ljnsC-5|I=mHcWM zU!Za8eFpjF7~Rvv2Qy!#(O3Yw07-P-U5TFV?$<6B5LBKX{!}#z$P)(1Z=&Hk5W72w zDb8k>&;6@-I2H&yFghQ`Q5L7>oG+&HG|Q*Dcs|0q=Fom-tsRl^Y7tx6$?7H z1EEe30G}&@7l#gC1G+CiIY-A1AcKT?zVffm(uxRf;zG@!^Y^Y+=GqDF8>9FXL?Km0i!hDwl*B> zV$)-eQMY2{j=_g>t~CAvscZ~S6x%B2Ce{a1D>^n~9$-cS1+{@2&ceAueXBpBi7wP4}dQ7k7r5c<% zhe!Ooxu9nF+&?t{n~CbEzljMzyoCKSW{lBr4IXlO%qxFh&FM?1m|N*>*3O7}`g{fc zD?US6Rkg#&;uuND7)%@i>z7v>cG#empQ}BN_`M@BO0RaNR#dX~c;hku7gGOVdhH+d z+Oxk__3x8G{k^EErU%FXaaC0zkZMRtN$GG9;tMh(OwEI|PLy4ena`lfp<%UhikN|o zWUU(FcT!bwu0>+0al~>%$Od@r7`%&=esELxO|gsGr8XmpG`1j3&NkKE6y(6t-1^^- zr7tZlDR~W50Pgh7U5lQgk`iN5!LKg8h)b*!J{vDHyo}Y}+6Rn0MYBSTgMhQ(YHGru zdAm@cK5_Y9ri56#CazZkg4p^C0F>_n{1Q=4`T<@z`r@oX$ zGp}-h|3Kv$H2nrdv=coD6V?ymDcgmAX}ysTtmi-f=Doj6@c;29zJlJcSSdd|Itmmg zCtC2=9stA>Ylh4HQXzriN>+F4Iq|RNoCJiQDzRV=v*PRUFKD4)Nsqn?EdJ3A<+C&E bfi zBA_532@pCOgb-RNfk1%d-Cw*j_kCyXd*{x~o%zk@_ufBHL%wzIwbx$jd7ia=a#LT6 zb1(m14h{}Z?dw;KI5@Vy=iu0RWX~?}o3fnJ67ZL@-!%(AV=pJaz}r5K9D29??z(&V zxx?*`2RQop!o56YrDV=YDM=i6@$HI@Daxo&cioNOy+yeWbcXI zbECYPw}~|JR>Ww>wLHt4^9UY^K10J_qn|cid}A5HRbWkFXY4CypS;u9@nvOcO|?_C z+;O7g<`OQ_d57;Dh@f@YWo6u0XGS zT;TFK@S*?U{cRi^pCS+L=is=g@bDJ+RoRJu^OQesFHb1qFHB8MMNi*gnNOERHL})e zFjZEI25aF^aM+na&$`)Y-0CnboS}V9JHjAEW8*v5-sIBKjqs#_jDf7I?HA=A-nxgH zCn8xDeUmpxwH7sZZ)b$g)q0bkh&KBo*N0M7gMDB@g&$9)*!;FzDCm|umYOdEC*<~p zug{M4IV8lTd*LomMH`ab&@Nqh3gCkquQHOY7-^5a#C27B!Vqk)E7C+8;W zM-K-KO6KclTG?1xTaW!nG>F#{3O?CfUg4SBHd{R^XMWq7S}zvv3Ql}6G>(0Xr02&kx-hTxLNVzD_m-FV1BFk;2O-X1m=Ak{=D#Sf9xiAX$i-ztgQS# z6c8B53)^es-(zZkrKvH7xY^4vw&{*`j^5A0FB}Y_eH2c5mq5VI#t97K@HzY)ZzBWcsvpEV;ikJfJkxV_-7?L2wy@qcV?R z!p6nNj&=rVdXG*_Opt@~hHZ?$SzD(mov9R^-J6*^z35#!oo3^5oVz{umEZjP-7QY( z9!K=_^qTycn*<$c!MnEM8)L$}36))%0z7pU`EUKF3Q(F}US9i?D^$EH3BHzCKls8@ zlsf&j=w?JCr=oBuBZAu@Px>4|8Bekc<@v6r#-=}NnyaJP)jJhQBz^uf|bVfaM#pOEUf@J8RM|GTH166NYEk9`ab1Pxwmx2yzTOQpm znPt}|(^H@kM%96xjUP4exFJ7$Cs8k&^^6I#>g=t;F^YNy(^dwqS8HRgh2Z;r+9b!~ zyu!x#r!Nn1lkx0zd`#y9x>cTfqFs?+@y5l|RX0K%W~xWSo7|Gr4JN+4D>hdK 
zd8^k~TQo)(4DfHvsx?+lUOovjTq6VmF*@q>CDjnS*eXt{{UVZ}T2`ZnMgNE>kkk_N zHyL&4?&(QV7`oetYe9e|rt!)VrQ_h>0MCh2IE!wJNLTW3%Wi;Sk-hLzT zwBA|IL^P3B1S5F#qT5>3S4u_I!nL%tsQ#Ex+tIPS0L%!>@50R6#v7TLnbRe0Peil& z0@<7jeXnRZ>SDfPLxG$LZYh&SSL-kaqlF!os+%FuJXkCSo*_v~!EZSB%--34!o32)wD2HK56^K$tX>%%HBhGJlD&xedgA_pftV#6V>l27v+h)$E|zDJhK6i5 zHf?Dk=J*6e^HFE|a70)3wLB{|`qwLYz`^*&v25n{Q7`1T3LyRj>^*Z%kD(sj)k&lC;n>4E$+c zZ*MPdJl8RwQP<gGpvDX|RdrOOVs0gDtOyR=dfFURi z2jewZtM(T?JNY&<=6a_C$ESJ=Yy};PimaVRXsK9PR+G8Gd&IrCoojIAa?n+M| z-N>iu{Nb0GjF@=oj4qE@sjlxS5xS28fkg;dO?J%A2o{mSrrVw8o1OvzNGDFZBw*N| zL>NzjjWC*wLWGtmozlHc^bNCWoHS=EVbfSxJfbTQga(IUA5Ky?ONUYj%2`5z9^)<7 zJ0q6b6{YV7)p}6(J+9C<$r-99rtAa`(13Z_q0q8$YZnihwjQ-I-J?${DA3qolJHW= zDXve$r7&D@L4)ZIq0kva>{MY<^m;z!g*vPiF%LqYImM63_>0%4J2x4lunp;k>O6`G zi!h}|3pTiRVih}$p0<`IF5$jM3NV?Af`LPZ%HJPyGjYdinNvkYg%{4P{QCZBLB)5Q z5sozw2G({A*3Wu)iL==C6g97RZF&L%bu?V4H-4#8BjV=eyRL4*6*S0eDhpSuW-C5@ zi^V-#s8}%7PTyT-_);^WHRrrA^J`RZ@F)XV214XW;3$HjEF6Ilpn}j!jD`GV;d<1! zMYl?H9H}P>Uvd3)?h*J{-B6W)SG{*;wOB$u;)jT^KjCk6HO<-biECS3$QX?fs3i~Eo!Ib~L zvCEWKcSEm#<;D%0`??ER#tD1kA>~} zzJ%pR-*|EJkERhANm|oW&!tv#T-oR#$5^_<{S(I2)i+R3+SCbO%&GME^E=!EKSf=| z+=xG=*RnSy;q|foc-Ar|=5y3@)@h>~6c@eD0;}DEeqUcz1^ZKj>g5{my24y3*jxt^ zp%z|;#YQ`XlN_f@s^?U2eje~h@((c8G0)7@g;opn-y5?c)>e8#I()OZB_$;{SH>ei zAmL5aQ!FW$CJpp1ie(XGFjNz5Hyv*KJj#V-SoC?NicUi1FI(0PsqD{*=?BIv1z+fRozhHb-vbrcmjAsOPV)(vjK$) ze;zNbPs^NE&JVUFc=zDfhiTEI>@W#7Hg~K#(Qm5EDqdw*v(yQ`^JXIluM5!J3|14J zD)mrLwBJi+wmNy3+XXwy$aHk5&}?+0E16f%!&X1=Mh^}FU;cp5v9?5Ec&uXF9!O{l z)WgpwEw3@~Y(deCyO(7?8>G=i+b69(eVOaGs)B824Nz^`33XEN^Xav|6twqxW!^f* zEj#O^o>}4xyu>HH3q$!!To^TNs|$Q6?dOD#Y7fNPGcxV!e)M3%{SO%#f7RpW$&>an zvbZ6z;8@VvlRc!^n#jMbxXohkwxk=}6{n~%X@}jzfrIHiHb6S1=gu$z;Sd|F_9G;* zTPR~A&MKc-7(JVJIW%4Fg2jbzjA|jSA8Y1I*t{pX2UKR>lIthNea%RPc%n<*ubXBO zu#m#R+|K9zmZPB|A;WDn>Sm9R$GCJh;p881qqlOGNLZNrtMyLEYUG8P_F5f zTZ&qHRQ7~;r8uR{8Vp?2l^w}{d7HRLFD+AkC&eed?YrNZj&UpadRsNjI_+AO#`xDn z&F5NHwV80U{$(+C1fsL(`9Yhk+kwl6KA0pdPQur8kni2yLiCI$ua^(xlO!KK?Zyy3 
zdGr5Tx7=$zMkgB3>JS&saXG?nz-&s~?#vbBKC7lbGB51EK*=!P@ws9Sbu^a9CScY1 z2)ez~#57=tqsp^_ zTj{Ah$;NoOY~5PXoc-MKcr}%Vpcag9x3OuMWVgEYxw`eschg>C@oiJE-&A|BT(Jh1 z&)t#mkW13~YBm}thmZ|l8&ARbGP=akHQ8RdigBNEpl zcMWVM#u->K557Fxcf`3;u^JTd{fj{x^{8nV_>zU0P!bL#f6NBGfq99914&-67{41S zP}1AfSwsuZ(3B1^UoVH_N3kR@FHmRqV{^;w6NBd%IZxi9Z1gZ~#dWVqpn>h@z z%r6r4KV}3 z%sqVvYG}07FBPvxk&B2z!Bcjd7Ne(hl8cuV#N3O+XXY(dK*C-lZ6Re>ACy_GEmEPK zwt9LtS%Z)8>Gpu2e9x-0#=7OGe?;A7%t)`?chj|00&5_x*zIyI%1FKYxS*`fv?q2P zM+b>P=_qRzrLhT*8++>Zte3t0fYNag%a_@ctLUk281eB#eee5XvXlMg<35OrU4?*x zHeqkho)$25HAjY}TLIBoLRhxtsH(d8+!I(s2zzn(52HXZx3JWZ9uNjm3n$ zZ1sdW76Ckk*mfe0qq?$kIc^U|T{yKy!3 z-UFygwD5yUbiOp5@YsJ5ifpsB8+I)ovjgs$mjr((uNP8PbvPV~;#OvF-yCVX^`?a8 zuY<7+KH1Q#-5+@)f<}cm=zu?lk4}c#D<4qS<4?&5Tl~BkPPIs$zIy-O!TqYGw=)%9 z5TyL5O3f$Ri)+#S$!+DX`Egwp3q~X35+17g#~JZ~^*M#w!M&n!N0JTJpDSz-7d*yo z4-g|gUA01HDn;k5>e|J=p`O^c3aA&%LRiw+(<=uoz0%tR%Hf(belyN_gdb9Ha+=TA z8!hq4id97a5YMf&XB5K0nn7}MNJP@Z0%g0mEk*8vVgD3>`3~2crQqIPM}^%m>eGAw zYRG_45KXw_M)i{t+ep6BcuS1-Tc)NLafeP3gT}gid5w=zAplqIXc*EUpi{+GbBX#T z;dPaHwoz;-7l^ZcYJ6P}<2I1~yLP89NSofQt>qm>x1k!9S*v3)zZSQiIHTqif)>kF z>k&ito^77b+ebU9j&tF@SdMXCJ1_X`^ zjy%)cX{>R^p!J33ZlcUU3y5?RH51!9PsC;9k0p>!?JPB6pVT`Z&~=S>SyH7PjbtHR zk#v%Iqa4%Fv)RnX!hdy;*N50|X6SM;m7wUEju8pG3 zsP!K~mzS5*0osD61^D~>=W!|yy)}WsU>o6IFYQiJUwoEuSc3+@1_LbnnU;{{hP18u zB0eiCiy#0{2V9F@LW)rJ>({SWH$g7C8A3y{@XV#bTMsyyb1Kdq83gEwHtJ`q2GyxJ zE{%=5@I*ZYY8~y%OWBygyf#yF+n&HWdKi%nL4a!Tqk@8hc>wvNYtKrPByFLfA{&>~ zoi_tULLWbi(F_jWUONP`Zv$)ufSagqa^k1z^a_^&&czHzg5;G7;YFNPXf&DtFsmxf zb+d>#W};nr&${1~@c5{JfPfAq!&e{A$W#km;qP>t>MlUw4N}j&9`!G<;!bC*mz=c19Ou!4*ZcX9AtQ~u}#tC zKLFk*Kjj0Zy6YSF7Cijs1h((5>j^~Y=cs8N^Zuypl@dVGtNSIt>x&%GgQ@1hmA9mw zZ=9BEs}W_tLhR13h;hT_dNGKTcLz4lW!^Xyr;u;>a#=ga1iq_B&d_k6QZ{aHo-sab z&KD_uTK0y>?oDlxbgELD+}Tu3f1*@C;E=bLw)-S{)Ou# zDg#Q8gANpAja^dKrw{pdN@jTl2dSiTJl5ri>T4$qSNBnC6HRxM(_rGk@xWASwB|q;`g0!#W4W)74luP zB}p_=8-9-~&Kqtb?(yRvH<;F}qzs`QZ6$@PGd1rM=ib8S?$hm_Xzb5#J0OsWilOVQ};it=DPB?Jzoz?u;bWE2L 
zpmJ`_=RHY;+=ICv5}@UEsju8}-EnIZJfjo<%g2?TK~Y-YO>%QIwtTeU7`vI~KCx3e z=+ZUDQL45p;Y({0f03`s1l{H+AL5r=t3PR=Mwx$u4RTdBxNB`~4S+{ZJGb*+ySfZB zc3m3o)`bZmijhGP=WyUP>#sBN^ z*J6(J_(jxR`7fZS{sWBGKTi4ga)5t*#JZDins2TaAw1AOdiV`(9Q4@RA;r_DmFul5~UAfScIWXxYXj=V!#;v5J#B>y2n!MZK z61t$EAo-vcc?SMT1#P8USbB0fa+d{5ky#&Qmj1=R5^1cDjv^@GL)6md5Y(Y9s;fAGF4_E@5)01>6(F0?83n0-l_9yrv zjrYEDW-$e#)i1W-f6Ojm#Myav92{@?xgEez{k<93zdrhZFP`@wclj%;;{RsiBOsJ+ z0p{s26tNG^IP%} z5#J<0BO6!movmN$xc1b|(-TccTUz77f(@&o;B)JOO)9uf^DZynJqdRHDA}XWreW^N;p%aHzg~QVP@re`Nsw zVWoc;F~)!2BGt4lFga@)O4+&Z$W=g$eXDZG5Kpn*+Tk;&ok7&^=kFiqXWaZEKR-V^ zU@9N-I);3cfiwYOR?hpS~?DMxOVgCCD|KBOm`G5Ms-iU+n!;3&V zMMVNbN8)_PeTvedK)`o%W_C6XWTB9q5W2ffy7h;<^2+8U8Uq+>t>e2g?^ad3BzOZI=9P1UXzZ_q3$z8<|IW3eKJkb4lX<*|LEJ{d)8J~uC5n7 z2L=YBIaL2-zB0gkQ`HezoY@IP2^N>EUQ>dcQaDy$ZOXto4PF_Je1J1U47*dzIw+UM zk4Rb_uDq-J9~X1|KW!=hA^F>X--!JMkE=62_h$p{0tQT%MUF2=dPwa7p5tLcCM43IJula5dbZ46far96{d%yyy}i}!Qain}6&&3o zvGW@cOq`v|!~nALT3}!`*3?ju=8?1zd{fZhxEuuK>Re|jORs!DB5e4tg6Vt^xSX-m z;FY#JwlvW$00lM>-~G#5tf^f!h!)n5mE7z6I`t+!jguFd(*rJA3-Z8glE=9JUHZd+ z#{|TQ41*|SasKNSt|q|SaPQ-liXCrD0m2q7kk~f;_?(!Nmv;)1>Ht0A+I2;4J__Xf zBR>MY19^Rv9c}~Inro{KKv0*psBZ09>v$*Wrc-+uMe&yqyn$9k&KHOFpYX6`l-XZTcQ1EClh3U$WE&i+1ea3pAVeE-id}z|{<)jrR0v_rcGJ`ahMa1#oa2>%)$F;w9moB;r@rtwz~4n#w0x2%;4RB+|JG$`Eao_Ve# z^<*qTLwyb~;%{Oe^Lk)!*9X^0I4~h$E;`v~=zw*TDVN5P(Ynp7 znvIbtGC=TRfXHQa${f${6~2_7<9V-bhedk->unrw1|U@*r&rJwK-*%(ewo)No^BcM zOc$i6xsigt=L;NA;=o8g8fs9?Iy9P5?z<5%s^);<%!jmvWJ#}J9htKknwi@QA?@kS zBmmti*#U(OZ=Hr>cTbZF5Q(n#EUi3NQ%F^K!!ztX@M9H^2uNTDHwVa+;?%JT%{Z?C!-4Aaac1JT2Fp*EuYyUa)PdNGPoGRkR z25qFSMIlkNxukl8AClgv0y$0NVjm>20Wa_jNSYcKfad9xldemwusW)8PktkO8q6M< zGIJ!|z&#p-P#=MUN~t)3E7k)tpX~wTN#@N3s4T0EFlLrYvsB};du>%ORmE^iUz`uu zX`>vTO9SQ6A^7fdE70@p{{##)y8k7lHmzEA@_v^u5a%*Z*kL33WuS0%t!6AfCQ2y8 zb%QSrySPe42o9+cK7{GZ(0K*-Y@Z$^5LOX)+;W!Jq+l% z4r>V4&Q2J@%oWe@q$`DDQbRr+lDS=HDK9J39=J);E>001^w;Cook+2Nz zARvUR_ndBzEVfEqQ9G`{nySu52m3TJ5KTv99dh!UK`xX4iD$16)Nh4Be%=D3u}L*( zT4trY9~4l~9s`r2V7yQr&STCRC+zHiNE{&d+RSYISm}s=arr0I5=)~ 
zV<*zwnEKjshx5Ov6xYmo;?;yyC>4_%B#lC=j+vvzBp2y-SZG>Q$LGF&o-=s(<$Z6R zJ6vM@eV{{AXT)z^MB`czr*@db%@+zKHqRn`1K)OV)=Q}|Mg-fWHUf0RCPrk&d!m{O zHEax*%lMHl&gSN%<5dUw44|20%EJj`b-aNqD+hv;l+i3V?*jNygNpLn+Tf>FJ7X0C zqM(N$=BtHU<+fl$vQZ}d6*uHP%MrSL1(mC{@J?sV&0PWqT7876Ggdj^F}T5mO_Bhg zz-x@sk27s(fE?xnqk<>vGr|H9v({Y|ZJtfE_VorO1@&c>uA24$c}d3mcyvvusq?}T zx^7PSjK|JqVaSKF6pu_#J9MfsB*IyYoHp0d)ghO$zHe%xS~Os^jl$v1LxGcjl>VrY zzPcHERAvkzTmtVWNe0&=AlaTSQoUiK>X)%Z;8fm)v}dXW#yz!+(e4br?R_IRZ&XcI z8N)VzVTOL3glAb6Z_G+Nd^jN4RkKMfvJ0q1jIaDs9P{(cm$IlDR@)VwY?CqX_nhhO zW;TJtK1BWqS~!`p$S(5kxuVniPi9n10?N!q`>OFrI_4iYw32&GN=8m4942u)&DJ!J z3LsY-7|)`LtmaDE%$kuxVRAr;Z$DBWEMIwuZ**&6xdCZ!Y~uly;?=dfz#(JfN;BJK z)R|@a#|lMZVZFN9qyFuQIcujdk0-X<@eebGcx z=sa$HrPnpAH8rm02Z9Nm?`-0->INM8*POK)o54cs3|CWF@50zOtEltkq0ttLm7SU< z-s)r?HMNDtm35Mf=*0-=k+O`GJ60?^L(KeElF9oNrO*ZTZ4o|(s*5kVv|8jN63~O5 z@ktT&zYB%4y}SGy{Y@ol^mJORRvk9x(6{Yes&DhFwvEBYt}B8dS z%_4`6eUo%*#-+R8TrN{)1vq*a^epQnH<7$><*RE;NOqWncXRdG`Ppa2cGl0PYxLLSYg!XMTLMdF9lCz! z^BM|s4qNK2i$Qj(E`90qBkDypaK7t13GEmQo?T*lL@7WzzhN*~(x~uz#xD$WUdi}Q ziTZ}s&3&-d%cbIS?=C_UVlo4jAV5ls)wRLuQEsmWr`xK1UGlj}HUm9^H2mBIuD<}{ zvRZbl&;M}(p?hoN5LiraS{GV`g(=90a5)8q=5dDl;dS-`vMKms)CZ(ckeuB-Dx&Gy zEQ1Fhv0Ma;OF+yxTv@buw|W|Sx=N9FrQcaA)%a^5DN^n3H;vtcpG|vII$t{ez^G?1 z857Ic3^lBuqwS0STocfDI|^t_SqCBIdxPsmQ)u>G&jZRaRU;xCVzQfpLm~n?A2hw0 zt;gH(vO^i$n+jsjb{U^-!q7s`99Un!-?z>n(c@US%~`JmAaapf8T8V?@ZGFzo}z&w zo&-q39L6Lfwu@XWvVH>MtEEXFC1mOIlnSMqyK)Tq<*{uuBwf0{t1BP-WVmF^Vblw%G4-bEp37i`u zm#wUYtEi~Nut}u!{9SzN(6^0Jg45~oBD?rJaRGoc`A^mFZ2OnEoT2&Iq#HdW!9a;; zXl|}pBgs!`9XC^y2LL*S**`caU~6kDGWQn{>2cu?XS{{JZV@>tVO^|MPXXhWx_~kAbEMd(5(Ci3#k6p1pM1ISN}NWFG0z1Sfc`y!j%pnHGd4&@q3Rh zC3C6jEhAxC{GUcrQC+PF4$;JlIovQdHh#wW<3a0RH&|sA6{vFOPjKW}g`n<|MQjg= z1ZA;|%B?Kc88$sP7Y{P#+o4|;7K4wQo1XT-^!!DmjV5&-{xQ*>u1L^3Y7E7&?(=V~ z$NkfRt#)?pUEN<24*~WI7pw%4yOXT{fl~hc$`${|EgkTWF={7Y0(1(z({nEJJq>%bvV2Dck8uB#&mW~t*X93V=Dg@ zF{I@e5YS9+ib?Xg=hU9Y1E?XU)gyW(@XUdMfnWugd(*-Ste5?lE?BLtr=^(!@Y7vs z-RjDfD{_j8T!RkryL-D@^Hfd$C9-pkBTLb1?45)X1!H=m&IdPD6ujd$)%L 
za{%J?DQ2H2V6m0>#2k@~wo*F%*UP6h^~$d_x~K>C0ZhhT^`&ZXnXkhy92esr-fGS} zd=C@{@sE85_?SH?#}XL^NGBh0<^U+qzNiqpLw3%VO?Gbj5gExBV@9Wc^fcda$V~^U zVJ!0CmztDc=6^Jo9tGk1z4wlRF8_0ulFKpbu8W@h)&cQPLRDq6y9ETnT_Y#ov`ct3NH?ToX`LlE`)SO+i6^DWVRZHKRWA$)RH;X%4!^#gs;e^f@OO@s z8+kZ<9#Ry)`YP4u(|5bwV32kJO8EiyvvKrW>kS)o2Aj?L*qO)`u6Gsj;OFTF_cy+} zO>EEcBKixxHkO1pSrMR^ zC}whs{!d?}ss>D?AsmDYH$9oUK`{ULQ4PtH zH`3C{UB4LJmR(_AO9B%X`3HEOH}t-o(hV&NFE4%U1Pfox$Cuo5Ldqp@9UJV)`+HU~ns ztpz;jT17fLw)AQ%V*reW!&UufFGlzX={vTQy1HI_@)(u>qz@1!OzcnIZT&d@_c2nj zWqG$Q@C}&$j*<+Ji-6b)#BVwOl4S$?C%w|J7230gM}cB!i>K&q5p07ZvM~57C*YwF z9!xqF_>mvJ6_6kKFHR$VG8FgT)8`wxS|Eyo1dBjO*ka{C5i<{{&F>l-I%e(F;Qu94 zMKNH=yT!%D<>u;s2nl$X+6xHr6uS`k1}GJl`atuj9$tCDczNFO#!M|Gs}!_8XqIU? z#y#kT-+Wt<^h^}w@*ECEfT@HS0`HCsl#fuEcQgC6XPJlYNJxrH2KG|7yuvm5il!H( z)xv+&qcnRn(gLVLHb6CsloV*oKp~GyM+9fw{qUqC8El8uPVZRH)M&`$63t!FLaG7tJ-uCy#m~RKu1#$)y6UV=1-) zmfro2DYj%FEsX<}3jK|%v3&Z_7JSYr_t_xOd7g!!Ycd0_q9&@)&)_V5FC-n!jQn-G zm8LD?ikw%!k&Tl&qo;HVH#qi(I9`~GwcO$jFtCqM(*me&eGKZgpqnNKe~diT*go?W zjNTg(k!ZYw=(pc%?>laVJ_)N4R!JXKjnbm&Yp3vyqh2aeVNj|{{l}HkK~%VafZBRe z)8ryltw&D%E?0YG48rv|VqF1^wg4)z)Nvc*(pm*3(Jw=Vf2#|pFQ#^3M_;MykGxk)Kf+0|x0ReWJ#GAE%q2B`3BKuu@l%mSa?))<}5GaRk1AD3JxM{Y_?iR#9p zkn`>KB+B=%O8$*5!IWAV4@yJKlN!QIS3sPQ@YX#|m;lcaNr zCa}x#WvG|hg%MV9oCD=0w_BD>$QY$VJY0P&R(4mO$9|2UXSII!$*JAV^V;{Es&CPR zl20e+7*#;Nr@mM(YtIB!pTG;GDi6LYN2YJt!CtD|Rj40EG*WSxHh&^CBjrmPKc#^Q zmvOvo+r;n~AyZ{MP?W^`f^F3G4+eBmpo52zV@~d+Q|XP7aQMp`KIL`C-%Jie5hP_ zw1HWrr937cfjGwsphOoY%Q_DBl9>Q|a+Y~!ZfiR((i(g0=U|@*YE`H^@Od#Mfd26U zN%fe9tMzYc@)_S<2+5L$B&8EV!9SkWfa(DIC_&B6{k^L4TTPNMgKAqO*1N|bdzO;YMi6sP1jl;8mhRdFw=DMg3_MgCKV|)~-dPtDVmYpWwyJbJ zX#MH#mdfEAIV2>6#g<;?yt2p_sNT!t+#aEnRo(t0Xl#aaj5j9Ptf3eyI$&{NE>>{M zQpnKuVwp^IZ_AtOIDshEkhU|laqSxHF|;vAlatHo-7mQHY9dlQzk$viJQ{)0h@h{@ zDsGMWeIDg452nhI3KtYyg)$5$=EeaAPehsHb>v01y69*=8V-!u*mx&C;Klc1T<~q< z3niMob<0e9ddw(n;Jl<%SbJ}2W2BzEbZ8SF?UblGRlg23avG4{Ie&JBl*<>(SJm)A=kQCcI}4Fj-Odp#aa8xcdRpV7Fe)hZu3g% zgmj^)$1o`?eyvS3+LOG`^tPKC_8w%B-(z^j4uBWRa?KAjF&3(d%9&<0`B 
z?^;ws?lv}%49BfgP|dA%ym5Ys(Huh!GtW}hLIVyFj@JEj(80v?k;9+}{N@X69l4iQ zFCAhgDNtIaI-e3V_AG*Ro?k#KxCv<7b0I%56NAc9G( zng6hF3^aFgRxJyR@ks~f9JkzG}`m7wOmJ1TgjGSRK3^0 zUto^a`;*&%Kmbbs3L7fQ8&`jybc0*BuUI*T#RGN&zz+XAsqz2wJcNY@(nz2TkJ%!v ze1Jfyr8P-B2lUW_Cbt>TJuHx^o2KubBJjjUhh z+E(DPITZP(eG>|r2*hy6-0;EOs&1D@y{d=$xhbp)S*e<0aZmi_3RI**&hYJc0ypngWetq zK*Mt;AT$KA!Ll|UBZ?}tD5R5E0Yj>#Qt?V}#8@FC6+uMt(}(|9pN6qRWP*DE^t*ff z0*EmDFQ0pNuALbM$XK2_F_bORK-txPV=kkhG_>uTfxWzgt8K>0XyVv&jBdQ7VuASS zu8@(XEMCQ7KjJHmBOLE6M0RZ7Ukac+C7^5?BgaU`{~C8_biMn>>esHsZ1`ozIyF$? zW`$_7G|q8MXF_5d-iFC*P4XNUJHT9?Sb5EbID=zv(s*0gcB}oL7hvm~Z0(ZB0ir@t z0xUy4PkImj$0=#=*mE3DA@kX@2{O!hX&~il>Kxs;?t83;F~9R$nVv#%0y5nH#wfZ= z6dAVowt5@KtqKr7L_+c>GAJ@Vd-{zs`PHx9pJLKAi+kZ$S1S_JVvxi;mY}=G_~Xi* z9Jfva-*e?;T+K+>QhU=LcjZT2*F(F?Md|JQnICG6MYnNW0PW=>-$6<)R91-V!V*cN z@0ftDz<~DKU1u}n;YFeyB4H54?*|$?MglK&ZU=6y-Rp=5S~|?kp?@%g9YKg1IbX241{JY)*zV_e$_-xdX^BHo7$aiGTAidHb$buK~=!rIiQ58?S2A7kTu$zMiXT`GsT< zuBq$UAaR;TpLI#aYU%c^f`El0i3tgN$XslbP5|u$_PYvf>dWkoFY^F zROfG{*Yv1ldpE0hm6Mw-MpmXKvFr_yT)0g~_Bq91vm2nEaJw5aK`@yF#^>=I2n~Y( zJdj;_!k%Qw`M_?_i1FOQe=y2nt#bWpXAZsyZW%g54_ZgpPNZFO736p~39yO%**>qr znpc)(;N*iDM+!VYjJ0ed^{BT>fG8E3qg`EiU{Bp=M!_MT-ttGe& zt>Rs+%xdGt6}EHyxCjRA>bQ2klc$oL7e;Uv7K@IGIJ%rz_2jb(l9yXSrEJR4WAUnh zUhGbeA6KD+x5QDRoE_9Zkhy_99;J&Km;@fhdMRwm#wn~&P0f^J(x8Q>EXTzIkrO+T zIbOQD1cmXAjS#-7>oY{~XQe79XKl;dIXEty`whH3@RTvrJViGigm`|7gm+4Wdk%fu zX{dNR@sj0SYX^bfV;9FKfEAv54E5C_Ggs=e_?$`%$C@#*V&&|EaYUr?hWhP9Qu7Nn zbS!fs)6H(BBv5^%8LCR2aNNcb%LA77@igFry%Do>1uCKcqA{qkKfvCGG zM(LDM%dBur)+uvMxx<*jNfZg@Jnns-691ax%R?Q z)z0r-o1eV0o!9P9hI}Jigb&B$P)dsm?I!v)sE>`r$oIA%c$^t9P_Xq>*T7R93~Ce@ zIhgKVracrM=oR+8PEcO5X#IPeeG_NY$3-_Jueeu;8y#KqddhKgJwHonn%olj2#Sbp zjq5l#ntz8JDA?;xbtsL+k|`LQuQF#SoM==2XX@W&#k1=~w^U7_6d}|SGxSOHCYnFJ5)QgV2|fz|N6Kv-$q@#s@3lb7LGRYwzzF#2gL!E-fu|+R z9s(AjUSTniz{)~cnB$fl$lN0P{@5+V2O&{1ZuDS{>F8>N>-gJQe#8PsWo(`c^B6sM zCZJtP7zI43IJf~{{hU801`1&jA|KX;%OzNqcPCG`h4{h6^fzXrl_nC!W1hS{ps_Iz zh3Yx4U*G;D#LaO_0@^lY_CT&mI^Y@Zd+&(Sw?4-Os;?O*^UXYuxqp-IXoZ|qO??p` 
zf&~iH`6!uzcEEOS-xS^r&W1LLVA3Q&xX3j_a&CTU#Zxb%q~kEK?n7MC)RK_e;o-cR znO1aAPiS(>31N#vff_fXlarHcK$pfbNUp{V@nUdrJSqA8-tA<~`#>Np+zgt*+H8ci zsO7C9WzCvoCnP2H^ImBB2nYQr*uKyjZ}i1$8f=MyreDy6=!nriZgy=$HJ_f84u!4uCm~ZD=<*c)Ba06Z0zmu~`%26>|06(=jUSRA z_q$Nyg%Ag-TKq@Q@QE*Q7#3YON3)}C64$~h9hm|kytg=1D5{qx=k5W*x)@+-hR0_= zY-CVWQ8g!ZQlum`BPpFoVaAZJL9xxt4(TCoP=6o;B1UtEk6^X|;@Kr(E*)|C4;= ztT>Va%08zQY!D#x@b}k&837)n-m}VIJh8Xas{8PLV-C-MrZL4w`2Hj$kp;AcFlZl4 zBAv~}P~95)NubojToEWpB7}0KIm03g2O-YIL#e;FWC+x0W*>?F6<`~GU-j5O#7qAJ z3cUY9&0E1x!#`_DWFZtZJRf7K2_upsb=Dj{Dq95)17l$YKb@M8HTe@6|GCO@oZx)v zpEKUl_lZA?G(~`TAEL;A*1j>Vc(z0(KMUe&(w;lpRv*3r$)na(ZsN5{jmSA+@D~^f4*(|-vXKZ ze|DFDtJG})^h%WXtY2t;=GmOuLm>PHn%b^Iog*#-+@>_%YD??~syHD4o)kbobkUwkV(wR0%j?l>7DysG&7UHeWApIIwqg*A(^CF62t zYx8-#YT)S+YQWu#qOVs#PE7%x7y&soW$nH^&ES)vzc+AayS)uI*PJ*Q=-Kj z;GD)jixo}$N(>AYMx$q<41yK|Xc`XR3LY}59q6K;Jm3H_Z~_dp8XLH1M-VvD@Cv*$ zIOT7M`QO}<&3&tY2kGnr9;B1I1GHWhv|GyvG;z#uOo&zoLUqLavA@2qe1&nsg?OMv N44$rjF6*2UngCp`j?w@C diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/pngs/V10_distractor_chain.png deleted file mode 100644 index 26add6a2251d849f22310c28e923f96caa575f70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 22997 zcmeIa2UOE(zBe8R6X3UR?Kl{NdRb$a&Oi!=Q?Ow(=n(F17wjMfk8~dlCnHrZf zSAW=X`6AZY?o=6;yN7b?Pv4#>sy%U(|F!KY`R9iod}}s$CNbmaYna&18D+=u07Xk* zm6h#|IUjgDJ##%PN5`PK0ekvDlc>JfQZc1-E4Z33+|y>DTWk*9aeD&{w#5Lt;RD+b z?uNl`Dc^AhpZxOWfBu7`$u>>1R>hVlc2UTD9MuC8=cYGj+d4;JH!(9GQR42)g^ZTZ z#5)O>Q{Wa!k!U=B)`oE9W%DxKiLq_xS^bS|PrlrKsgQ{l@y@*@QZApguRfU>LJeaK zSeTodqq!tfhE{;5owJb2#-~W?e0*8iDs7)gtu4L$ z?oCR%FJ8;kDw{h{c%QvU*B6C7PygbKSY^>6Wh<|y)j^LG$Kd1GV~MiSgym#h-iL=e ziUO9th_xABBUl@enIHG6Aog2^^j|!d?e7zy9LxA}@0iqvdpmE(9RFYd=U(zlEx~iixB|xW{QM*Hc;13XcGTK&#hOunWA3lV z5T|D8=#MV&eSf;k4C{&Lx2G;YJ+vDcF<-?X9*$dccQAl{3O4j4) zp6qPtlUE;QtyLE8O3#=!y`ph6?om(VvXLmC{igXBr9!zOs;AX 
zYo9XBsfctOvI)$KtxuS4z8Xg8JN!`dkar#JX>nCmRc$xgDX{NcF19lS&&r&vQXl#u z@w&w?jhyLJx2)97;^G#$+okT3JD9WJ-xP%tA-0=l^vs2$&6<%4=;lIb6j#v z;sxlr1Vz;;H8P`^@_6{N=3AaSeN8reCWny`(0x(f277XUO1r0z1@&uf9pU;1xJny^ z-D@DVE8QF(IuX-KPcLkGEvb`-7EsV*Glad{Sbg23M8454#vqoL9pw3@`XZS3GW)#fQII*fmNDnrZZo8UH-nq96vK` z;Ig4@;&v~GJKqMVna>v*lBc~LOUGI??ojU}QF3Nqlmt4up z2Y(vh9!>w6lFW1M&af4BgZExUcZlZDXbLr>uXIs^FD6fMv_*RSC-91BM=C3etvkIM z5fRae3vHFZhg7PG2<5S8Gr1T=_Ufv4rD9=L_-rprM$tiYWi*mQ0grY0s+Ne)ibK+u zrg{$P`>@AC#wT2`wUhUA7=rb7eQcd$9hOtkMdEe@C`LMwfTgo6C;OEUmNiy9=`9V7Fl1FKz^^-w9@}v z;dS9-LhcD%s*;Rj;N_)vn5d5~=j`P_DA%|NP9a9Y1G%y=GqXF*EX!4h--oP#((2*r z(&bb>xKA6fW<&OKg)bt5>o9|!&dxut>)u1dyE1iZiVX6{>A{j3d=*ZsQuO644PT^R zgpkj{l(5xDyet}-tF$#qbQ7QdzUobi8-kTeQ$r0VyqlI-ef^%)v_2W%Y425j&M%@n zL#x%ziQ^=EA>`BbDGn;m;UN9(a14Ea*I-)`^k9F)yggg}$~o$_gvRzU^sq-^a*y`h zi)D~_Y>+iRj($#PG`jpHdi{Nzl!E4i%0&y*76hwHB~CgT36gXQ6Qs>aA&*HQY?-Nh zkbL*;<1CSoO(0yI5X?D=>?+kdy`Aa? zL=;W4sS&;$Ic8{kZ$!k%2(`L}(zC5HEQhl*d;R+`T1RrLXrYdtwG3sY9J&bHi4x|n z=k04pzL5=^db}^PYnEIZSHNDvpc}E(nUTwHTLYma=e>A}!|K5d`L)Z%!z~MDBZyB= z+MZSo)(7h;nSyn+A4Ri>8Zx5JmXqT5JPj7N9?MX+%*_VL(rA~s`eCZS}Vr=fd2p-sbHWzyxd1*ADM=zf4c(Y}b z#MYL&vgE3$!wLyE+%o*ydT*0H+$BFK+tagcf?OGG?0F7`|f^+d1r zN~OJ~RSk1Qpz7`Cp1oe_f3uA5e3_F|F#KiX9Jb-%x{IeIHMxCCS@I=v|l0|RLqn|7^v8j{6RE*Cz z;EA1I%JN+OMF#&e$9umbbDOrZ@})|pnt*SZ;gp8^g2iP-U|9C}C>?~e)&-k1MfkVu}KVl1H zJ@`$$F(%eyc$<~!O?yy4NOqx|FxOG^fIs7ntg2at1}yc;%a zXM0x5+CU1m`B<98j>WC=JE5l^FKD5#!6GHR6KRfI%8Yq;$Jse{ImY2trxKx2^Bwsn zr>L4ejz;eoU1m5AhJezuOT29eWSsce>k|5px(xNXeV~L`syyf&8q%tq(C5P(U(+^G zqd>mOC2=~Pgip-0Xi1X?^f|Xqh2`M-r$Nab(Y?ZhT2E=5&XdSnnT@@g{H3|h9ysHS z-Vl^B9c-dc{#fvp4@)bIqJP2e$ZmSiE&@Z}xve#?4pf2F<+I|)naSI?48plubJ81e+6x)*2-BSw8<0;-zByc^P9X_9K2TCH zhZ+th_g0kWmOo4}(xJStt>|mF5)#VUR;F99vZlf7dw+4jbPSU^FbE4y)58KK+JPj> zp9WvuQx{Dnqt@;!MdoaI684dPcsE@{E2nM=o1bVK(6(k>&aWVX=R2cQ^!U6zkI#J# zcbezntZ7m7!xrQs&{g8N&k+bVy#i>8u_Z&&0_Jf$pEi=oW4JeF{nN>~LWw;>f-H9LPzuy)%d~i`V{|yO^;IwAGQF5VsxAjb->!zzO{mYB)`m;p 
z)u?8RsmHOQn%DXi9=4sP_Dh%rt^7*QRU%!bdl|FerTkpGdz;@vNb)d-qKR!}cGNv= zH^|bT3dph<9Hk3L^7BP+vR#BHVMlL&x!uc>+O7_txe@AIB9`wyeup%tfltG zI4&y*3G1y&d+k;`@UJ*kzP0b~?SLn;9UEs$O3(A1@`+*Zqw``3KZKn{8=UAV4)ShW zjEb_sI%rM0_`lnYXD(VkWt+ZuLAJ+7azN99a<7Q&O>eY~K0Kus7Zxz!@BuDdj5DCP zR2^UWzL8? hUqn_7hH>ErwY%`fc_Pc4Zu=(@)QGlu#n*4SORI^osg7e9IZI*bo* zn~f3QulY4zNJk3`ODJ;0-M9gSNb}`E5FWUDp8CiSwaa*xwRSQb-DZazu@5VgAZ}as z+4_OJ|JLf(mgWy78}Br3t}QJ^xj$IE(30c-D_NpUqC@;*-BR1Wefw3H5$!{uZxDj} z)#&R^pL~$lKi$ZnUf5O_7Ne!q=yTx){MU}7W#n*%v6J1MpBm4sKgLA<@N4^2?$ESa zXjtL^f4$i2?UN&|_{h3J=r-Xjkwg2a`71thUzNyJm?3XiK}`SNW^jw|tG7oD48P8r zWmdhq7$P|umd#(K5r5QQjFB2pogWpce!iu){lU$6QvpN;j2ceP@aMxg&TyioF9Xc41sN zKfV_~?kN9(+7((-N38`pH4=kjd6I>cX=+@Z)Oup)>D=a&vaKGbEARJT?@2GVL9*3w zVZ>ZBwiLqdY%wQzu)F`HJZ{uwHifCJu=wO z;zRS`x)U@H&JC8yL(QfEF5O9q;1duKu*AM>C|A`-r;DV}XwYiNhA)6wz+TwphOii= zUW=34&l&|oSPeh|(xvo5K$Vc+pP~d>J!ahglyP672-M1~c9YzinwrR|$A*XkL2^B< zv(FIjI8c*TZpRpy~k40;c1^ z;P|)`)xT2#PR75_?h6dX0g%W%4A-=_H%*wZN9DfIVGqVFGswD6YI&T~REAq_gmAIL zRQ*PDcbeMVWfwtWHh;>cAf#oJjBbg-h);v}61OL`P37MHM5of_XYCdsB|;;gAz8o1 zgJi0qfR$%E0^kD^-i<>G<|YMJ2m;hpyC+D6bSl&&vER91au}7>)skfVl^T^E-GE<% zy+5iHQ<|I4a6oiNS&mH;(`_Q}C9{)Jr$^oO6D3wty1ToV0f-YP)DM{MTIO?D@goWf z3N&2wYN^`lYuz?=b!uB~R`Y6WSy*M$mV;R`qiR5_=Iqo~ViZyME);ygOU_jRHw zAiQQ$Ta+nvI&s_pOAmack=$jNKxEdu(u%-ZAxG8$$Wi_7&S{*2XJzA17&WEoc~eu< zj1q~_G>u5?kw}+9Cijps+s{Xjhbeecl`8%Bk9pGkGASGBxnA@Df16;XH>ka=7|gQW zK;y19?sCXE#KSYxaV=a-Hww>sSF}3%ay5Z`GP^<@x0RUwVOn6t3OR>N%xL#P#%EH# zr5|>ZO3}#e=IPM`U~~neg+^9)&MAaL)_LUXRLmtu&i)P*A)sm5dJCs^qi^Wzef^=G z<8@)3agA^vBnq_8BQmIBR!9J~gnnnI*@bbzLdZUDT9|Q8kHV2;-tjITKpjZ@0xsx`dfV0?r8l6SOOu^IW47e>WGydy;)4^q-3w!;LO*y0E8WO=isLg&N~)fn4O(9Dl02{fHH-Frw>r$;`B5I zD)G@$u@84-$~B=R9j{)uCwEpot1x^Nv^YnPZluFTsZ?aoH@_6~Z~pA#S%5P|v=!Df zd=t}8mH2Rnp&yNeG8PuFa~$>0*YQt6w*QGB`5%Y+&oB)}H2*q>xx^Gj$26n-D=zk` zM5~T6pIr8PDYT}7eYIB?B9Y-kaSSnqfPn8cT#S?vWK(qdXu{Cwfnc@(MA$bs5=v6>>YVl>TGi|nttxzA*X;n94>(HchQ7d{pkn(6(+ba0hS>w- z=Sz8M3zeR<64i(`uMd^?mr}(RpBypvoG&;?gBOQ~hYy&JtyEp?*lnc}<-LsQtFBh> 
zL|M%&EST+-zI{!X28--AN0iRw1}^8|%x#>4tHL+Hnw2FJHvRn-^$(ZYA0PZp_W7Ul zNRm20Tn`>oa;#TPHp*|Sc26r!vL8Fow8Dj3&3^o7!jkSXo{y-;47sU{1v-&3t;-ye zArN?lri2D$mdcGb`ox6uER&f%Q($r&WYcr=pTg?x+uy?~Lt-knud@@3Z*s-f3TpbS z#|KjMRnOyUYHY?llf=fzDUM|gR>j4|Z#B!VM@nwro|l!iU-05jAnL7a_7PxV!E39=DA5 zaj|>XTR@!e?~Ex2HRGSmQvQ*E|1$~r|Bf8ag$U>>L>Qw#9~64E>T@KA{L$G0S5UKM zvtwgpiwOku^YqjCpgWbWve1BX_AwXB&tVs!{z&S)l2S-uU|^nI)d0WC@V0}ncXt!$ z1cFMRq>;~$d6o`=9(I<&C@!BprGRX_`YFVI{XN8zOG`);O3LhIn=RcNYC*4o-TXa? zOwRw*KurEQM1S(>Na`ONvV~fUSwiv) z`=5VR=ZhT_Q&3UM{rScg#g?94@db58b>vS;PYVG?JPR~h^u5LnBM)zH1<%62j*V%r z1MmbS5>RS07pgpqIjNY9b^M%D;ZR45!zn2^;T?v~dn`@1f((1nsq_in+TLm3?%fyk z^z_tpbrge2n|z2wqQ~0DAqnC41#Grm7Q8669)w*;!ZiGQqG4$^{EZd)ZCJ_vpt}>Y zw*b&S=XpjIq}_8cOL+nZ~byizsHwafC`mp!Y#KeC3r*5o>5FxD1} zw~iP)Nw@Cd{~SIxHdefUZR1au3OeDO*E8+C!dpMi3ROLaqZ@VDl+W#Sg{~@3=IuStc;00kv|+w`rIXG zsA*^v4pq3x+wTGeyQC0Dwyu!96%iip85-;WwZ>v%Vzbr;W05sh?fjqBK~DcLMCp<$ zaf`mxgvaA8G4fr+u}Ly-rz`-v9ZkAYuV1A6ZpRoC;k06&Ast_A84qz%Ui#=VB;8M zWmoNGYu6MWG&?_U3fd_TKvx$xHR;U*uFwOBI?`rbzaV9RskZsq)e2WrZkq=Fv0~lS zlS6zVAZJ2H=|C~0er`8nrPqK|-o8H_s9PpT8}=FjnbprZ*oKArz-}#i9>n%D09lA0 z5M^nGSPy#(M+ek%j}YBYz28B{JqR(;D?q1_bCqXb{yM{!QFP2Jb*tYwkqI&QO-g2 zvv~1-jfe#!N|{||w;p?Rav6|+%25je*~Z{JndaMy;=!$*83W1;W`3eMLTe0AV8uyS z!WX3D`%8mIkLsaVcZ-XYm|>9tc4;JGdgZQ^#H?CxuAoUO4>uYaQ9t!?7Xy%GmuEpb z1nAWr^$-YJTDGshXnx6w5!pX>BwCq^$O=#d&iMotmEJdH#^-IXY*HT5WPuB?VT zb*p46IEtkLd~sH*C(H~YKpshVyI+*i6TD^2H!M7JRmTt~Z%7Umo}?-Y*1=T{R@jlo zg=;l|fYO!`@B5+%sc!$TmjgymiM4`?v`fru{>{wl3LWr=!!^kRK;80 z&9xq2?<4?2K81u=eqr9u;gHs@r~@J}139#m!c8i6{ddI!zitWC)w|aHVr5<|E2BDX zDD5ejdd>7!2mGRJ{K34Y5M4Ooy z*>_IH^n%WwtbOH~Zy{@NVgJ6L;G3hFUY*MsGh}UZR;1SOL#^ z*I*X0P;?hTQY2%F6FSPzxrF@@Ps%GAP9Lc;*Xc>O$jD+S_=a_UuOa^I3#cpHPXS^F z5>O^sj0y>I@$rI~W3iE{vl!-T-sBpjiVFnzkx5|Ut&3*hQYQSOfDXf4N_X+huwVC! 
zz;{m{3~jFu8W-zt{9tCH5Fdg?Y4a33jd==b*_$_S?w0`+&Am#AXL#sT>L-|Dkw7~m zQ-c|~RyqJW$-L_-?BQd?h#`4Wf}9LRuk1tcyZTxLz*^%3HAy50rpsCB)8ES1| z5#GA9TDHQIsJ*V`K{Ag%mF(J6%&k)z4zH-Zh%yEZ zb|9IMdyoIr_RHuILm7&go<^ZKelc|*WEGA|>qc&y)+=d(2#O7`&{1Meg&urs>I3!K z*_O)LbSE>EqR_`^Zjh{-sTX&5q7fcYe3qvuMv5oqY=FJ{Ir4h|5-d!vWo>Iex-rGI zD>PJpVtTs1prVtn<>6{^yv(8Q^w`bU5)vr5auc*nym}$kkAqTL-xGDYSV2rhf0wO? z@YJz9Vvo=+Eo`E~GpjqHprBHaVcR+%dF$k@$De;ST!o6C8zQ>YynCi`H+rUHJbzlD z#w?yH6sq!G%8+%-hA=Nov6cIrqo9t$Irap7)O(;V#_-j`!;$_Hk?=|yXF!ZJnj)LI zpG!7JWm$I~p8v>X`Z{lmVE~l}_=PJmJd`zwZ)rJ&yTVK+$SVe!es~<*ou$`sw(N_7 zedz|^9h_y+^&{=_#JQCGAe#T{6XaI{Gmkb!xz_o?rtGvdX+_UBfy}u%v}MDF!DG-% zJFBx$>yifaN#X7{l^qG5rUoa}SbKVm(xYeNi-Xw5WY9f`sW1kja=g zuhAQ@8KnZ0ac{r1GZ%UE!9_};Vt;5V19v+_ckyqA(QjFICkR-5+9@%1S#RV3A zMtS0dNV8eq9J#nL*wl`f2rsD87x|?v{kk^rij?S?6`UWHy!Hhog@HNet^rNdBzJk2 zyhUBN;C8yk_^a9l!uxu|G5y6i`310 z5a#5k=d#p|sVC4L6U3PfH3bZ6t*1AasK+k6Qk(=dNLOt3#V|}iolaW8F6kkHJ2SbK zBJzuHB!|zi2^VQWbNzgq%nZBi#*h=ltVCBh_qi%hE3PnR#Me}y;b2b+B-HcQa+>iZ zGaWMdJS6G@q3uwZ;cw&@wKUJpiwK7!z6ekl92-uwcD(81Ne!&1d@HV>Szp{%=V=A~?@lwB#|f*@MS3S-XqxDR^$HQWp|=t1T?pHW{x zQEr*F`ob;W!d$PaR#1SdCzZs+Y%OybpK#U|^ic9+{I!~Z(9zg%f7sNWYVl`#p{#XY z0q0Fjmm9CI)@Yi?Y2>wNo2Oc0q!b&p1oJIw9-w0|21L-{ohq?scAviQ4OGPpzzG-w zH6$aVhMGy`)uhiNLT)M`A_NKymIl>&6hY4j=$E$lIpD>w>Z?pq}q@!e-NdB2#} z_FVx0%bs~^HaX;wLk+hhVn2H>|5Gn-WwF+ zfkexk;t*h}4Oqm;>t~2|p6C-Wn64MlyfVb#>u18>8Y7<|fS|bmqM8{}Y{Vr8s1t}# z$VgdpSX{h;sjl;gHV#PZ8%W}ci<4$!S}H%bkQr&orT06tnNGlIb@IEF2F)}mH-nU6D3b2|N=vfgy^?=DC;h1BnsxU?lyjz{x$rL6X_M<^5Ti=fY5ryANTAs|k!>YXTfxY7MX+&K< z_1BRhCo5|LSfHI_Nyqbn$>7cA<+DLiWx2VB$tC&IZiug@mgCxHR@c@_?2`vxsLxv4 z6S-V&|1zx;^&;E^%*l$gTu1q$#V z(D^^h+VuC)gTKOIpt|~BfKYrsN5T~_yHU7P`Nk;)SBeMTNt*^F`t0PRgaKx5=bz6H>^CJ0al%Br6Nz6+3@VxX0~ z=hmL@Q$+!B#$~Fhm1fl!4~4F&vHE6H;g(AD;@|0@-zm^t8$y9F3|c3-6%{IAsr-I8 zE!ZExBa@l!+`StazRL#zys#dEUG(K1W#F|+> zYb9;lB{ksvf`I~|=kK%HCG}U)n|kRp1wwv35wrve=0W>v zgoA+fd8*`uKqzTSoJ2O(u0J*+LgZtkOQ5s!fz_222z_)IJ2_{4tVe(D@6tn8fWG8G 
z2)gr{4+!cH+^o)B70G7SG|`y!w#2Hm9|*3F)$aj=O8$`SUdWmGVyo*gZ;rKQ9*~w!vaEC+ zt6!PQ7WzF;2PvL(@5`>r6tTsOr^S0f%D>h2N=x;BNImX5-6wI&-EKIdKQF+I_$`7LL&0STOND&63< zW-6M0{2UCBhz4xhNI(|*7ghF+_H|hp%vp9zi?XCqz_j$Wnp(#b0;T{nnicS?v4yAS znk_K8?%sL(nyjjCxz!0`M%8*mRcFj*sm+{$_84e1hK*k zxBWgrA9{tEkEUgX&s77M0*kQ4_pFUL0RwX-jOh9Ki-6|7%!%QVW_=wmIe^!@+yel* z9`#MfV6dNoTAp;J*wQF~-kmw)3hpQ|0Ya?)l_umbXyMk&p?jJ>(Qa*Nct9eYa{|=Z zXJ(ktlKp0d3>7xhPs=wWp>KHZX+=z=a(EcB7YqtVr@p?(iS2;z&83Wtl=@g*sTlq& zTqs+s|Em;aNA6C}D(fy57|Tq@V<-&&Cwb92V4Me@H1A8o{*GiGP6Xh6!D z<4~b&AOM0{SHbqx4-QTtC+0qWJQ=Ne>C*v@K?eZM*MGHe0ZQ?(3G^xe@32^GAlb@B z!qEp{7L~vyT?xz~K>YTx(z)e!B?TFg4~!=o^EKnqpIq$=Vz7xNfQ}?Kzq--c59otL zJY>R8mbdX)^n=V;5IPdy?gWgXtx@?`j_ZjyJ?0ResLfw|Z4nT_(yW$+IeP+h@aTm) z2QwL`NIzhiUWj-1wwXz;GkdiSQxEC*u!qE0yKQ*1R(E>j%7#sbHUyN z>_s`#{DHNuV+E#8c$>5u(OU6<<}&2sGmd{D%WTgD2e9u+QZ(*_~Irch2_IC*FkfrKH) zyN;bS8OeygIXwS_l0Pr}(5vdw$zj~v=X0Sae6XV5qcHQSH4drg$`pq=mw@Ndb`kNC z{L`_lXAiY!iz=_T-d9D@iKIllf8g+RWvs>Apm&ft!Nb~kc&arbFH=cZ-Fvx5?%V`0 zVd^`|>Y{+|Q6fwV2D8Zga=WsbVHCiEbK{9gtb!i<#(lgIyo|7{G%VT+U%h!lSPZ&;D9i zSg^+>;%j=N$Lb{Op^-|+)g-V6`XLY7<4L19*+J4e^V)6@_nt=l|vfY{5TVX$5*s^)Im1F2nn#6bhNWcB>E4sB&&(k6J?T+)UQ;VD)H2&%bZRRbx zDkn3?GLGH|;tei}8Gy&46lV>WGq&O0|dlFb}wx zi{H<0ggtu!oxYy-)V?3UzL{47EYiq$+t(;wbMx>@c^0!UMKtfi2$6EIr&LaVtUu=$ z1@eztQ4r`nvcKHkdnWme7HX-W*u&NMHA*M9Sc~YLm7Hb_z&7QLUxxSlK?(A0V?}J6^fpcBg}f3hd0r@08DoSxKHIzk2)e zRPkJ0jp!S}v=Cc4-(}k0E<-rc6j&kV323h8TCG)LE*oHvRio|oZZ!j_*4`n-u6=s0 zsxai7_FOf*FjnSc*29tIA;yyklzx$whbt?0csuON80cz4ie zmS$M92X+7k=rGmZk3&+voAuUv&lvPbG}0U8;z!JVM~-;+pT->|$$gXywFEQ4A;2r| zbx-%MK91%2)DaGnBB;MK^=kx206L3m);g~A?uYD^va#_e(mmnpZ@O1!=E6cnkO8}X z!D9o=WEXUskm8|rb(k=2#jaG(?t4llkop+H7&1eRl9egh$em~CiNv-FZ>_JXBS|~3&*Hwtp{Uuxcv5mH*1S|a zg2Z0n7;sze%agDH)us(y(V=AlWhc%yeKq>~jeuPSJixvLiat2b$ZM1xsw!yimq{)Y z}g%?$PS{YWk&F9c2OFLLkpML z^s45e`$BP4)>?_Y@bPAw)q+jir1L;{HDQClfSDWt_a57{R)tf*GDUBzJSK$0JuN&I zPn0&8EKXvK?0=l4&AjO0L+V<2LX^CqL|rK^E@Wq|)TrsHne(!I1^KlIutNt4ID9;_ zBw@{#0B=`)M*`;m>*Evt%ro`O$>AOv4?}Grx%o7Gg^ 
ziCI}lCp_M4;Y~lZ(BOR|e6dDDy2f0ak(V|i4pZI*N~Xk9sf3{Ax5s2^Y$AC|9xu_3 zM3T0s+bNHP(^qoD``gwB6TLxbiIR|YA4!Z^D-CjubZtg|Ko%>b5-0}-bmd^Tii;rW zi?W=clfX9)r|XQTF5p)e0x2@M3-+0j&O{QI){)uOyZ$&xr?ED61A_xT4Ren3B-J3MS zi*7tA_)w;Qa!sr|-PLBzVl(W=j=r1?Z7uT1rTdfIWL)Hp^7ER$`KZCNBm8Y zzGJac`6Gy>^uTechWi8zFyI9WIrKuX;m)ZrJ@CMPNprsCxBVB;sVu$bZ5e-g$Wc_{ zo2S#waePhrlm1PPDWl%T;nK#5#x>5kNH9)}xf~<~ zdjwDg=M0)!iwrXKS~+IncwxqqPw$?Wnv$=BVDl;$doxM5=?e~CN5@{h&3ItuwwM#M z(z_Z`C@y-bI~LLBWC6@o(qJtS067=VZ5Bs7n(i7}2e`v?s2^L@{iO$N-ayj8FFly7 zJO}OLK#J1m+xxwB_0m!zQiD1NxOeVCdAWSvtS-b6$_;i6AF(UF59IGxz1_v(6OLuv z{7!r6*PVb)o}UWX1ltW#qjJo2cV;@+af9;p`_=pM*b7elNf{C?#VHF5tiuCnddcLg zQ=CWwZ@$J!8){lE>4B|Z{7^cdpPtDVVLPYABsG7o{Xq(`?p*F#3-HM?K+Jp04_A4N z)dxW;Oplh^n>>I?&xp6s;5!U9Ad07d0aMogPPzFTRkV=50=$G3wJ=z(rlbtmJnZjT zoBksc#^0Gq{=OpsU|Z>t5U9lzs(;4L+E;@LUjY-#bD&~|tWQ5A3?geji~U)*0l#Wl z`ZO(@bKz0v=czdTH#Q>KI|*=N82}<%soKj;J5}QP;Vd+1|AzHKllJcl)D&opIay(K z?B9j)s;#I2!S-jdDG=>t!>8a9T?S+bzyl600I}g38g$675iI~0_{{|Ncb?Q`3Rq;% zJL$#tKwXz+2I$!j4tHVAl}9#;4IC7o2xCo~mhQ#cRh=Ma>V!b>m-10BNeSphfSjcI zW5ld~c6=pb8Au1{8a}2ee@3WWcF@dH3G{CH@;h{ii?qz+ew3 zS3J#n9BbXe7oO{px~>@4dy*e8o=0FW$WzKL`@jwWM%Otl)i&)hf|p0h6O0VUE$D>4 z-_LlaAZdncTF3|tU^?y`Mu0^4LxB4&kLf#QSr!{7{OkBdyc&1MM~IiCzMX@Luv>4> zY}x$G^{LldoiNzZEU?CiUg|EwAK5nFExD=;x7D2k zy;L0Z-zS+(Zol&6(+$~P6CxOF2WUn~T^dZTx1U~Q_C%x(ZmW>kLCCk~Eo3$if+pf5 zbkUsYv6MpQsDNiUzca&E6h%~E==%uU?o>;_oS$o*`>I*h!i=^eq>Y=6*YlPQyLajy zcC4Dc7dX^GC~R4oIecCW_B|+|CKeQIQyExxi-h0aHl!!`e@r=6ngoi$3gTO;1Rf=6 z9p2(FNDJ6|0Ijpa^4tnje*0(Wc5~5d;0o>sz;3}<#22t?cPBa73BpuGE##TJ4+X@^O??> zRUceY3)*M?G^jL%z6mft9^aWgPLjd0J~NdCF)=X%BvP(dZe{4w^{BEo{BL+U7H8E* z0U06^NACSi$~&@KP0Fff>1SN=Na(8<_Dzim_WvJYFZNKxYR>|3ibciJa~vdg;k3Kk+owBN{n^-NA=#|Y>2TW>l_aURu*!HP zkF!>S!FIR(;c)x^2POI!RL1WPH%0cpcr*8atu?Mdj?QhS?U)V5T*q z_QX7xqZnXcv&@<4&+)%o?UVQfm?R-ESOdaxX(mT(9w3p(xz9!;`LiiTt-s}QFknC) z;~S+{Bm+|%!>a17ioy1w0Q@Z;`L^B7!Xk0W_DE3a1Bkcpqd=V`-^CJJV&>%JRAT6O zP*Z$jEf_Uj(g!)>tcHQ&e^cV#7yn}73`|Ps0dz_|Qpy^!)z9uK8>S zFekfcDFXuS|8ZFceaXMpVE&zs`;TAzgRL5V<3C2pYQ{ 
z_sL;z3_=Rpj0)`301nw~dHuhJ*Sv6 Date: Fri, 1 May 2026 20:36:15 -0400 Subject: [PATCH 04/14] Fix path in smoke tests --- .../nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py index 989c8b1..4666db7 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py @@ -13,7 +13,7 @@ def main() -> None: maze_stem = Path(args.maze).stem suffix = f"_{args.tag}" if args.tag else "" - p = Path(__file__).resolve().parent / "results" / f"smoke_runner_matrix_{maze_stem}{suffix}" / "detailed_logs.json" + p = Path(__file__).resolve().parent / "results" / f"smoke_all_{maze_stem}{suffix}" / "detailed_logs.json" d = json.loads(p.read_text(encoding="utf-8")) runs = d["runs"] print("runs", len(runs)) From 1f022c195772e4990c12344e22cf00c491f76d4b Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Mon, 4 May 2026 00:13:51 +0000 Subject: [PATCH 05/14] Update code to run claude model --- .gitignore | 2 + src/v2/nlu_pipeline/nlu_benchmark/agents.py | 169 ++++++++++++++---- src/v2/nlu_pipeline/nlu_benchmark/config.py | 8 +- .../nlu_benchmark/examples/run_llm.py | 23 +-- .../nlu_benchmark/examples/run_local_llm.py | 8 +- .../nlu_benchmark/examples/run_random.py | 9 - src/v2/nlu_pipeline/nlu_benchmark/feedback.py | 6 +- .../nlu_pipeline/nlu_benchmark/observation.py | 12 +- .../smoke_tests/analyze_smoke_runner_logs.py | 15 +- .../nlu_benchmark/smoke_tests/smoke_bfs.py | 24 +-- .../nlu_benchmark/smoke_tests/smoke_claude.py | 83 +++++++++ .../smoke_prompting_observation_querying.py | 12 +- .../nlu_pipeline/nlu_benchmark/smoke_trace.py | 50 ++++++ 13 files changed, 317 insertions(+), 104 deletions(-) delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py 
create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py diff --git a/.gitignore b/.gitignore index 72676a3..280fa3f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,10 @@ __pycache__ *.Zone.Identifier .venv/ token.txt +api_key.txt .pytest_cache/ .env src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/results/ +src/v2/nlu_pipeline/nlu_benchmark/benchmark_mazes/ src/v2/nlu_pipeline/nlu_benchmark/terminal_output.txt diff --git a/src/v2/nlu_pipeline/nlu_benchmark/agents.py b/src/v2/nlu_pipeline/nlu_benchmark/agents.py index 6567ff3..83252af 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/agents.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/agents.py @@ -1,70 +1,163 @@ from __future__ import annotations +import json import os -import random +import urllib.error +import urllib.request from dataclasses import dataclass, field -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple -from huggingface_hub import InferenceClient, get_token from transformers import AutoModelForCausalLM, AutoTokenizer -from nlu_benchmark.parser import ACTION_ORDER - -# More stable defaults for local model downloads on Windows. +# Stable defaults for HF Hub downloads on Windows (local Transformers path). os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0") os.environ.setdefault("HF_HUB_DISABLE_XET", "1") -# Keep empty in source. Prefer env var `HF_TOKEN` or `huggingface-cli login`. 
-_LOCAL_HF_TOKEN = "" -if _LOCAL_HF_TOKEN.strip() and not os.environ.get("HF_TOKEN"): - os.environ["HF_TOKEN"] = _LOCAL_HF_TOKEN.strip() +DEFAULT_LOCAL_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct" -class RandomAgent: - def __call__(self, messages: list[dict]) -> str: - return f"FINAL_OUTPUT: {random.choice(ACTION_ORDER)}" +DEFAULT_CLAUDE_MODEL = "claude-sonnet-4-20250514" + + +def _parse_data_image_url(url: str) -> tuple[str, str]: + """Split ``data:;base64,`` into media type and raw base64 payload.""" + if not isinstance(url, str) or not url.startswith("data:"): + raise ValueError("Expected a data: URL with base64 image payload.") + rest = url[5:] + if ";base64," not in rest: + raise ValueError("Expected ';base64,' in image data URL.") + meta, _, b64 = rest.partition(";base64,") + media_type = (meta.strip() or "image/png").split(";")[0].strip() + return media_type, b64.strip() + + +def _openai_blocks_to_anthropic(blocks: List[dict]) -> List[dict]: + """Convert runner/OpenAI-style content blocks to Anthropic Messages ``content`` blocks.""" + out: List[dict] = [] + for b in blocks: + if not isinstance(b, dict): + continue + t = b.get("type") + if t == "text": + out.append({"type": "text", "text": str(b.get("text", ""))}) + elif t == "image_url": + url_holder = b.get("image_url") + url = url_holder.get("url") if isinstance(url_holder, dict) else url_holder + if isinstance(url, str) and url.startswith("data:"): + mt, raw_b64 = _parse_data_image_url(url) + out.append({"type": "image", "source": {"type": "base64", "media_type": mt, "data": raw_b64}}) + return out + + +def _anthropic_turn_content(content: object, role: str) -> object: + if isinstance(content, str): + return content if role != "assistant" else content.strip() + if isinstance(content, list): + anthropic_blocks = _openai_blocks_to_anthropic(content) + if not anthropic_blocks: + return "" + if len(anthropic_blocks) == 1 and anthropic_blocks[0].get("type") == "text": + return 
str(anthropic_blocks[0].get("text", "")) + return anthropic_blocks + return str(content) + + +def _anthropic_chat_turns(messages: List[dict]) -> Tuple[Optional[str], List[Dict[str, object]]]: + """Split OpenAI-style chat messages into Anthropic `system` + user/assistant turns.""" + system_parts: List[str] = [] + turns: List[Dict[str, object]] = [] + for m in messages: + role = m.get("role") + content = m.get("content", "") + if role == "system": + system_parts.append(str(content)) + elif role in ("user", "assistant"): + turns.append({"role": role, "content": _anthropic_turn_content(content, role)}) + else: + raise ValueError(f"Unsupported message role for Claude agent: {role!r}") + system = "\n\n".join(system_parts) if system_parts else None + return system, turns + + +def _anthropic_messages_http( + api_key: str, + *, + model: str, + max_tokens: int, + temperature: float, + system: Optional[str], + messages: List[Dict[str, object]], + timeout: Optional[float], +) -> str: + """POST /v1/messages (Anthropic Messages API); uses stdlib only.""" + body: Dict[str, object] = { + "model": model, + "max_tokens": max_tokens, + "messages": messages, + "temperature": temperature, + } + if system: + body["system"] = system + + req = urllib.request.Request( + "https://api.anthropic.com/v1/messages", + data=json.dumps(body).encode("utf-8"), + headers={ + "Content-Type": "application/json", + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout or 60.0) as resp: + payload = json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + detail = e.read().decode(errors="replace") + raise RuntimeError(f"Anthropic API HTTP {e.code}: {detail}") from e -DEFAULT_ROUTER_MODEL = "meta-llama/Llama-3.1-8B-Instruct:cerebras" -DEFAULT_LOCAL_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct" + parts: List[str] = [] + for block in payload.get("content", []) or []: + if isinstance(block, dict) and 
block.get("type") == "text": + parts.append(str(block.get("text", ""))) + return "".join(parts).strip() @dataclass -class HFLLMConfig: - model: str = DEFAULT_ROUTER_MODEL +class ClaudeAnthropicConfig: + model: str = DEFAULT_CLAUDE_MODEL temperature: float = 0.0 - max_tokens: int = 64 - timeout: Optional[float] = 30.0 + max_tokens: int = 1024 + timeout: Optional[float] = 60.0 @dataclass -class HuggingFaceLLMAgent: - """Remote HF Router-backed chat-completions agent.""" +class ClaudeAnthropicAgent: + """Claude via Anthropic Messages API (`ANTHROPIC_API_KEY`). Supports vision user turns.""" - config: HFLLMConfig = field(default_factory=HFLLMConfig) - client: Optional[InferenceClient] = None + config: ClaudeAnthropicConfig = field(default_factory=ClaudeAnthropicConfig) + api_key: Optional[str] = None def __post_init__(self) -> None: - if self.client is None: - token = os.environ.get("HF_TOKEN") or get_token() - if not token: - raise ValueError( - "No Hugging Face token found. Set HF_TOKEN or run `huggingface-cli login`." - ) - - self.client = InferenceClient( - api_key=token, - timeout=self.config.timeout, + key = (self.api_key or os.environ.get("ANTHROPIC_API_KEY") or "").strip() + if not key: + raise ValueError( + "No Anthropic API key found. Set ANTHROPIC_API_KEY or pass api_key=... to ClaudeAnthropicAgent." 
) + self.api_key = key - def __call__(self, messages: List[Dict[str, str]]) -> str: - response = self.client.chat.completions.create( + def __call__(self, messages: List[dict]) -> str: + system, turns = _anthropic_chat_turns(messages) + return _anthropic_messages_http( + self.api_key, model=self.config.model, - messages=messages, - temperature=self.config.temperature, max_tokens=self.config.max_tokens, + temperature=self.config.temperature, + system=system, + messages=turns, + timeout=self.config.timeout, ) - return response.choices[0].message.content.strip() @dataclass diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py index d0f2783..a96638f 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/config.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -15,9 +15,9 @@ class ExperimentConfig: Maze **layout** text is in the system / user split from ``observation``, not from prompting. observation - text_only – initial NL maze in system; current situation text per user turn; last3 history - image_text – same as text_only + live PNG each turn; last3 = full feedback - screenshot_only – live PNG only (no NL map); last3 = action-only lines + text_only – initial NL maze in system; current situation text per user turn; last3 history + image_text – same as text_only + live PNG each turn; last3 = full feedback + image_only – live PNG only (no NL map); last3 = action-only lines (default) context_window current – only the current observation (no prior steps in the prompt) @@ -30,7 +30,7 @@ class ExperimentConfig: """ prompting: Literal["minimal", "standard", "verbose"] = "minimal" - observation: Literal["text_only", "image_text", "screenshot_only"] = "text_only" + observation: Literal["text_only", "image_text", "image_only"] = "image_only" context_window: Literal["current", "last3"] = "current" querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" diff --git 
a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py index dd96ed8..ab6e386 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_llm.py @@ -1,18 +1,21 @@ import os +from pathlib import Path -# Optional: paste a token here for quick runs, or set HF_TOKEN in your shell / `huggingface-cli login`. -_HF_TOKEN_FOR_THIS_SCRIPT = "" -if _HF_TOKEN_FOR_THIS_SCRIPT: - os.environ["HF_TOKEN"] = _HF_TOKEN_FOR_THIS_SCRIPT +# Load Anthropic API key from repo-root api_key.txt if ANTHROPIC_API_KEY is unset. +if not os.environ.get("ANTHROPIC_API_KEY"): + for directory in Path(__file__).resolve().parents: + key_file = directory / "api_key.txt" + if key_file.is_file(): + os.environ["ANTHROPIC_API_KEY"] = key_file.read_text().strip() + break -from nlu_benchmark.runner import EpisodeRunner -from nlu_benchmark.agents import HuggingFaceLLMAgent, HFLLMConfig +from nlu_benchmark.runner import ExperimentRunner +from nlu_benchmark.agents import ClaudeAnthropicAgent, ClaudeAnthropicConfig -runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") +runner = ExperimentRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") -# Uses HFLLMConfig defaults (small Qwen on HF Router). Override model=... if needed. -agent = HuggingFaceLLMAgent(config=HFLLMConfig()) +# Override model=... on ClaudeAnthropicConfig if needed (see Anthropic model IDs). 
+agent = ClaudeAnthropicAgent(config=ClaudeAnthropicConfig()) result = runner.run(agent) print(result["success"]) - diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py index 037d528..1b7511d 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_local_llm.py @@ -1,7 +1,11 @@ -from nlu_benchmark.runner import EpisodeRunner +from nlu_benchmark.config import ExperimentConfig +from nlu_benchmark.runner import ExperimentRunner from nlu_benchmark.agents import LocalTransformersAgent, LocalLLMConfig -runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") +runner = ExperimentRunner.from_json( + "nlu_benchmark/sample mazes/V01_empty_room.json", + config=ExperimentConfig(observation="text_only"), +) # Small local model (no HF inference credits required). agent = LocalTransformersAgent( diff --git a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py b/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py deleted file mode 100644 index 4e7a3ca..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/examples/run_random.py +++ /dev/null @@ -1,9 +0,0 @@ -from nlu_benchmark.runner import EpisodeRunner -from nlu_benchmark.agents import RandomAgent - -runner = EpisodeRunner.from_json("nlu_benchmark/sample mazes/V01_empty_room.json") - -agent = RandomAgent() -result = runner.run(agent) - -print("Success:", result["success"]) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/feedback.py b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py index 7bf58db..d58b1e5 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/feedback.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py @@ -4,12 +4,12 @@ from typing import Any, Literal -ObservationKind = Literal["text_only", "image_text", "screenshot_only"] +ObservationKind = Literal["text_only", "image_text", "image_only"] def action_feedback_for_prompt(observation: 
ObservationKind, text: str) -> str: - """Step outcomes for ``Last result:`` / history; cleared for screenshot-only. Parse failures are never masked (see runner).""" - if observation == "screenshot_only": + """Step outcomes for ``Last result:`` / history; cleared for ``image_only``. Parse failures stay visible in runner.""" + if observation == "image_only": return "" return text diff --git a/src/v2/nlu_pipeline/nlu_benchmark/observation.py b/src/v2/nlu_pipeline/nlu_benchmark/observation.py index a4e3062..1a03496 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/observation.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/observation.py @@ -4,9 +4,9 @@ system message once per episode. Each user turn: ``render_user_observation_text``, last3 history, and live PNG when image is enabled. -* **screenshot_only** – No initial NL block; user gets live PNG and action-only - history; step feedback as configured elsewhere. Fallback PNG on disk if live - render fails. +* **image_only** – No initial NL map in system; live PNG each query; last3 history is action-only lines. + +``build_image_blocks`` adds PNGs whenever observation is not ``text_only`` (see ``runner._build_message``). 
""" from __future__ import annotations @@ -33,7 +33,7 @@ class ObservationBuilder: def __init__( self, - observation: Literal["text_only", "image_text", "screenshot_only"], + observation: Literal["text_only", "image_text", "image_only"], context_window: Literal["current", "last3"], ) -> None: self._observation = observation @@ -50,7 +50,7 @@ def history_text(self) -> str: if self._context_window == "current" or not self._history: return "" recs = self._history[-3:] - if self._observation == "screenshot_only": + if self._observation == "image_only": lines = ["Recent steps (oldest first, action only):"] for rec in recs: lines.append(f" {rec.action}") @@ -63,7 +63,7 @@ def history_text(self) -> str: return "\n".join(lines) def build_text(self, state) -> str: - if self._observation == "screenshot_only": + if self._observation == "image_only": return "" return render_user_observation_text(state) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py index 4666db7..c6514c3 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py @@ -41,10 +41,11 @@ def main() -> None: issues.append((label, "image_mode_not_list")) has_initial = "Initial maze (fixed for this episode):" in system_prompt - if cfg["observation"] == "screenshot_only" and has_initial: - issues.append((label, "screenshot_has_initial_maze")) - if cfg["observation"] != "screenshot_only" and not has_initial: - issues.append((label, "non_screenshot_missing_initial_maze")) + obs = cfg["observation"] + if obs == "image_only" and has_initial: + issues.append((label, "image_only_has_initial_maze")) + if obs != "image_only" and not has_initial: + issues.append((label, "missing_initial_maze")) has_mechanism_list = "The environment may contain:" in system_prompt has_rules = "RULES (domain logic):" in 
system_prompt @@ -72,9 +73,9 @@ def main() -> None: if cfg["context_window"] == "current" and (has_recent or has_action_only): issues.append((label, "current_has_history")) if cfg["context_window"] == "last3": - if cfg["observation"] == "screenshot_only" and not has_action_only: - issues.append((label, "last3_screenshot_missing_action_history")) - if cfg["observation"] != "screenshot_only" and not has_recent: + if obs == "image_only" and not has_action_only: + issues.append((label, "last3_image_only_missing_action_history")) + if obs != "image_only" and not has_recent: issues.append((label, "last3_missing_history")) steps = [t["step"] for t in transcript] diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py index 946db55..dd1350a 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py @@ -13,7 +13,7 @@ from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA from nlu_benchmark.loader import load_maze -from nlu_benchmark.renderer import render_maze_image_png_bytes +from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch from automatic_maze_generation.mazegen.solver import solve_maze @@ -117,12 +117,7 @@ def main() -> None: maze_stem = Path(args.maze).stem suffix = f"_{args.tag}" if args.tag else "" out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_bfs{suffix}" - out_dir.mkdir(parents=True, exist_ok=True) - for p in out_dir.glob("*.png"): - p.unlink() - for p in out_dir.glob("*.txt"): - p.unlink() - + trace_prepare(out_dir) env_plan = load_maze(maze_path) plan_state = env_plan.reset() maze_inst = _state_to_maze_instance(plan_state) @@ -137,24 +132,15 @@ def main() -> None: env = load_maze(maze_path) state = env.reset() - (out_dir / 
"step_000_reset.png").write_bytes(render_maze_image_png_bytes(state)) - lines = [f"000 RESET pos={state.agent_pos} facing={state.facing} inv={state.inventory}"] + lines = trace_reset(out_dir, state) for step, action in enumerate(executable_actions, start=1): before = state.agent_pos - state, event = env.step(action) - (out_dir / f"step_{step:03d}_{action}.png").write_bytes(render_maze_image_png_bytes(state)) - line = ( - f"{step:03d} {action:<12} {event.type:<10} from={before} " - f"to={state.agent_pos} facing={state.facing} inv={state.inventory}" - ) - print(line) - lines.append(line) + state, event = trace_step(out_dir, lines, step, action, env, position_before=before) if event.type == "DONE": break - (out_dir / "run_log.txt").write_text("\n".join(lines), encoding="utf-8") - (out_dir / "plan.txt").write_text("\n".join(executable_actions), encoding="utf-8") + trace_write_text_artifacts(out_dir, lines, executable_actions) print(f"\nsuccess={state.agent_pos == state.goal}") print(f"steps_used={state.step_count}") print(f"out={out_dir}") diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py new file mode 100644 index 0000000..4707e5f --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import argparse +import os +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +V2_ROOT = Path(__file__).resolve().parents[3] +if str(V2_ROOT) not in sys.path: + sys.path.insert(0, str(V2_ROOT)) + +from nlu_benchmark.agents import ClaudeAnthropicAgent, ClaudeAnthropicConfig +from nlu_benchmark.loader import load_maze +from nlu_benchmark.runner import ExperimentRunner +from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts + + +def _ensure_anthropic_api_key() -> None: + if 
os.environ.get("ANTHROPIC_API_KEY"): + return + for directory in Path(__file__).resolve().parents: + key_file = directory / "api_key.txt" + if key_file.is_file(): + os.environ["ANTHROPIC_API_KEY"] = key_file.read_text().strip() + return + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Smoke test: Claude agent episode in NLU env (PNG trace under results/smoke_*_claude/).", + ) + parser.add_argument("--maze", default="V04_single_key.json", help="Maze JSON filename under sample mazes/") + parser.add_argument("--tag", default="", help="Optional output tag suffix.") + args = parser.parse_args() + + maze_path = ROOT / "nlu_benchmark" / "sample mazes" / args.maze + maze_stem = Path(args.maze).stem + suffix = f"_{args.tag}" if args.tag else "" + out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_claude{suffix}" + + if not maze_path.is_file(): + print(f"Missing maze file: {maze_path}") + return + + trace_prepare(out_dir) + + _ensure_anthropic_api_key() + + runner = ExperimentRunner.from_json(str(maze_path.resolve())) + agent = ClaudeAnthropicAgent(config=ClaudeAnthropicConfig()) + + try: + result = runner.run(agent, verbose=False) + except Exception as e: + print(f"runner.run raised: {e}") + return + + transcript = result["transcript"] + planned_actions = [rec["action"] for rec in transcript] + + env = load_maze(maze_path) + state = env.reset() + + lines = trace_reset(out_dir, state) + + for step, action in enumerate(planned_actions, start=1): + before = state.agent_pos + state, event = trace_step(out_dir, lines, step, action, env, position_before=before) + if event.type == "DONE": + break + + trace_write_text_artifacts(out_dir, lines, planned_actions) + + print(f"\nsuccess={state.agent_pos == state.goal}") + print(f"steps_used={state.step_count}") + print(f"out={out_dir}") + + +if __name__ == "__main__": + main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py 
b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py index 97fe4d8..3e3ce2b 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -253,7 +253,7 @@ def _suite_cases(base: ExperimentConfig, suite: str): (replace(base, context_window="last3"), "context=last3"), (replace(base, observation="text_only", context_window="last3"), "obs=text_only"), (replace(base, observation="image_text", context_window="last3"), "obs=image_text"), - (replace(base, observation="screenshot_only", context_window="last3"), "obs=screenshot_only"), + (replace(base, observation="image_only", context_window="last3"), "obs=image_only"), (replace(base, querying="step_by_step"), "query=step_by_step"), (replace(base, querying="subgoal"), "query=subgoal"), (replace(base, querying="full_trajectory"), "query=full_trajectory"), @@ -308,10 +308,10 @@ def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all" _assert(first["user_content_type"] == "list", f"{label}: image mode should send list content", errors) _assert(first["has_image"], f"{label}: image mode should include image block", errors) - if cfg.observation == "screenshot_only": - _assert("Initial maze (fixed for this episode):" not in first["system"], f"{label}: screenshot_only should omit initial NL map", errors) + if cfg.observation == "image_only": + _assert("Initial maze (fixed for this episode):" not in first["system"], f"{label}: image_only should omit initial NL map", errors) else: - _assert("Initial maze (fixed for this episode):" in first["system"], f"{label}: non-screenshot should include initial NL map", errors) + _assert("Initial maze (fixed for this episode):" in first["system"], f"{label}: expected initial NL map in system prompt", errors) if cfg.context_window == "current" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] 
@@ -319,8 +319,8 @@ def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all" _assert("Recent steps (oldest first, action only):" not in second_text, f"{label}: current unexpectedly includes action history", errors) if cfg.context_window == "last3" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] - if cfg.observation == "screenshot_only": - _assert("Recent steps (oldest first, action only):" in second_text, f"{label}: last3 screenshot should include action-only history", errors) + if cfg.observation == "image_only": + _assert("Recent steps (oldest first, action only):" in second_text, f"{label}: last3 image_only should include action-only history", errors) else: _assert("Recent history (last 3 steps, oldest first):" in second_text, f"{label}: last3 should include full history", errors) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py new file mode 100644 index 0000000..3c13a81 --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py @@ -0,0 +1,50 @@ +"""PNG + text artifacts for smoke scripts (``smoke_bfs``, ``smoke_claude``, …). + +Writes ``step_000_reset.png``, ``step_NNN_.png``, ``run_log.txt``, ``plan.txt`` +under a caller-chosen ``results/…`` directory. 
+""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Tuple + +from nlu_benchmark.renderer import render_maze_image_png_bytes + + +def trace_prepare(out_dir: Path) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + for p in out_dir.glob("*.png"): + p.unlink() + for p in out_dir.glob("*.txt"): + p.unlink() + + +def trace_reset(out_dir: Path, state: Any) -> list[str]: + (out_dir / "step_000_reset.png").write_bytes(render_maze_image_png_bytes(state)) + return [f"000 RESET pos={state.agent_pos} facing={state.facing} inv={state.inventory}"] + + +def trace_step( + out_dir: Path, + lines: list[str], + step: int, + action: str, + env: Any, + *, + position_before: tuple[Any, ...], +) -> Tuple[Any, Any]: + state, event = env.step(action) + (out_dir / f"step_{step:03d}_{action}.png").write_bytes(render_maze_image_png_bytes(state)) + line = ( + f"{step:03d} {action:<12} {event.type:<10} from={position_before} " + f"to={state.agent_pos} facing={state.facing} inv={state.inventory}" + ) + print(line) + lines.append(line) + return state, event + + +def trace_write_text_artifacts(out_dir: Path, lines: list[str], plan_actions: list[str]) -> None: + (out_dir / "run_log.txt").write_text("\n".join(lines), encoding="utf-8") + (out_dir / "plan.txt").write_text("\n".join(plan_actions), encoding="utf-8") From b07ffd60def835d7c1166dc1c845c6998284ef2a Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Mon, 4 May 2026 01:40:57 +0000 Subject: [PATCH 06/14] Validate benchmark mazes --- .../mazegen/validator.py | 148 ++++++++++++++++++ .../render_dataset.py | 5 +- src/v2/nlu_pipeline/nlu_benchmark/loader.py | 128 ++++++++++++++- src/v2/nlu_pipeline/nlu_benchmark/renderer.py | 24 +++ .../nlu_benchmark/smoke_tests/__init__.py | 0 .../smoke_tests/smoke_benchmark_mazes.py | 81 ++++++++++ .../nlu_benchmark/smoke_tests/smoke_bfs.py | 126 +-------------- .../smoke_prompting_observation_querying.py | 50 +----- .../smoke_tests/solver_plan_trace.py | 99 
++++++++++++ 9 files changed, 489 insertions(+), 172 deletions(-) create mode 100644 src/v2/automatic_maze_generation/mazegen/validator.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/__init__.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py diff --git a/src/v2/automatic_maze_generation/mazegen/validator.py b/src/v2/automatic_maze_generation/mazegen/validator.py new file mode 100644 index 0000000..3442852 --- /dev/null +++ b/src/v2/automatic_maze_generation/mazegen/validator.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +from dataclasses import replace +from typing import List, Optional + +from .models import MazeInstance, MazeLayout, ValidationParams +from .solver import count_shortest_paths, solve_maze, solve_navigation_only + + +def validate_navigation_layout(layout: MazeLayout, params: ValidationParams) -> dict: + result = solve_navigation_only(layout) + reasons: List[str] = [] + if params.require_solvable and not result["is_solvable"]: + reasons.append("maze is not solvable") + if params.require_unique_shortest_path and result["is_solvable"]: + nsp = count_shortest_paths(layout) + if nsp != 1: + reasons.append(f"expected unique shortest path, found {nsp}") + return { + "is_valid": len(reasons) == 0, + "reasons": reasons, + "solver_result": result, + } + + + + + + + + + +def _clone_maze(maze: MazeInstance) -> MazeInstance: + return MazeInstance( + width=maze.width, + height=maze.height, + walls=set(maze.walls), + start=maze.start, + goal=maze.goal, + keys=[replace(k) for k in maze.keys], + doors=[replace(d) for d in maze.doors], + switches=[replace(s, controls=list(s.controls)) for s in maze.switches], + gates=[replace(g) for g in maze.gates], + metadata=dict(maze.metadata), + ) + + + +def _remove_mechanism_by_id(maze: MazeInstance, mech_id: str) -> MazeInstance: + new_maze = _clone_maze(maze) + 
new_maze.keys = [k for k in new_maze.keys if k.id != mech_id] + new_maze.doors = [d for d in new_maze.doors if d.id != mech_id] + new_maze.switches = [s for s in new_maze.switches if s.id != mech_id] + new_maze.gates = [g for g in new_maze.gates if g.id != mech_id] + + for sw in new_maze.switches: + sw.controls = [gid for gid in sw.controls if gid != mech_id] + return new_maze + +def _extract_required_ids(maze: MazeInstance, expected_logic: Optional[str]) -> List[str]: + if expected_logic is None: + return [] + + if expected_logic == "kd": + return [maze.keys[0].id] if maze.keys else [] + + if expected_logic == "sg": + return [maze.switches[0].id] if maze.switches else [] + + if expected_logic == "ks": + ids = [] + if maze.keys: + ids.append(maze.keys[0].id) + if maze.switches: + ids.append(maze.switches[0].id) + return ids + + if expected_logic == "sk": + ids = [] + if maze.switches: + ids.append(maze.switches[0].id) + if maze.keys: + ids.append(maze.keys[0].id) + return ids + + if expected_logic == "kk": + return [k.id for k in maze.keys[:2]] + + return [] + + + +def _run_ablation_checks(maze: MazeInstance, expected_logic: Optional[str]) -> List[str]: + reasons: List[str] = [] + for mech_id in _extract_required_ids(maze, expected_logic): + ablated = _remove_mechanism_by_id(maze, mech_id) + result = solve_maze(ablated) + if result["is_solvable"]: + reasons.append(f"mechanism {mech_id} is not necessary under ablation") + return reasons + + + +def validate_maze(maze: MazeInstance, expected_logic: Optional[str] = None) -> dict: + solver_result = solve_maze(maze) + reasons: List[str] = [] + if not solver_result["is_solvable"]: + reasons.append("maze is not solvable") + + chain_pattern = maze.metadata.get("chain_pattern") + if expected_logic is not None and chain_pattern not in {expected_logic, None}: + reasons.append("chain pattern metadata does not match expected logic") + + interactions = solver_result.get("interactions", []) + if expected_logic == "kd": + if not 
any(x.startswith("pickup:k") for x in interactions): + reasons.append("expected kd maze to require a key pickup") + if not any(x.startswith("open:D") for x in interactions): + reasons.append("expected kd maze to require opening a door") + elif expected_logic == "sg": + if not any(x.startswith("toggle:s") for x in interactions): + reasons.append("expected sg maze to require activating a switch") + if not any(x.startswith("cross:g") for x in interactions): + reasons.append("expected sg maze to require crossing a gate") + elif expected_logic in {"ks", "sk", "kk"}: + required_prefixes = { + "ks": ["pickup:k", "open:D", "toggle:s", "cross:g"], + "sk": ["toggle:s", "cross:g", "pickup:k", "open:D"], + "kk": ["pickup:k", "open:D", "pickup:k", "open:D"], + }[expected_logic] + idx = 0 + for interaction in interactions: + if interaction.startswith(required_prefixes[idx]): + idx += 1 + if idx == len(required_prefixes): + break + if idx < len(required_prefixes): + reasons.append(f"expected ordered chain {expected_logic} was not observed in solver interactions") + + if solver_result["is_solvable"] and expected_logic is not None: + reasons.extend(_run_ablation_checks(maze, expected_logic)) + + return { + "is_valid": len(reasons) == 0, + "reasons": reasons, + "solver_result": solver_result, + } + diff --git a/src/v2/automatic_maze_generation/render_dataset.py b/src/v2/automatic_maze_generation/render_dataset.py index 83c820f..9c65d2d 100644 --- a/src/v2/automatic_maze_generation/render_dataset.py +++ b/src/v2/automatic_maze_generation/render_dataset.py @@ -42,7 +42,8 @@ def _row_col_payload_to_xy_payload(payload: dict) -> dict: def rc_to_xy(pos): r, c = pos - return [c, r] + # Payloads use 1-based (row, col); drawing uses 0-based (x=col, y=row). 
+ return [c - 1, r - 1] dims = maze.get("dimensions") if dims and len(dims) == 2: @@ -320,7 +321,7 @@ def _figure_from_maze_payload(payload: dict, title: str) -> Tuple[Any, Any, int] ax.set_title(title) ax.set_xlim(0, width) ax.set_ylim(0, height) - ax.set_aspect("equal") + ax.set_aspect("equal", adjustable="box") ax.axis("off") return fig, ax, height diff --git a/src/v2/nlu_pipeline/nlu_benchmark/loader.py b/src/v2/nlu_pipeline/nlu_benchmark/loader.py index c852f58..5c590a7 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/loader.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/loader.py @@ -1,10 +1,17 @@ +from __future__ import annotations + +import copy import json +from dataclasses import replace from pathlib import Path -from nlu_benchmark.env import GridWorldEnv +from typing import Any +from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch -def load_maze(path) -> GridWorldEnv: - data = json.loads(Path(path).read_text(encoding="utf-8")) +from nlu_benchmark.env import GridState, GridWorldEnv + + +def _task_dict_to_env(data: dict[str, Any]) -> GridWorldEnv: maze = data["maze"] rows, cols = maze["dimensions"] walls = {tuple(w) for w in maze["walls"]} @@ -21,3 +28,118 @@ def load_maze(path) -> GridWorldEnv: max_steps=max_steps, mechanisms=mechanisms, ) + + +def load_maze(path) -> GridWorldEnv: + data = json.loads(Path(path).read_text(encoding="utf-8")) + return _task_dict_to_env(data) + + +def grid_state_to_maze_instance(st: GridState) -> MazeInstance: + def rc_to_xy(pos): + r, c = pos + # NLU grids are 1-based (row, col); mazegen solver uses 0-based (x, y). 
+ return (c - 1, r - 1) + + return MazeInstance( + width=st.cols, + height=st.rows, + walls={rc_to_xy(w) for w in st.walls}, + start=rc_to_xy(st.start), + goal=rc_to_xy(st.goal), + keys=[ + Key(id=k.get("id", f"key_{i}"), position=rc_to_xy(tuple(k["position"])), color=k["color"]) + for i, k in enumerate(st.keys) + ], + doors=[ + Door( + id=d.get("id", f"door_{i}"), + position=rc_to_xy(tuple(d["position"])), + requires_key=d["requires_key"], + initial_state=d.get("initial_state", "locked"), + ) + for i, d in enumerate(st.doors) + ], + switches=[ + Switch( + id=s.get("id", f"switch_{i}"), + position=rc_to_xy(tuple(s["position"])), + controls=list(s.get("controls", [])), + switch_type=s.get("switch_type", "toggle"), + initial_state=s.get("initial_state", "off"), + ) + for i, s in enumerate(st.switches) + ], + gates=[ + Gate( + id=g.get("id", f"gate_{i}"), + position=rc_to_xy(tuple(g["position"])), + initial_state=g.get("initial_state", "closed"), + ) + for i, g in enumerate(st.gates) + ], + ) + + +def load_maze_instance(path) -> MazeInstance: + """Parse task JSON like :func:`load_maze`, reset env once, and build a :class:`MazeInstance` for mazegen.""" + p = Path(path) + data = json.loads(p.read_text(encoding="utf-8")) + return maze_instance_from_task_dict(data) + + +def maze_instance_from_task_dict(data: dict[str, Any]) -> MazeInstance: + """Same as :func:`load_maze_instance` but from an already-parsed task dict (avoids a second disk read).""" + inst = grid_state_to_maze_instance(_task_dict_to_env(data).reset()) + return replace(inst, metadata=dict(data.get("metadata", {}))) + + +def task_dict_shrink_dimensions_minus_two(data: dict[str, Any]) -> dict[str, Any]: + """ + Return a deep copy whose ``maze.dimensions`` are each reduced by 2 (e.g. ``[10, 10] -> [8, 8]``). + + Row/column coordinates are **unchanged**. Use when the JSON names a larger grid than the + coordinates actually use (common in the ogbench-style exports this repo ingests). 
+ + Raises ``ValueError`` if the new size would be <2 or any coordinate lies outside the shrunk grid. + """ + out = copy.deepcopy(data) + maze = out["maze"] + rows, cols = maze["dimensions"] + if rows < 2 or cols < 2: + raise ValueError("maze dimensions must be at least 2 to shrink by 2") + nr, nc = rows - 2, cols - 2 + + def bad_rc(r: int, c: int) -> bool: + return not (1 <= r <= nr and 1 <= c <= nc) + + sr, sc = int(maze["start"][0]), int(maze["start"][1]) + gr, gc = int(maze["goal"][0]), int(maze["goal"][1]) + if bad_rc(sr, sc) or bad_rc(gr, gc): + raise ValueError(f"start/goal outside shrunk grid 1..{nr} x 1..{nc}: start={maze['start']} goal={maze['goal']}") + + for w in maze["walls"]: + r, c = int(w[0]), int(w[1]) + if bad_rc(r, c): + raise ValueError(f"wall {w} outside shrunk grid ({nr}x{nc})") + + mech = out.get("mechanisms", {}) + for name in ("keys", "doors", "switches", "gates"): + for item in mech.get(name, []): + pos = item.get("position") + if pos is None: + continue + r, c = int(pos[0]), int(pos[1]) + if bad_rc(r, c): + raise ValueError(f"{name} position {pos} outside shrunk grid ({nr}x{nc})") + + g = out.get("goal") + if isinstance(g, dict) and g.get("type") == "reach_position": + t = g.get("target") + if isinstance(t, (list, tuple)) and len(t) == 2: + r, c = int(t[0]), int(t[1]) + if bad_rc(r, c): + raise ValueError(f"goal.target {t} outside shrunk grid ({nr}x{nc})") + + maze["dimensions"] = [nr, nc] + return out diff --git a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py index ab00dff..2dc7823 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py @@ -5,6 +5,7 @@ import importlib.util import sys from pathlib import Path +from typing import Any from nlu_benchmark.env import GridState @@ -128,3 +129,26 @@ def render_maze_image_png_bytes(state: GridState) -> bytes: agent_pos=(ar, ac), facing=state.facing, ) + + +def 
render_task_json_with_solver_path_png( + task_data: dict[str, Any], + solver_path_xy: list[tuple[int, int]], + output_path: Path, +) -> None: + """ + One static figure like ``automatic_maze_generation/render_dataset.py`` / ``main()``: + maze + mechanisms + semi-transparent optimal route. + + ``solver_path_xy`` is ``solve_maze(...)["path"]`` (mazegen 0-based ``(x, y)``; ``x`` = column index, ``y`` = row index). + """ + optimal_path_rc = [[y + 1, x + 1] for (x, y) in solver_path_xy] + payload = { + **task_data, + "validation": { + **task_data.get("validation", {}), + "optimal_path": optimal_path_rc, + }, + } + mod = _render_dataset_module() + mod.render_maze_payload(payload, output_path) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/__init__.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py new file mode 100644 index 0000000..d15d92c --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py @@ -0,0 +1,81 @@ +""" +For each ``nlu_benchmark/benchmark_mazes/**/*.json``: + +0. Apply :func:`~nlu_benchmark.loader.task_dict_shrink_dimensions_minus_two` (labeled 10×10 → 8×8 grid; coordinates unchanged). +1. Run mazegen :func:`~automatic_maze_generation.mazegen.solver.solve_maze` (no ``validate_maze``). +2. If solvable, write **one** PNG (same style as ``automatic_maze_generation/render_dataset.py``): + maze + mechanisms + overlaid optimal path, under + ``smoke_tests/results/benchmark_png//.png``. 
+ +Run from repo root:: + + PYTHONPATH=src/v2 python3 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[2] +V2_ROOT = Path(__file__).resolve().parents[3] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +if str(V2_ROOT) not in sys.path: + sys.path.insert(0, str(V2_ROOT)) + +from automatic_maze_generation.mazegen.solver import solve_maze # noqa: E402 +from nlu_benchmark.loader import maze_instance_from_task_dict, task_dict_shrink_dimensions_minus_two # noqa: E402 +from nlu_benchmark.renderer import render_task_json_with_solver_path_png # noqa: E402 + +_SCRIPT_DIR = Path(__file__).resolve().parent +_PNG_ROOT = _SCRIPT_DIR / "results" / "benchmark_png" + + +def main() -> None: + base = ROOT / "nlu_benchmark" / "benchmark_mazes" + if not base.is_dir(): + print(f"error: {base} not found (add benchmark JSONs there)", file=sys.stderr) + sys.exit(2) + + paths = sorted(base.glob("**/*.json")) + if not paths: + print(f"warning: no JSON files under {base}", file=sys.stderr) + sys.exit(0) + + failed = 0 + failures: list[tuple[Path, str]] = [] + for path in paths: + rel = path.relative_to(base) + try: + text = path.read_text(encoding="utf-8") + data = task_dict_shrink_dimensions_minus_two(json.loads(text)) + result = solve_maze(maze_instance_from_task_dict(data)) + if not result.get("is_solvable"): + failed += 1 + msg = "not solvable" + failures.append((rel, msg)) + print(f"FAIL {rel}: {msg}", flush=True) + continue + out_png = _PNG_ROOT / rel.parent / f"{path.stem}.png" + out_png.parent.mkdir(parents=True, exist_ok=True) + render_task_json_with_solver_path_png(data, result.get("path", []), out_png) + print(f"ok {rel} cost={result.get('optimal_cost')} png={out_png}") + except Exception as e: + failed += 1 + msg = str(e) + failures.append((rel, msg)) + print(f"FAIL {rel}: {msg}", file=sys.stderr, flush=True) 
+ + print(f"\n{len(paths)} files, {failed} failed") + if failures: + print("Failed files:", file=sys.stderr) + for rel, msg in failures: + print(f" - {rel}: {msg}", file=sys.stderr) + print(f"PNGs under {_PNG_ROOT}") + sys.exit(1 if failed else 0) + + +if __name__ == "__main__": + main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py index dd1350a..2cbdad6 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_bfs.py @@ -11,100 +11,7 @@ if str(V2_ROOT) not in sys.path: sys.path.insert(0, str(V2_ROOT)) -from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA -from nlu_benchmark.loader import load_maze -from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts -from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch -from automatic_maze_generation.mazegen.solver import solve_maze - - -def _state_to_maze_instance(st) -> MazeInstance: - def rc_to_xy(pos): - r, c = pos - # NLU grids are 1-based (row, col); mazegen solver uses 0-based (x, y). 
- return (c - 1, r - 1) - - return MazeInstance( - width=st.cols, - height=st.rows, - walls={rc_to_xy(w) for w in st.walls}, - start=rc_to_xy(st.start), - goal=rc_to_xy(st.goal), - keys=[ - Key(id=k.get("id", f"key_{i}"), position=rc_to_xy(tuple(k["position"])), color=k["color"]) - for i, k in enumerate(st.keys) - ], - doors=[ - Door( - id=d.get("id", f"door_{i}"), - position=rc_to_xy(tuple(d["position"])), - requires_key=d["requires_key"], - initial_state=d.get("initial_state", "locked"), - ) - for i, d in enumerate(st.doors) - ], - switches=[ - Switch( - id=s.get("id", f"switch_{i}"), - position=rc_to_xy(tuple(s["position"])), - controls=list(s.get("controls", [])), - switch_type=s.get("switch_type", "toggle"), - initial_state=s.get("initial_state", "off"), - ) - for i, s in enumerate(st.switches) - ], - gates=[ - Gate( - id=g.get("id", f"gate_{i}"), - position=rc_to_xy(tuple(g["position"])), - initial_state=g.get("initial_state", "closed"), - ) - for i, g in enumerate(st.gates) - ], - ) - - -def _path_to_actions(path, start_facing: str = "NORTH") -> list[str]: - if not path or len(path) < 2: - return ["DONE"] - facing = start_facing - actions: list[str] = [] - for (r, c), (nr, nc) in zip(path, path[1:]): - dr, dc = nr - r, nc - c - target = next((f for f, d in FACING_TO_DELTA.items() if d == (dr, dc)), None) - if target is None: - continue - cur_idx = FACING_ORDER.index(facing) - tgt_idx = FACING_ORDER.index(target) - diff = (tgt_idx - cur_idx) % 4 - if diff == 1: - actions.append("TURN_RIGHT") - elif diff == 2: - actions.extend(["TURN_RIGHT", "TURN_RIGHT"]) - elif diff == 3: - actions.append("TURN_LEFT") - actions.append("MOVE_FORWARD") - facing = target - actions.append("DONE") - return actions - - -def _xy_path_to_rc(path_xy) -> list[tuple[int, int]]: - return [(y + 1, x + 1) for (x, y) in path_xy] - - -def _inject_pickups(actions: list[str], env, state) -> list[str]: - """Nlu env needs explicit PICKUP; solver assumes pickup-on-entry.""" - out: list[str] = [] 
- sim_state = state - for a in actions: - has_key_here = any(tuple(k["position"]) == sim_state.agent_pos for k in sim_state.keys) - if has_key_here and a != "PICKUP": - out.append("PICKUP") - sim_state, _ = env.step("PICKUP") - out.append(a) - sim_state, _ = env.step(a) - return out +from nlu_benchmark.smoke_tests.solver_plan_trace import write_png_trace_for_maze_json # noqa: E402 def main() -> None: @@ -117,33 +24,14 @@ def main() -> None: maze_stem = Path(args.maze).stem suffix = f"_{args.tag}" if args.tag else "" out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_bfs{suffix}" - trace_prepare(out_dir) - env_plan = load_maze(maze_path) - plan_state = env_plan.reset() - maze_inst = _state_to_maze_instance(plan_state) - solver_result = solve_maze(maze_inst) - if not solver_result.get("is_solvable"): + + r = write_png_trace_for_maze_json(maze_path, out_dir) + if not r["ok"]: print("Solver reported unsolvable maze.") return - path_rc = _xy_path_to_rc(solver_result.get("path", [])) - planned_actions = _path_to_actions(path_rc, start_facing="NORTH") - executable_actions = _inject_pickups(planned_actions, env_plan, plan_state) - - env = load_maze(maze_path) - state = env.reset() - - lines = trace_reset(out_dir, state) - - for step, action in enumerate(executable_actions, start=1): - before = state.agent_pos - state, event = trace_step(out_dir, lines, step, action, env, position_before=before) - if event.type == "DONE": - break - - trace_write_text_artifacts(out_dir, lines, executable_actions) - print(f"\nsuccess={state.agent_pos == state.goal}") - print(f"steps_used={state.step_count}") - print(f"out={out_dir}") + print(f"\nsuccess={r['success']}") + print(f"steps_used={r['steps_used']}") + print(f"out={r['out_dir']}") if __name__ == "__main__": diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py index 
3e3ce2b..004b1ba 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -18,10 +18,9 @@ from nlu_benchmark.config import ExperimentConfig from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA -from nlu_benchmark.loader import load_maze +from nlu_benchmark.loader import load_maze, grid_state_to_maze_instance from nlu_benchmark.runner import ExperimentRunner import nlu_benchmark.observation as observation_module -from automatic_maze_generation.mazegen.models import Door, Gate, Key, MazeInstance, Switch from automatic_maze_generation.mazegen.solver import solve_maze @@ -88,51 +87,6 @@ def _plan_to_goal_from_prompt(user_text: str, budget: int = 6) -> list[str]: return actions[:budget] if actions else ["DONE"] -def _state_to_maze_instance(st) -> MazeInstance: - def rc_to_xy(pos): - r, c = pos - return (c - 1, r - 1) - - return MazeInstance( - width=st.cols, - height=st.rows, - walls={rc_to_xy(w) for w in st.walls}, - start=rc_to_xy(st.start), - goal=rc_to_xy(st.goal), - keys=[ - Key(id=k.get("id", f"key_{i}"), position=rc_to_xy(tuple(k["position"])), color=k["color"]) - for i, k in enumerate(st.keys) - ], - doors=[ - Door( - id=d.get("id", f"door_{i}"), - position=rc_to_xy(tuple(d["position"])), - requires_key=d["requires_key"], - initial_state=d.get("initial_state", "locked"), - ) - for i, d in enumerate(st.doors) - ], - switches=[ - Switch( - id=s.get("id", f"switch_{i}"), - position=rc_to_xy(tuple(s["position"])), - controls=list(s.get("controls", [])), - switch_type=s.get("switch_type", "toggle"), - initial_state=s.get("initial_state", "off"), - ) - for i, s in enumerate(st.switches) - ], - gates=[ - Gate( - id=g.get("id", f"gate_{i}"), - position=rc_to_xy(tuple(g["position"])), - initial_state=g.get("initial_state", "closed"), - ) - for i, g in enumerate(st.gates) - ], - ) - - def _xy_path_to_rc(path_xy) -> list[tuple[int, 
int]]: return [(y + 1, x + 1) for (x, y) in path_xy] @@ -178,7 +132,7 @@ def _inject_pickups(actions: list[str], env, state) -> list[str]: def _full_trajectory_actions_for_maze(maze_path: Path) -> list[str]: env = load_maze(maze_path) state = env.reset() - maze_inst = _state_to_maze_instance(state) + maze_inst = grid_state_to_maze_instance(state) solver_result = solve_maze(maze_inst) if not solver_result.get("is_solvable"): return ["DONE"] diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py new file mode 100644 index 0000000..5c553df --- /dev/null +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py @@ -0,0 +1,99 @@ +"""Build mazegen solver plan for a task JSON maze and replay it in :class:`~nlu_benchmark.env.GridWorldEnv` with PNG traces (same outputs as ``smoke_bfs``).""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from automatic_maze_generation.mazegen.solver import solve_maze +from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA +from nlu_benchmark.loader import grid_state_to_maze_instance, load_maze +from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts + + +def path_to_actions(path: list[tuple[int, int]], start_facing: str = "NORTH") -> list[str]: + if not path or len(path) < 2: + return ["DONE"] + facing = start_facing + actions: list[str] = [] + for (r, c), (nr, nc) in zip(path, path[1:]): + dr, dc = nr - r, nc - c + target = next((f for f, d in FACING_TO_DELTA.items() if d == (dr, dc)), None) + if target is None: + continue + cur_idx = FACING_ORDER.index(facing) + tgt_idx = FACING_ORDER.index(target) + diff = (tgt_idx - cur_idx) % 4 + if diff == 1: + actions.append("TURN_RIGHT") + elif diff == 2: + actions.extend(["TURN_RIGHT", "TURN_RIGHT"]) + elif diff == 3: + actions.append("TURN_LEFT") + actions.append("MOVE_FORWARD") + facing = 
target + actions.append("DONE") + return actions + + +def xy_path_to_rc(path_xy: list[tuple[int, int]]) -> list[tuple[int, int]]: + return [(y + 1, x + 1) for (x, y) in path_xy] + + +def inject_pickups(actions: list[str], env: Any, state: Any) -> list[str]: + """NLU env needs explicit PICKUP; solver assumes pickup-on-entry.""" + out: list[str] = [] + sim_state = state + for a in actions: + has_key_here = any(tuple(k["position"]) == sim_state.agent_pos for k in sim_state.keys) + if has_key_here and a != "PICKUP": + out.append("PICKUP") + sim_state, _ = env.step("PICKUP") + out.append(a) + sim_state, _ = env.step(a) + return out + + +def write_png_trace_for_maze_json(maze_path: Path, out_dir: Path) -> dict[str, Any]: + """ + Solve ``maze_path``, replay the plan in the NLU env, write ``step_*.png``, ``run_log.txt``, ``plan.txt`` under ``out_dir``. + + Returns a dict with keys ``ok`` (bool), ``optimal_cost``, ``success``, ``steps_used``, ``out_dir``, + and on failure ``reason`` (str) or ``solver_result``. 
+ """ + trace_prepare(out_dir) + env_plan = load_maze(maze_path) + plan_state = env_plan.reset() + maze_inst = grid_state_to_maze_instance(plan_state) + solver_result = solve_maze(maze_inst) + if not solver_result.get("is_solvable"): + return { + "ok": False, + "out_dir": out_dir, + "solver_result": solver_result, + "reason": "solver reported unsolvable", + } + + path_rc = xy_path_to_rc(solver_result.get("path", [])) + planned_actions = path_to_actions(path_rc, start_facing="NORTH") + executable_actions = inject_pickups(planned_actions, env_plan, plan_state) + + env = load_maze(maze_path) + state = env.reset() + lines = trace_reset(out_dir, state) + + for step, action in enumerate(executable_actions, start=1): + before = state.agent_pos + state, event = trace_step(out_dir, lines, step, action, env, position_before=before) + if event.type == "DONE": + break + + trace_write_text_artifacts(out_dir, lines, executable_actions) + success = state.agent_pos == state.goal + return { + "ok": True, + "out_dir": out_dir, + "optimal_cost": solver_result.get("optimal_cost"), + "success": success, + "steps_used": state.step_count, + } From f3f3f61fa5e1c77df2d8df7b344449668394742b Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Mon, 4 May 2026 01:58:35 +0000 Subject: [PATCH 07/14] Store paths in benchmark solver --- .../smoke_tests/smoke_benchmark_mazes.py | 86 +++++++++++++++++-- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py index d15d92c..5bfc736 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py @@ -5,7 +5,11 @@ 1. Run mazegen :func:`~automatic_maze_generation.mazegen.solver.solve_maze` (no ``validate_maze``). 2. 
If solvable, write **one** PNG (same style as ``automatic_maze_generation/render_dataset.py``): maze + mechanisms + overlaid optimal path, under - ``smoke_tests/results/benchmark_png//.png``. + ``smoke_tests/results/benchmark_solver//.png``. +3. Write ``smoke_tests/results/benchmark_solver/benchmark_mazes_metadata.csv`` with columns: + ``rel_path``, ``chain_pattern``, ``is_solvable``, ``optimal_cost``, ``optimal_path``, ``n_interactions``, ``error``. + ``optimal_path`` is a JSON list of ``[x, y]`` cells in mazegen 0-based coordinates (column, row), + only filled when ``is_solvable``; otherwise empty. Run from repo root:: @@ -14,6 +18,7 @@ from __future__ import annotations +import csv import json import sys from pathlib import Path @@ -30,7 +35,34 @@ from nlu_benchmark.renderer import render_task_json_with_solver_path_png # noqa: E402 _SCRIPT_DIR = Path(__file__).resolve().parent -_PNG_ROOT = _SCRIPT_DIR / "results" / "benchmark_png" +_RESULTS_ROOT = _SCRIPT_DIR / "results" +_BENCHMARK_SOLVER_DIR = _RESULTS_ROOT / "benchmark_solver" +_CSV_PATH = _BENCHMARK_SOLVER_DIR / "benchmark_mazes_metadata.csv" + +_CSV_FIELDNAMES = [ + "rel_path", + "chain_pattern", + "is_solvable", + "optimal_cost", + "optimal_path", + "n_interactions", + "error", +] + + +def _fill_solver_columns(row: dict[str, object], result: dict) -> None: + solvable = bool(result.get("is_solvable")) + inter = result.get("interactions") or [] + cost = result.get("optimal_cost") + row["is_solvable"] = solvable + row["optimal_cost"] = "" if cost is None else cost + row["n_interactions"] = len(inter) + if solvable: + pts = result.get("path") or [] + as_lists = [[int(x), int(y)] for x, y in pts] + row["optimal_path"] = json.dumps(as_lists, ensure_ascii=False) + else: + row["optimal_path"] = "" def main() -> None: @@ -44,36 +76,76 @@ def main() -> None: print(f"warning: no JSON files under {base}", file=sys.stderr) sys.exit(0) + _RESULTS_ROOT.mkdir(parents=True, exist_ok=True) + 
_BENCHMARK_SOLVER_DIR.mkdir(parents=True, exist_ok=True) + csv_rows: list[dict[str, object]] = [] + failed = 0 failures: list[tuple[Path, str]] = [] for path in paths: rel = path.relative_to(base) + row: dict[str, object] = { + "rel_path": str(rel), + "chain_pattern": "", + "is_solvable": "", + "optimal_cost": "", + "optimal_path": "", + "n_interactions": "", + "error": "", + } + try: text = path.read_text(encoding="utf-8") - data = task_dict_shrink_dimensions_minus_two(json.loads(text)) + raw = json.loads(text) + data = task_dict_shrink_dimensions_minus_two(raw) + row["chain_pattern"] = (raw.get("metadata") or {}).get("chain_pattern", "") + except Exception as e: + failed += 1 + msg = str(e) + row["error"] = msg + failures.append((rel, msg)) + print(f"FAIL {rel}: {msg}", file=sys.stderr, flush=True) + csv_rows.append(row) + continue + + try: result = solve_maze(maze_instance_from_task_dict(data)) - if not result.get("is_solvable"): + _fill_solver_columns(row, result) + solvable = bool(result.get("is_solvable")) + if not solvable: failed += 1 msg = "not solvable" + row["error"] = msg failures.append((rel, msg)) print(f"FAIL {rel}: {msg}", flush=True) + csv_rows.append(row) continue - out_png = _PNG_ROOT / rel.parent / f"{path.stem}.png" + + path_pts = result.get("path") or [] + out_png = _BENCHMARK_SOLVER_DIR / rel.parent / f"{path.stem}.png" out_png.parent.mkdir(parents=True, exist_ok=True) - render_task_json_with_solver_path_png(data, result.get("path", []), out_png) + render_task_json_with_solver_path_png(data, path_pts, out_png) print(f"ok {rel} cost={result.get('optimal_cost')} png={out_png}") except Exception as e: failed += 1 msg = str(e) + row["error"] = msg failures.append((rel, msg)) print(f"FAIL {rel}: {msg}", file=sys.stderr, flush=True) + csv_rows.append(row) + + with _CSV_PATH.open("w", encoding="utf-8", newline="") as f: + w = csv.DictWriter(f, fieldnames=_CSV_FIELDNAMES, extrasaction="ignore") + w.writeheader() + w.writerows(csv_rows) 
print(f"\n{len(paths)} files, {failed} failed") if failures: print("Failed files:", file=sys.stderr) for rel, msg in failures: print(f" - {rel}: {msg}", file=sys.stderr) - print(f"PNGs under {_PNG_ROOT}") + print(f"Outputs under {_BENCHMARK_SOLVER_DIR}") + print(f"CSV {_CSV_PATH}") sys.exit(1 if failed else 0) From d4cec0707df9d85d15e29f5ad4dc860a93c16f81 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Tue, 5 May 2026 05:40:29 +0000 Subject: [PATCH 08/14] Add failing mazes png --- .../smoke_tests/smoke_benchmark_mazes.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py index 5bfc736..3ed3d67 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_benchmark_mazes.py @@ -3,9 +3,9 @@ 0. Apply :func:`~nlu_benchmark.loader.task_dict_shrink_dimensions_minus_two` (labeled 10×10 → 8×8 grid; coordinates unchanged). 1. Run mazegen :func:`~automatic_maze_generation.mazegen.solver.solve_maze` (no ``validate_maze``). -2. If solvable, write **one** PNG (same style as ``automatic_maze_generation/render_dataset.py``): - maze + mechanisms + overlaid optimal path, under - ``smoke_tests/results/benchmark_solver//.png``. +2. Write **one** PNG (same style as ``automatic_maze_generation/render_dataset.py``) under + ``smoke_tests/results/benchmark_solver//.png``: + if solvable, maze + mechanisms + overlaid optimal path; if not solvable, maze + mechanisms only (no path). 3. Write ``smoke_tests/results/benchmark_solver/benchmark_mazes_metadata.csv`` with columns: ``rel_path``, ``chain_pattern``, ``is_solvable``, ``optimal_cost``, ``optimal_path``, ``n_interactions``, ``error``. 
``optimal_path`` is a JSON list of ``[x, y]`` cells in mazegen 0-based coordinates (column, row), @@ -112,20 +112,14 @@ def main() -> None: result = solve_maze(maze_instance_from_task_dict(data)) _fill_solver_columns(row, result) solvable = bool(result.get("is_solvable")) - if not solvable: - failed += 1 - msg = "not solvable" - row["error"] = msg - failures.append((rel, msg)) - print(f"FAIL {rel}: {msg}", flush=True) - csv_rows.append(row) - continue - - path_pts = result.get("path") or [] + path_pts = (result.get("path") or []) if solvable else [] out_png = _BENCHMARK_SOLVER_DIR / rel.parent / f"{path.stem}.png" out_png.parent.mkdir(parents=True, exist_ok=True) render_task_json_with_solver_path_png(data, path_pts, out_png) - print(f"ok {rel} cost={result.get('optimal_cost')} png={out_png}") + if solvable: + print(f"ok {rel} cost={result.get('optimal_cost')} png={out_png}") + else: + print(f"ok {rel} not solvable png={out_png} (no path)") except Exception as e: failed += 1 msg = str(e) From ec69deabd5274ad9fc364735bf45fcd814605376 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Tue, 5 May 2026 06:42:31 +0000 Subject: [PATCH 09/14] Smoke test claude --- src/v2/nlu_pipeline/nlu_benchmark/agents.py | 25 ++- src/v2/nlu_pipeline/nlu_benchmark/runner.py | 63 +++++- .../nlu_benchmark/smoke_tests/smoke_claude.py | 83 ------- .../nlu_benchmark/smoke_tests/smoke_llm.py | 204 ++++++++++++++++++ .../nlu_pipeline/nlu_benchmark/smoke_trace.py | 5 + 5 files changed, 293 insertions(+), 87 deletions(-) delete mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py create mode 100644 src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py diff --git a/src/v2/nlu_pipeline/nlu_benchmark/agents.py b/src/v2/nlu_pipeline/nlu_benchmark/agents.py index 83252af..48646e2 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/agents.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/agents.py @@ -1,7 +1,9 @@ from __future__ import annotations import json +import logging import os 
+import time import urllib.error import urllib.request from dataclasses import dataclass, field @@ -13,11 +15,11 @@ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "0") os.environ.setdefault("HF_HUB_DISABLE_XET", "1") +logger = logging.getLogger(__name__) DEFAULT_LOCAL_MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct" - -DEFAULT_CLAUDE_MODEL = "claude-sonnet-4-20250514" +DEFAULT_CLAUDE_MODEL = "claude-sonnet-4-6" def _parse_data_image_url(url: str) -> tuple[str, str]: @@ -100,9 +102,13 @@ def _anthropic_messages_http( if system: body["system"] = system + raw = json.dumps(body).encode("utf-8") + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Anthropic request: model=%s json_bytes=%d", model, len(raw)) + req = urllib.request.Request( "https://api.anthropic.com/v1/messages", - data=json.dumps(body).encode("utf-8"), + data=raw, headers={ "Content-Type": "application/json", "x-api-key": api_key, @@ -110,12 +116,16 @@ def _anthropic_messages_http( }, method="POST", ) + t0 = time.perf_counter() try: with urllib.request.urlopen(req, timeout=timeout or 60.0) as resp: payload = json.loads(resp.read().decode()) except urllib.error.HTTPError as e: detail = e.read().decode(errors="replace") raise RuntimeError(f"Anthropic API HTTP {e.code}: {detail}") from e + elapsed = time.perf_counter() - t0 + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Anthropic Messages API: model=%s elapsed=%.2fs", model, elapsed) parts: List[str] = [] for block in payload.get("content", []) or []: @@ -198,12 +208,21 @@ def __call__(self, messages: List[Dict[str, str]]) -> str: inputs = self.tokenizer(prompt, return_tensors="pt") inputs = {k: v.to(self.model.device) for k, v in inputs.items()} + t0 = time.perf_counter() generated = self.model.generate( **inputs, max_new_tokens=self.config.max_new_tokens, temperature=self.config.temperature, do_sample=self.config.temperature > 0, ) + gen_s = time.perf_counter() - t0 + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Local 
generate: model=%s elapsed=%.2fs prompt_tokens=%d", + self.config.model, + gen_s, + inputs["input_ids"].shape[1], + ) prompt_len = inputs["input_ids"].shape[1] new_tokens = generated[0][prompt_len:] diff --git a/src/v2/nlu_pipeline/nlu_benchmark/runner.py b/src/v2/nlu_pipeline/nlu_benchmark/runner.py index 39427ee..38d4485 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/runner.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/runner.py @@ -10,6 +10,12 @@ result = runner.run(agent) # verbose=True: print progress result = runner.run(agent, verbose=False) # quiet for batch runs +Or enable library logging in your script:: + + import logging + logging.basicConfig(level=logging.INFO) + result = runner.run(agent, verbose=False) + Or from a JSON file directly: runner = ExperimentRunner.from_json("path/to/maze.json", config=cfg) @@ -18,8 +24,19 @@ from __future__ import annotations +import logging +import time from typing import Callable, List, Optional +logger = logging.getLogger(__name__) + + +def _user_message_has_image(message: dict) -> bool: + content = message.get("content") + if not isinstance(content, list): + return False + return any(isinstance(b, dict) and b.get("type") == "image_url" for b in content) + from nlu_benchmark.config import ExperimentConfig from nlu_benchmark.feedback import action_feedback_for_prompt, format_step_feedback from nlu_benchmark.observation import ObservationBuilder @@ -137,18 +154,54 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di consecutive_failures = 0 transcript: List[dict] = [] max_steps = self.env.initial.max_steps + query_count = 0 + + if logger.isEnabledFor(logging.INFO): + logger.info( + "Episode start: max_steps=%s querying=%s observation=%s context_window=%s", + max_steps, + self.config.querying, + self.config.observation, + self.config.context_window, + ) while state.step_count < max_steps: # --- Query model if needed --- if self.querying.should_query(action_queue, consecutive_failures): 
consecutive_failures = 0 - messages.append(self._build_message(state, last_feedback)) + query_count += 1 + user_message = self._build_message(state, last_feedback) + has_image = _user_message_has_image(user_message) + messages.append(user_message) + if logger.isEnabledFor(logging.INFO): + logger.info( + "LLM query #%d: messages_in_context=%d current_turn_has_image=%s", + query_count, + len(messages), + has_image, + ) + t_llm = time.perf_counter() model_text = agent(messages) + llm_s = time.perf_counter() - t_llm messages.append({"role": "assistant", "content": model_text}) action_queue = self.querying.parse_actions(model_text) + if logger.isEnabledFor(logging.INFO): + logger.info( + "LLM query #%d finished in %.2fs: reply_chars=%d actions_parsed=%d", + query_count, + llm_s, + len(model_text), + len(action_queue), + ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("LLM query #%d reply preview: %s", query_count, model_text[:4000]) if not action_queue: + logger.warning( + "LLM query #%d: no valid actions parsed; retrying with parser feedback", + query_count, + ) last_feedback = ( f"Could not parse FINAL_OUTPUT (one or more valid actions). " f"Use only: {ACTIONS_HINT}." 
@@ -187,6 +240,8 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di self.obs.record(state.agent_pos, state.facing, action, last_feedback) if event_type == "DONE": + if logger.isEnabledFor(logging.INFO): + logger.info("Episode success at env step %s (LLM queries=%d)", state.step_count, query_count) if verbose: print(f" Success at step {state.step_count}") return self._result(True, state, transcript) @@ -194,6 +249,12 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di if verbose: print(f" Step {state.step_count}/{max_steps}: {action} -> {event_type}") + if logger.isEnabledFor(logging.INFO): + logger.info( + "Episode ended without DONE: env_steps=%s success=false LLM_queries=%d", + state.step_count, + query_count, + ) return self._result(False, state, transcript) # ------------------------------------------------------------------ diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py deleted file mode 100644 index 4707e5f..0000000 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_claude.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import annotations - -import argparse -import os -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[2] -if str(ROOT) not in sys.path: - sys.path.insert(0, str(ROOT)) -V2_ROOT = Path(__file__).resolve().parents[3] -if str(V2_ROOT) not in sys.path: - sys.path.insert(0, str(V2_ROOT)) - -from nlu_benchmark.agents import ClaudeAnthropicAgent, ClaudeAnthropicConfig -from nlu_benchmark.loader import load_maze -from nlu_benchmark.runner import ExperimentRunner -from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts - - -def _ensure_anthropic_api_key() -> None: - if os.environ.get("ANTHROPIC_API_KEY"): - return - for directory in Path(__file__).resolve().parents: - key_file = directory / "api_key.txt" - if 
key_file.is_file(): - os.environ["ANTHROPIC_API_KEY"] = key_file.read_text().strip() - return - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Smoke test: Claude agent episode in NLU env (PNG trace under results/smoke_*_claude/).", - ) - parser.add_argument("--maze", default="V04_single_key.json", help="Maze JSON filename under sample mazes/") - parser.add_argument("--tag", default="", help="Optional output tag suffix.") - args = parser.parse_args() - - maze_path = ROOT / "nlu_benchmark" / "sample mazes" / args.maze - maze_stem = Path(args.maze).stem - suffix = f"_{args.tag}" if args.tag else "" - out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_claude{suffix}" - - if not maze_path.is_file(): - print(f"Missing maze file: {maze_path}") - return - - trace_prepare(out_dir) - - _ensure_anthropic_api_key() - - runner = ExperimentRunner.from_json(str(maze_path.resolve())) - agent = ClaudeAnthropicAgent(config=ClaudeAnthropicConfig()) - - try: - result = runner.run(agent, verbose=False) - except Exception as e: - print(f"runner.run raised: {e}") - return - - transcript = result["transcript"] - planned_actions = [rec["action"] for rec in transcript] - - env = load_maze(maze_path) - state = env.reset() - - lines = trace_reset(out_dir, state) - - for step, action in enumerate(planned_actions, start=1): - before = state.agent_pos - state, event = trace_step(out_dir, lines, step, action, env, position_before=before) - if event.type == "DONE": - break - - trace_write_text_artifacts(out_dir, lines, planned_actions) - - print(f"\nsuccess={state.agent_pos == state.goal}") - print(f"steps_used={state.step_count}") - print(f"out={out_dir}") - - -if __name__ == "__main__": - main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py new file mode 100644 index 0000000..af72d7a --- /dev/null +++ 
b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import argparse +import json +import logging +import os +import sys +from pathlib import Path +from typing import Callable, Dict, List + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +V2_ROOT = Path(__file__).resolve().parents[3] +if str(V2_ROOT) not in sys.path: + sys.path.insert(0, str(V2_ROOT)) + +from nlu_benchmark.agents import ( + ClaudeAnthropicAgent, + ClaudeAnthropicConfig, + DEFAULT_LOCAL_MODEL, + LocalLLMConfig, + LocalTransformersAgent, +) +from nlu_benchmark.loader import load_maze +from nlu_benchmark.runner import ExperimentRunner +from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts + + +def _configure_benchmark_logging(level_name: str) -> None: + level = getattr(logging, level_name.upper(), logging.WARNING) + if level == logging.WARNING: + return + logging.basicConfig( + level=level, + format="%(asctime)s %(levelname)s [%(name)s] %(message)s", + datefmt="%H:%M:%S", + ) + + +def _ensure_anthropic_api_key() -> None: + if os.environ.get("ANTHROPIC_API_KEY"): + return + for directory in Path(__file__).resolve().parents: + key_file = directory / "api_key.txt" + if key_file.is_file(): + os.environ["ANTHROPIC_API_KEY"] = key_file.read_text().strip() + return + + +def _persist_llm_queries_jsonl(out_dir: Path, records: List[dict]) -> None: + if not records: + return + path = out_dir / "llm_queries.jsonl" + with path.open("w", encoding="utf-8") as f: + for row in records: + f.write(json.dumps(row, ensure_ascii=False) + "\n") + + +def _write_episode_json( + out_dir: Path, + result: Dict[str, object], + *, + backend: str, + model: str, +) -> None: + payload = { + "success": result["success"], + "steps_used": result["steps_used"], + "config": result["config"], + "transcript": result["transcript"], + "smoke": {"backend": backend, 
"model": model}, + } + (out_dir / "episode.json").write_text( + json.dumps(payload, indent=2, ensure_ascii=False, default=str), + encoding="utf-8", + ) + + +class _AgentRecorder: + """Delegates to a real agent and records each assistant reply for ``llm_queries.jsonl``.""" + + __slots__ = ("_inner", "_records", "_query_seq") + + def __init__(self, inner: Callable[[List[dict]], str], records: List[dict]) -> None: + self._inner = inner + self._records = records + self._query_seq = 0 + + def __call__(self, messages: List[dict]) -> str: + self._query_seq += 1 + text = self._inner(messages) + self._records.append( + { + "query": self._query_seq, + "messages_in_context": len(messages), + "reply": text, + } + ) + return text + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Smoke test: LLM-guided maze episode (PNG trace under results/). " + "Writes llm_queries.jsonl (model replies) and episode.json (transcript + config). " + "Anthropic runs in the cloud; --backend local uses Hugging Face. " + "--log-level INFO for query timing; -v for per-step prints." + ), + ) + parser.add_argument("--maze", default="V04_single_key.json", help="Maze JSON filename under sample mazes/") + parser.add_argument("--tag", default="", help="Optional output tag suffix.") + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Print per-step progress from ExperimentRunner.", + ) + parser.add_argument( + "--log-level", + default="WARNING", + choices=["DEBUG", "INFO", "WARNING"], + help="Structured logs from nlu_benchmark (timestamps, LLM query stats; default: no extra logs).", + ) + parser.add_argument( + "--backend", + choices=("anthropic", "local"), + default="anthropic", + help="anthropic: Claude API (remote). 
local: Hugging Face Transformers on this machine.", + ) + parser.add_argument( + "--hf-model", + default=DEFAULT_LOCAL_MODEL, + help="With --backend local: Hugging Face model id (default: %(default)s).", + ) + parser.add_argument( + "--device-map", + default="auto", + help='With --backend local: passed to from_pretrained device_map (e.g. "auto", "cuda:0").', + ) + args = parser.parse_args() + _configure_benchmark_logging(args.log_level) + + maze_path = ROOT / "nlu_benchmark" / "sample mazes" / args.maze + maze_stem = Path(args.maze).stem + suffix = f"_{args.tag}" if args.tag else "" + out_slug = "claude" if args.backend == "anthropic" else "local" + out_dir = Path(__file__).resolve().parent / "results" / f"smoke_{maze_stem}_{out_slug}{suffix}" + + if not maze_path.is_file(): + print(f"Missing maze file: {maze_path}") + return + + trace_prepare(out_dir) + + runner = ExperimentRunner.from_json(str(maze_path.resolve())) + + query_log: List[dict] = [] + if args.backend == "anthropic": + _ensure_anthropic_api_key() + claude_cfg = ClaudeAnthropicConfig() + agent_inner = ClaudeAnthropicAgent(config=claude_cfg) + model_id = claude_cfg.model + else: + llm_cfg = LocalLLMConfig(model=args.hf_model, device_map=args.device_map) + agent_inner = LocalTransformersAgent(config=llm_cfg) + model_id = llm_cfg.model + + agent = _AgentRecorder(agent_inner, query_log) + + try: + result = runner.run(agent, verbose=args.verbose) + except Exception as e: + _persist_llm_queries_jsonl(out_dir, query_log) + print(f"runner.run raised: {e}") + return + + _persist_llm_queries_jsonl(out_dir, query_log) + _write_episode_json(out_dir, result, backend=args.backend, model=model_id) + + transcript = result["transcript"] + planned_actions = [rec["action"] for rec in transcript] + + env = load_maze(maze_path) + state = env.reset() + + lines = trace_reset(out_dir, state) + + for step, action in enumerate(planned_actions, start=1): + before = state.agent_pos + state, event = trace_step(out_dir, lines, 
step, action, env, position_before=before) + if event.type == "DONE": + break + + trace_write_text_artifacts(out_dir, lines, planned_actions) + + print(f"\nsuccess={state.agent_pos == state.goal}") + print(f"steps_used={state.step_count}") + print(f"out={out_dir}") + + +if __name__ == "__main__": + main() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py index 3c13a81..0e80359 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py @@ -2,6 +2,7 @@ Writes ``step_000_reset.png``, ``step_NNN_.png``, ``run_log.txt``, ``plan.txt`` under a caller-chosen ``results/…`` directory. +``trace_prepare`` also removes ``*.json`` and ``*.jsonl`` there (e.g. LLM smoke sidecars). """ from __future__ import annotations @@ -18,6 +19,10 @@ def trace_prepare(out_dir: Path) -> None: p.unlink() for p in out_dir.glob("*.txt"): p.unlink() + for p in out_dir.glob("*.json"): + p.unlink() + for p in out_dir.glob("*.jsonl"): + p.unlink() def trace_reset(out_dir: Path, state: Any) -> list[str]: From 2dc4ac798471fef8d86c19b9413b7c4e2f152f20 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Tue, 5 May 2026 08:29:30 +0000 Subject: [PATCH 10/14] Add claude smoke --- .../render_dataset.py | 25 ++++++---- src/v2/nlu_pipeline/nlu_benchmark/config.py | 18 +++++-- src/v2/nlu_pipeline/nlu_benchmark/feedback.py | 11 ++-- .../nlu_pipeline/nlu_benchmark/observation.py | 7 +-- src/v2/nlu_pipeline/nlu_benchmark/renderer.py | 18 ++++--- src/v2/nlu_pipeline/nlu_benchmark/runner.py | 50 ++++++++++++++----- .../smoke_tests/analyze_smoke_runner_logs.py | 6 +-- .../nlu_benchmark/smoke_tests/smoke_llm.py | 24 ++++++++- .../smoke_prompting_observation_querying.py | 4 +- .../nlu_pipeline/nlu_benchmark/smoke_trace.py | 13 +++-- 10 files changed, 129 insertions(+), 47 deletions(-) diff --git a/src/v2/automatic_maze_generation/render_dataset.py b/src/v2/automatic_maze_generation/render_dataset.py 
index 9c65d2d..6195206 100644 --- a/src/v2/automatic_maze_generation/render_dataset.py +++ b/src/v2/automatic_maze_generation/render_dataset.py @@ -42,8 +42,8 @@ def _row_col_payload_to_xy_payload(payload: dict) -> dict: def rc_to_xy(pos): r, c = pos - # Payloads use 1-based (row, col); drawing uses 0-based (x=col, y=row). - return [c - 1, r - 1] + # NLU JSON: 1-based (row, col). Legacy draw indices ``(x, y) = (c, r)`` (no ``-1``); matches historical traces. + return [c, r] dims = maze.get("dimensions") if dims and len(dims) == 2: @@ -195,8 +195,7 @@ def _draw_gate(ax, x: int, y: int, height: int, label: str): def _draw_agent(ax, ar: int, ac: int, height: int, facing: str) -> None: - """Overlay current agent (row, col) and facing; same cell coords as ``_draw_centered_text``.""" - # GridState uses (row, col). Rendering uses x=col, y=row (inverted vertical axis). + """Agent overlay; ``ar, ac`` are NLU **1-based** ``(row, col)`` (same as ``GridState``), matching ``rc_to_xy`` → ``[c, r]``.""" cx = ac + 0.5 cy = height - 1 - ar + 0.5 ax.plot( @@ -318,10 +317,14 @@ def _figure_from_maze_payload(payload: dict, title: str) -> Tuple[Any, Any, int] x, y = gate["position"] _draw_gate(ax, x, y, height, "G") - ax.set_title(title) + if title: + ax.set_title(title) ax.set_xlim(0, width) ax.set_ylim(0, height) - ax.set_aspect("equal", adjustable="box") + # Use default adjustable ("datalim"): ``adjustable="box"`` rescales the axes + # rectangle inside the subplot; with ``tight_layout`` + ``bbox_inches="tight"`` + # that often yields asymmetric white bands (e.g. extra empty rows/cols on one side). + ax.set_aspect("equal") ax.axis("off") return fig, ax, height @@ -342,11 +345,15 @@ def render_maze_payload_bytes( agent_pos: Optional[Tuple[int, int]] = None, facing: str = "NORTH", ) -> bytes: - """Same layout as ``render_maze_payload``, PNG bytes (e.g. NLU live observations).""" - title = str(payload.get("task_id", "maze")) + """Same layout as ``render_maze_payload``, PNG bytes (e.g. 
NLU live observations). + + ``agent_pos`` is NLU 1-based ``(row, col)``; passed through to ``_draw_agent`` unchanged (legacy pairing with ``rc_to_xy`` → ``[c, r]``). + """ + title = str(payload.get("task_id", "") or "") fig, ax, height = _figure_from_maze_payload(payload, title) if agent_pos is not None: - _draw_agent(ax, agent_pos[0], agent_pos[1], height, facing) + r1, c1 = int(agent_pos[0]), int(agent_pos[1]) + _draw_agent(ax, r1, c1, height, facing) plt.tight_layout() buf = BytesIO() fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight") diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py index a96638f..9ce6af0 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/config.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -17,22 +17,34 @@ class ExperimentConfig: observation text_only – initial NL maze in system; current situation text per user turn; last3 history image_text – same as text_only + live PNG each turn; last3 = full feedback - image_only – live PNG only (no NL map); last3 = action-only lines (default) + image_only – live PNG only (no NL map); last3 = action -> outcome lines (same strings as ``Last result:``) context_window current – only the current observation (no prior steps in the prompt) - last3 – last 3 steps as structured lines prepended to the prompt + last3 – last 3 steps as structured lines prepended to the prompt (default) querying step_by_step – one LLM call per env step (only the first action in FINAL_OUTPUT is used) subgoal – SUB_GOAL + ACTIONS list; re-queries when queue empty, stuck, or mid-budget full_trajectory – same format as subgoal, but exactly one LLM call per episode (no re-query) + + chat_history + stateless – each API call is only ``[system, current_user]`` (cheapest; no prior assistant text). + rolling – append user/assistant each query, but keep at most ``chat_turns_max`` prior + user+assistant **pairs** after ``system`` (default: good balance for vision + reasoning). 
+ full – append without trimming (original behavior; high token use with images). + + chat_turns_max + For ``chat_history=\"rolling\"``: max number of full ``(user, assistant)`` rounds kept in the API + payload after ``system``. Ignored for ``stateless`` / ``full``. """ prompting: Literal["minimal", "standard", "verbose"] = "minimal" observation: Literal["text_only", "image_text", "image_only"] = "image_only" - context_window: Literal["current", "last3"] = "current" + context_window: Literal["current", "last3"] = "last3" querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" + chat_history: Literal["stateless", "rolling", "full"] = "rolling" + chat_turns_max: int = 3 def to_dict(self) -> dict: return asdict(self) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/feedback.py b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py index d58b1e5..afaa8f2 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/feedback.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/feedback.py @@ -7,10 +7,13 @@ ObservationKind = Literal["text_only", "image_text", "image_only"] -def action_feedback_for_prompt(observation: ObservationKind, text: str) -> str: - """Step outcomes for ``Last result:`` / history; cleared for ``image_only``. Parse failures stay visible in runner.""" - if observation == "image_only": - return "" +def action_feedback_for_prompt(_observation: ObservationKind, text: str) -> str: + """Step outcomes for ``Last result:`` and for observation history. + + All observation modes receive the same ``text`` (from :func:`format_step_feedback`). + ``image_only`` includes this so stateless API turns still see BLOCKED/MOVED/etc. without + relying on prior assistant messages. ``_observation`` is kept for call-site compatibility. 
+ """ return text diff --git a/src/v2/nlu_pipeline/nlu_benchmark/observation.py b/src/v2/nlu_pipeline/nlu_benchmark/observation.py index 1a03496..ab428ab 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/observation.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/observation.py @@ -4,7 +4,8 @@ system message once per episode. Each user turn: ``render_user_observation_text``, last3 history, and live PNG when image is enabled. -* **image_only** – No initial NL map in system; live PNG each query; last3 history is action-only lines. +* **image_only** – No initial NL map in system; live PNG each query; last3 history lists + recent ``action → feedback`` lines (same feedback strings as ``Last result:``). ``build_image_blocks`` adds PNGs whenever observation is not ``text_only`` (see ``runner._build_message``). """ @@ -51,9 +52,9 @@ def history_text(self) -> str: return "" recs = self._history[-3:] if self._observation == "image_only": - lines = ["Recent steps (oldest first, action only):"] + lines = ["Recent steps (oldest first, action -> outcome):"] for rec in recs: - lines.append(f" {rec.action}") + lines.append(f" {rec.action} -> {rec.feedback}") return "\n".join(lines) lines = ["Recent history (last 3 steps, oldest first):"] for rec in recs: diff --git a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py index 2dc7823..0eb306b 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py @@ -28,10 +28,9 @@ def _render_dataset_module(): return _RENDER_DATASET_MOD -def _grid_state_to_maze_payload(state: GridState) -> dict: +def _grid_state_to_maze_payload(state: GridState, *, task_id: str = "") -> dict: """JSON-shaped maze dict for ``render_maze_payload`` / ``render_maze_payload_bytes``.""" - return { - "task_id": "nlu_live", + out: dict[str, Any] = { "maze": { # Unified convention: payloads are always (row, col). 
"dimensions": [state.rows, state.cols], @@ -46,6 +45,9 @@ def _grid_state_to_maze_payload(state: GridState) -> dict: "gates": [dict(g) for g in state.gates], }, } + if task_id: + out["task_id"] = task_id + return out def _static_layout_lines(state: GridState) -> list[str]: @@ -118,10 +120,14 @@ def render_user_observation_text(state: GridState) -> str: return "\n".join(head) -def render_maze_image_png_bytes(state: GridState) -> bytes: - """Render the current ``GridState`` to a PNG (same style as ``render_dataset.render_maze_payload``).""" +def render_maze_image_png_bytes(state: GridState, *, task_id: str = "") -> bytes: + """Render the current ``GridState`` to a PNG (same style as ``render_dataset.render_maze_payload``). + + ``task_id`` is only for the optional figure title (smoke replay uses the JSON id; LLM observations + default to empty so the title does not change ``tight_layout`` / margins). + """ mod = _render_dataset_module() - payload = _grid_state_to_maze_payload(state) + payload = _grid_state_to_maze_payload(state, task_id=task_id) ar, ac = state.agent_pos return mod.render_maze_payload_bytes( payload, diff --git a/src/v2/nlu_pipeline/nlu_benchmark/runner.py b/src/v2/nlu_pipeline/nlu_benchmark/runner.py index 38d4485..bfecacd 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/runner.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/runner.py @@ -37,6 +37,17 @@ def _user_message_has_image(message: dict) -> bool: return False return any(isinstance(b, dict) and b.get("type") == "image_url" for b in content) + +def _trim_rolling_chat(messages: List[dict], max_pairs: int) -> None: + """Keep ``messages[0]`` (system) and at most ``max_pairs`` following (user, assistant) pairs.""" + if max_pairs < 1 or len(messages) <= 1: + return + tail_len = len(messages) - 1 + cap = 2 * max_pairs + if tail_len > cap: + del messages[1 : 1 + (tail_len - cap)] + + from nlu_benchmark.config import ExperimentConfig from nlu_benchmark.feedback import action_feedback_for_prompt, 
format_step_feedback from nlu_benchmark.observation import ObservationBuilder @@ -89,7 +100,7 @@ def build_runner( # --------------------------------------------------------------------------- class ExperimentRunner: - """Runs a maze episode. Owns the full episode loop.""" + """Runs a maze episode. API chat style is set by ``ExperimentConfig.chat_history``.""" def __init__( self, @@ -131,11 +142,11 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di Returns ------- dict: - success – bool - steps_used – int - final_state – GridState - transcript – list[dict] with one record per executed action - config – dict, serialised ExperimentConfig for this run + success bool + steps_used int + final_state GridState + transcript list[dict] with one record per executed action + config dict, serialised ExperimentConfig for this run """ state = self.env.reset() self.obs.reset() @@ -147,7 +158,9 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di f"{system_prompt}\n\nInitial maze (fixed for this episode):\n" f"{render_initial_maze_text(state)}" ) - messages: List[dict] = [{"role": "system", "content": system_prompt}] + system_message = {"role": "system", "content": system_prompt} + chat_history = self.config.chat_history + messages: List[dict] = [system_message] if chat_history in ("rolling", "full") else [] action_queue: List[str] = [] last_feedback = "Episode start." 
@@ -158,11 +171,14 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di if logger.isEnabledFor(logging.INFO): logger.info( - "Episode start: max_steps=%s querying=%s observation=%s context_window=%s", + "Episode start: max_steps=%s querying=%s observation=%s context_window=%s " + "chat_history=%s chat_turns_max=%s", max_steps, self.config.querying, self.config.observation, self.config.context_window, + chat_history, + self.config.chat_turns_max if chat_history == "rolling" else "-", ) while state.step_count < max_steps: @@ -173,18 +189,26 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di query_count += 1 user_message = self._build_message(state, last_feedback) has_image = _user_message_has_image(user_message) - messages.append(user_message) + if chat_history == "stateless": + agent_messages: List[dict] = [system_message, user_message] + else: + messages.append(user_message) + agent_messages = messages if logger.isEnabledFor(logging.INFO): logger.info( - "LLM query #%d: messages_in_context=%d current_turn_has_image=%s", + "LLM query #%d: chat_history=%s messages_in_context=%d current_turn_has_image=%s", query_count, - len(messages), + chat_history, + len(agent_messages), has_image, ) t_llm = time.perf_counter() - model_text = agent(messages) + model_text = agent(agent_messages) llm_s = time.perf_counter() - t_llm - messages.append({"role": "assistant", "content": model_text}) + if chat_history != "stateless": + messages.append({"role": "assistant", "content": model_text}) + if chat_history == "rolling": + _trim_rolling_chat(messages, max(1, self.config.chat_turns_max)) action_queue = self.querying.parse_actions(model_text) if logger.isEnabledFor(logging.INFO): logger.info( diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py index c6514c3..49c7d8f 100644 --- 
a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py @@ -69,11 +69,11 @@ def main() -> None: if len(queries) >= 2: second_text = queries[1]["user_text"] has_recent = "Recent history (last 3 steps, oldest first):" in second_text - has_action_only = "Recent steps (oldest first, action only):" in second_text - if cfg["context_window"] == "current" and (has_recent or has_action_only): + has_action_outcome = "Recent steps (oldest first, action -> outcome):" in second_text + if cfg["context_window"] == "current" and (has_recent or has_action_outcome): issues.append((label, "current_has_history")) if cfg["context_window"] == "last3": - if obs == "image_only" and not has_action_only: + if obs == "image_only" and not has_action_outcome: issues.append((label, "last3_image_only_missing_action_history")) if obs != "image_only" and not has_recent: issues.append((label, "last3_missing_history")) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py index af72d7a..bc076cb 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py @@ -22,6 +22,7 @@ LocalLLMConfig, LocalTransformersAgent, ) +from nlu_benchmark.config import ExperimentConfig from nlu_benchmark.loader import load_maze from nlu_benchmark.runner import ExperimentRunner from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts @@ -139,6 +140,19 @@ def main() -> None: default="auto", help='With --backend local: passed to from_pretrained device_map (e.g. 
"auto", "cuda:0").', ) + parser.add_argument( + "--chat-history", + choices=("stateless", "rolling", "full"), + default=None, + help="ExperimentConfig.chat_history (default: rolling = last N turns in API).", + ) + parser.add_argument( + "--chat-turns-max", + type=int, + default=None, + metavar="N", + help="ExperimentConfig.chat_turns_max for rolling mode (default: 3).", + ) args = parser.parse_args() _configure_benchmark_logging(args.log_level) @@ -154,7 +168,15 @@ def main() -> None: trace_prepare(out_dir) - runner = ExperimentRunner.from_json(str(maze_path.resolve())) + exp_cfg: ExperimentConfig | None = None + if args.chat_history is not None or args.chat_turns_max is not None: + exp_cfg = ExperimentConfig() + if args.chat_history is not None: + exp_cfg.chat_history = args.chat_history + if args.chat_turns_max is not None: + exp_cfg.chat_turns_max = args.chat_turns_max + + runner = ExperimentRunner.from_json(str(maze_path.resolve()), config=exp_cfg) query_log: List[dict] = [] if args.backend == "anthropic": diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py index 004b1ba..3fc8ee5 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -270,11 +270,11 @@ def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all" if cfg.context_window == "current" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] _assert("Recent history (last 3 steps" not in second_text, f"{label}: current unexpectedly includes history", errors) - _assert("Recent steps (oldest first, action only):" not in second_text, f"{label}: current unexpectedly includes action history", errors) + _assert("Recent steps (oldest first, action -> outcome):" not in second_text, f"{label}: current 
unexpectedly includes action history", errors) if cfg.context_window == "last3" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] if cfg.observation == "image_only": - _assert("Recent steps (oldest first, action only):" in second_text, f"{label}: last3 image_only should include action-only history", errors) + _assert("Recent steps (oldest first, action -> outcome):" in second_text, f"{label}: last3 image_only should include action+outcome history", errors) else: _assert("Recent history (last 3 steps, oldest first):" in second_text, f"{label}: last3 should include full history", errors) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py index 0e80359..bcad8b6 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_trace.py @@ -3,6 +3,10 @@ Writes ``step_000_reset.png``, ``step_NNN_.png``, ``run_log.txt``, ``plan.txt`` under a caller-chosen ``results/…`` directory. ``trace_prepare`` also removes ``*.json`` and ``*.jsonl`` there (e.g. LLM smoke sidecars). + +PNG frames omit a figure title by default (see ``task_id=""`` on ``trace_reset`` / ``trace_step``) +so ``tight_layout`` + ``bbox_inches="tight"`` framing stays balanced; pass a non-empty +``task_id`` only if you want a title. 
""" from __future__ import annotations @@ -25,8 +29,8 @@ def trace_prepare(out_dir: Path) -> None: p.unlink() -def trace_reset(out_dir: Path, state: Any) -> list[str]: - (out_dir / "step_000_reset.png").write_bytes(render_maze_image_png_bytes(state)) +def trace_reset(out_dir: Path, state: Any, *, task_id: str = "") -> list[str]: + (out_dir / "step_000_reset.png").write_bytes(render_maze_image_png_bytes(state, task_id=task_id)) return [f"000 RESET pos={state.agent_pos} facing={state.facing} inv={state.inventory}"] @@ -38,9 +42,12 @@ def trace_step( env: Any, *, position_before: tuple[Any, ...], + task_id: str = "", ) -> Tuple[Any, Any]: state, event = env.step(action) - (out_dir / f"step_{step:03d}_{action}.png").write_bytes(render_maze_image_png_bytes(state)) + (out_dir / f"step_{step:03d}_{action}.png").write_bytes( + render_maze_image_png_bytes(state, task_id=task_id) + ) line = ( f"{step:03d} {action:<12} {event.type:<10} from={position_before} " f"to={state.agent_pos} facing={state.facing} inv={state.inventory}" From 68082036c8acb2394831dbde5ef0d10e6b2d7575 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Wed, 6 May 2026 01:49:02 +0000 Subject: [PATCH 11/14] Shrink dimensions before running smoke tests on sample jsons --- .../render_dataset.py | 32 +++++++++++-------- src/v2/nlu_pipeline/nlu_benchmark/loader.py | 5 +++ .../nlu_benchmark/smoke_tests/smoke_llm.py | 25 ++++++++------- .../smoke_tests/solver_plan_trace.py | 17 ++++++++-- 4 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/v2/automatic_maze_generation/render_dataset.py b/src/v2/automatic_maze_generation/render_dataset.py index 6195206..c156af4 100644 --- a/src/v2/automatic_maze_generation/render_dataset.py +++ b/src/v2/automatic_maze_generation/render_dataset.py @@ -42,8 +42,8 @@ def _row_col_payload_to_xy_payload(payload: dict) -> dict: def rc_to_xy(pos): r, c = pos - # NLU JSON: 1-based (row, col). Legacy draw indices ``(x, y) = (c, r)`` (no ``-1``); matches historical traces. 
- return [c, r] + # NLU JSON: 1-based (row, col). Matplotlib cells are 0..cols-1 / 0..rows-1 after the dimension swap below. + return [c - 1, r - 1] dims = maze.get("dimensions") if dims and len(dims) == 2: @@ -193,29 +193,35 @@ def _draw_gate(ax, x: int, y: int, height: int, label: str): "WEST": (0, -1), } +_AGENT_ARROW_CELL_FRAC = 0.5 + def _draw_agent(ax, ar: int, ac: int, height: int, facing: str) -> None: - """Agent overlay; ``ar, ac`` are NLU **1-based** ``(row, col)`` (same as ``GridState``), matching ``rc_to_xy`` → ``[c, r]``.""" - cx = ac + 0.5 - cy = height - 1 - ar + 0.5 + """Agent overlay; ``ar, ac`` are NLU 1-based ``(row, col)``. Draw space matches ``rc_to_xy`` (0-based column, row).""" + sx, sy = ac - 1, ar - 1 + cx = sx + 0.5 + cy = height - 1 - sy + 0.5 ax.plot( cx, cy, "o", color="black", - markersize=10, + markersize=6, zorder=6, markeredgecolor="black", ) dr, dc = _AGENT_FACING_DELTA.get(facing, (0, 0)) if dr == 0 and dc == 0: return - nr, nc = ar + dr, ac + dc - tip_x = nc + 0.5 - tip_y = height - 1 - nr + 0.5 + tip_sx = sx + dc + tip_sy = sy + dr + tip_x = tip_sx + 0.5 + tip_y = height - 1 - tip_sy + 0.5 + end_x = cx + _AGENT_ARROW_CELL_FRAC * (tip_x - cx) + end_y = cy + _AGENT_ARROW_CELL_FRAC * (tip_y - cy) ax.annotate( "", - xy=(tip_x, tip_y), + xy=(end_x, end_y), xytext=(cx, cy), arrowprops=dict(arrowstyle="->", color="black", lw=1.5), zorder=7, @@ -334,7 +340,7 @@ def render_maze_payload(payload: dict, output_path: Path) -> None: title = payload.get("task_id", output_path.stem) fig, _ax, _height = _figure_from_maze_payload(payload, title) plt.tight_layout() - fig.savefig(output_path, dpi=150, bbox_inches="tight") + fig.savefig(output_path, dpi=150, bbox_inches="tight", pad_inches=0.08) plt.close(fig) @@ -347,7 +353,7 @@ def render_maze_payload_bytes( ) -> bytes: """Same layout as ``render_maze_payload``, PNG bytes (e.g. NLU live observations). 
- ``agent_pos`` is NLU 1-based ``(row, col)``; passed through to ``_draw_agent`` unchanged (legacy pairing with ``rc_to_xy`` → ``[c, r]``). + ``agent_pos`` is NLU 1-based ``(row, col)``; aligned with ``rc_to_xy`` (0-based draw indices). """ title = str(payload.get("task_id", "") or "") fig, ax, height = _figure_from_maze_payload(payload, title) @@ -356,7 +362,7 @@ def render_maze_payload_bytes( _draw_agent(ax, r1, c1, height, facing) plt.tight_layout() buf = BytesIO() - fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight") + fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight", pad_inches=0.08) plt.close(fig) return buf.getvalue() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/loader.py b/src/v2/nlu_pipeline/nlu_benchmark/loader.py index 5c590a7..5864dbf 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/loader.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/loader.py @@ -35,6 +35,11 @@ def load_maze(path) -> GridWorldEnv: return _task_dict_to_env(data) +def load_maze_from_dict(data: dict[str, Any]) -> GridWorldEnv: + """Build env from a parsed task dict (same schema as ``load_maze`` JSON files).""" + return _task_dict_to_env(data) + + def grid_state_to_maze_instance(st: GridState) -> MazeInstance: def rc_to_xy(pos): r, c = pos diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py index bc076cb..19349da 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_llm.py @@ -23,8 +23,8 @@ LocalTransformersAgent, ) from nlu_benchmark.config import ExperimentConfig -from nlu_benchmark.loader import load_maze -from nlu_benchmark.runner import ExperimentRunner +from nlu_benchmark.loader import load_maze_from_dict, task_dict_shrink_dimensions_minus_two +from nlu_benchmark.runner import ExperimentRunner, build_runner from nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts 
@@ -168,15 +168,18 @@ def main() -> None: trace_prepare(out_dir) - exp_cfg: ExperimentConfig | None = None - if args.chat_history is not None or args.chat_turns_max is not None: - exp_cfg = ExperimentConfig() - if args.chat_history is not None: - exp_cfg.chat_history = args.chat_history - if args.chat_turns_max is not None: - exp_cfg.chat_turns_max = args.chat_turns_max + task_data = task_dict_shrink_dimensions_minus_two( + json.loads(maze_path.read_text(encoding="utf-8")) + ) + maze_env = load_maze_from_dict(task_data) + + exp_cfg = ExperimentConfig() + if args.chat_history is not None: + exp_cfg.chat_history = args.chat_history + if args.chat_turns_max is not None: + exp_cfg.chat_turns_max = args.chat_turns_max - runner = ExperimentRunner.from_json(str(maze_path.resolve()), config=exp_cfg) + runner = build_runner(exp_cfg, maze_env, maze_json_path=str(maze_path.resolve())) query_log: List[dict] = [] if args.backend == "anthropic": @@ -204,7 +207,7 @@ def main() -> None: transcript = result["transcript"] planned_actions = [rec["action"] for rec in transcript] - env = load_maze(maze_path) + env = load_maze_from_dict(task_data) state = env.reset() lines = trace_reset(out_dir, state) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py index 5c553df..8344357 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py @@ -2,12 +2,17 @@ from __future__ import annotations +import json from pathlib import Path from typing import Any from automatic_maze_generation.mazegen.solver import solve_maze from nlu_benchmark.env import FACING_ORDER, FACING_TO_DELTA -from nlu_benchmark.loader import grid_state_to_maze_instance, load_maze +from nlu_benchmark.loader import ( + grid_state_to_maze_instance, + load_maze_from_dict, + task_dict_shrink_dimensions_minus_two, +) from 
nlu_benchmark.smoke_trace import trace_prepare, trace_reset, trace_step, trace_write_text_artifacts @@ -58,11 +63,17 @@ def write_png_trace_for_maze_json(maze_path: Path, out_dir: Path) -> dict[str, A """ Solve ``maze_path``, replay the plan in the NLU env, write ``step_*.png``, ``run_log.txt``, ``plan.txt`` under ``out_dir``. + Applies :func:`~nlu_benchmark.loader.task_dict_shrink_dimensions_minus_two` to the task JSON first + (same convention as benchmark maze smoke). + Returns a dict with keys ``ok`` (bool), ``optimal_cost``, ``success``, ``steps_used``, ``out_dir``, and on failure ``reason`` (str) or ``solver_result``. """ trace_prepare(out_dir) - env_plan = load_maze(maze_path) + raw: dict[str, Any] = task_dict_shrink_dimensions_minus_two( + json.loads(maze_path.read_text(encoding="utf-8")) + ) + env_plan = load_maze_from_dict(raw) plan_state = env_plan.reset() maze_inst = grid_state_to_maze_instance(plan_state) solver_result = solve_maze(maze_inst) @@ -78,7 +89,7 @@ def write_png_trace_for_maze_json(maze_path: Path, out_dir: Path) -> dict[str, A planned_actions = path_to_actions(path_rc, start_facing="NORTH") executable_actions = inject_pickups(planned_actions, env_plan, plan_state) - env = load_maze(maze_path) + env = load_maze_from_dict(raw) state = env.reset() lines = trace_reset(out_dir, state) From a3f6854e72da26f30314e6888bf775299254f32f Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Wed, 6 May 2026 03:29:31 +0000 Subject: [PATCH 12/14] Fix image context --- src/v2/nlu_pipeline/nlu_benchmark/config.py | 6 +- .../nlu_pipeline/nlu_benchmark/observation.py | 63 +++++++++++++++---- src/v2/nlu_pipeline/nlu_benchmark/runner.py | 33 +++++----- .../smoke_tests/analyze_smoke_runner_logs.py | 8 +-- .../smoke_prompting_observation_querying.py | 14 ++++- 5 files changed, 87 insertions(+), 37 deletions(-) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py index 9ce6af0..43ef023 100644 --- 
a/src/v2/nlu_pipeline/nlu_benchmark/config.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -17,7 +17,7 @@ class ExperimentConfig: observation text_only – initial NL maze in system; current situation text per user turn; last3 history image_text – same as text_only + live PNG each turn; last3 = full feedback - image_only – live PNG only (no NL map); last3 = action -> outcome lines (same strings as ``Last result:``) + image_only – live PNG only (no NL map); last3 = prior decision-frame PNGs + ``Action: …`` only (multimodal) context_window current – only the current observation (no prior steps in the prompt) @@ -39,9 +39,9 @@ class ExperimentConfig: payload after ``system``. Ignored for ``stateless`` / ``full``. """ - prompting: Literal["minimal", "standard", "verbose"] = "minimal" + prompting: Literal["minimal", "standard", "verbose"] = "standard" observation: Literal["text_only", "image_text", "image_only"] = "image_only" - context_window: Literal["current", "last3"] = "last3" + context_window: Literal["current", "last3"] = "current" querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" chat_history: Literal["stateless", "rolling", "full"] = "rolling" chat_turns_max: int = 3 diff --git a/src/v2/nlu_pipeline/nlu_benchmark/observation.py b/src/v2/nlu_pipeline/nlu_benchmark/observation.py index ab428ab..9cbe4c5 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/observation.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/observation.py @@ -4,8 +4,9 @@ system message once per episode. Each user turn: ``render_user_observation_text``, last3 history, and live PNG when image is enabled. -* **image_only** – No initial NL map in system; live PNG each query; last3 history lists - recent ``action → feedback`` lines (same feedback strings as ``Last result:``). 
+* **image_only** – No initial NL map in system; live PNG each query; ``last3`` + history is multimodal: up to three prior **decision-frame** PNGs (view before each + executed action) plus ``Action: …`` lines only — pose/outcome are left to the image. ``build_image_blocks`` adds PNGs whenever observation is not ``text_only`` (see ``runner._build_message``). """ @@ -20,13 +21,14 @@ class _StepRecord: - __slots__ = ("position", "facing", "action", "feedback") + __slots__ = ("position", "facing", "action", "feedback", "png_raw") - def __init__(self, position, facing, action, feedback): + def __init__(self, position, facing, action, feedback, png_raw: Optional[bytes] = None): self.position = position self.facing = facing self.action = action self.feedback = feedback + self.png_raw = png_raw class ObservationBuilder: @@ -44,18 +46,55 @@ def __init__( def reset(self) -> None: self._history.clear() - def record(self, position, facing: str, action: str, feedback: str) -> None: - self._history.append(_StepRecord(position, facing, action, feedback)) + def render_decision_frame_png(self, state) -> Optional[bytes]: + """PNG of the maze **before** ``env.step`` mutates ``state`` (``image_only`` only).""" + if self._observation != "image_only": + return None + try: + return render_maze_image_png_bytes(state) + except Exception: + return None + + def record( + self, + position, + facing: str, + action: str, + feedback: str, + *, + decision_frame_png: Optional[bytes] = None, + ) -> None: + png_raw = decision_frame_png if self._observation == "image_only" else None + self._history.append(_StepRecord(position, facing, action, feedback, png_raw)) + + def history_content_blocks(self) -> List[dict]: + """Multimedia tail for ``image_only`` + ``last3``: prior frames + action labels only.""" + if self._observation != "image_only" or self._context_window == "current" or not self._history: + return [] + recs = self._history[-3:] + blocks: List[dict] = [] + for rec in recs: + if not 
rec.png_raw: + continue + b64 = base64.b64encode(rec.png_raw).decode("utf-8") + blocks.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}) + blocks.append({"type": "text", "text": f"Action: {rec.action}\n\n"}) + if not blocks: + return [] + intro = ( + "Recent steps (oldest first). Each image is the maze view from which the " + "following action was chosen; infer pose and environment state from the image.\n\n" + ) + return [{"type": "text", "text": intro}] + blocks def history_text(self) -> str: - if self._context_window == "current" or not self._history: + if ( + self._context_window == "current" + or not self._history + or self._observation == "image_only" + ): return "" recs = self._history[-3:] - if self._observation == "image_only": - lines = ["Recent steps (oldest first, action -> outcome):"] - for rec in recs: - lines.append(f" {rec.action} -> {rec.feedback}") - return "\n".join(lines) lines = ["Recent history (last 3 steps, oldest first):"] for rec in recs: lines.append( diff --git a/src/v2/nlu_pipeline/nlu_benchmark/runner.py b/src/v2/nlu_pipeline/nlu_benchmark/runner.py index bfecacd..16be21a 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/runner.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/runner.py @@ -7,14 +7,6 @@ cfg = ExperimentConfig(prompting="verbose", querying="full_trajectory") runner = build_runner(cfg, env, maze_json_path="path/to/maze.json") - result = runner.run(agent) # verbose=True: print progress - result = runner.run(agent, verbose=False) # quiet for batch runs - -Or enable library logging in your script:: - - import logging - logging.basicConfig(level=logging.INFO) - result = runner.run(agent, verbose=False) Or from a JSON file directly: @@ -171,9 +163,10 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di if logger.isEnabledFor(logging.INFO): logger.info( - "Episode start: max_steps=%s querying=%s observation=%s context_window=%s " + "Episode start: max_steps=%s prompting=%s 
querying=%s observation=%s context_window=%s " "chat_history=%s chat_turns_max=%s", max_steps, + self.config.prompting, self.config.querying, self.config.observation, self.config.context_window, @@ -223,7 +216,8 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di if not action_queue: logger.warning( - "LLM query #%d: no valid actions parsed; retrying with parser feedback", + "LLM query #%d: no valid actions parsed; empty queue so another query follows, " + "user turn will include parse-hint feedback", query_count, ) last_feedback = ( @@ -240,6 +234,7 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di action = action_queue.pop(0) position_before = state.agent_pos + decision_png = self.obs.render_decision_frame_png(state) state, event = self.env.step(action) step_detail = format_step_feedback(action, event.type, event.message, position_before) last_feedback = action_feedback_for_prompt(self.config.observation, step_detail) @@ -261,7 +256,13 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di **self.querying.step_metadata(), }) - self.obs.record(state.agent_pos, state.facing, action, last_feedback) + self.obs.record( + state.agent_pos, + state.facing, + action, + last_feedback, + decision_frame_png=decision_png, + ) if event_type == "DONE": if logger.isEnabledFor(logging.INFO): @@ -288,10 +289,12 @@ def run(self, agent: Callable[[List[dict]], str], *, verbose: bool = True) -> di def _build_message(self, state, last_feedback: str) -> dict: obs_text = self.obs.build_text(state) history_text = self.obs.history_text() - prompt_text = self.prompt.build_user_prompt(obs_text, history_text, state, last_feedback) - images = self.obs.build_image_blocks(state, self.maze_json_path) - if images: - return {"role": "user", "content": images + [{"type": "text", "text": prompt_text}]} + prompt_text = self.prompt.build_user_prompt(obs_text, history_text, state, last_feedback) + hist_blocks 
= self.obs.history_content_blocks() + images = self.obs.build_image_blocks(state, self.maze_json_path) + text_block = {"type": "text", "text": prompt_text} + if hist_blocks or images: + return {"role": "user", "content": hist_blocks + images + [text_block]} return {"role": "user", "content": prompt_text} def _result(self, success: bool, state, transcript: List[dict]) -> dict: diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py index 49c7d8f..b9d88a6 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/analyze_smoke_runner_logs.py @@ -69,12 +69,12 @@ def main() -> None: if len(queries) >= 2: second_text = queries[1]["user_text"] has_recent = "Recent history (last 3 steps, oldest first):" in second_text - has_action_outcome = "Recent steps (oldest first, action -> outcome):" in second_text - if cfg["context_window"] == "current" and (has_recent or has_action_outcome): + has_image_step_history = "infer pose and environment state from the image" in second_text + if cfg["context_window"] == "current" and (has_recent or has_image_step_history): issues.append((label, "current_has_history")) if cfg["context_window"] == "last3": - if obs == "image_only" and not has_action_outcome: - issues.append((label, "last3_image_only_missing_action_history")) + if obs == "image_only" and not has_image_step_history: + issues.append((label, "last3_image_only_missing_visual_history")) if obs != "image_only" and not has_recent: issues.append((label, "last3_missing_history")) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py index 3fc8ee5..29ccccd 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py +++ 
b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -230,7 +230,7 @@ def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all" full_trajectory_actions = _full_trajectory_actions_for_maze(maze_path) # Smoke test already validated rendering elsewhere; use tiny static bytes for speed. observation_module.render_maze_image_png_bytes = lambda _state: _ONE_BY_ONE_PNG - base = ExperimentConfig(prompting="minimal", observation="text_only", context_window="last3", querying="step_by_step") + base = ExperimentConfig(prompting="standard", observation="image_only", context_window="last3", querying="step_by_step") selected = _suite_cases(base, suite) outputs = [ _run_case(cfg, maze_path, label, full_trajectory_actions, max_steps) @@ -270,11 +270,19 @@ def run_smoke_suite(maze_name: str, tag: str, max_steps: int, suite: str = "all" if cfg.context_window == "current" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] _assert("Recent history (last 3 steps" not in second_text, f"{label}: current unexpectedly includes history", errors) - _assert("Recent steps (oldest first, action -> outcome):" not in second_text, f"{label}: current unexpectedly includes action history", errors) + _assert( + "infer pose and environment state from the image" not in second_text, + f"{label}: current unexpectedly includes visual step history", + errors, + ) if cfg.context_window == "last3" and len(agent.calls) > 1: second_text = agent.calls[1]["user_text"] if cfg.observation == "image_only": - _assert("Recent steps (oldest first, action -> outcome):" in second_text, f"{label}: last3 image_only should include action+outcome history", errors) + _assert( + "infer pose and environment state from the image" in second_text, + f"{label}: last3 image_only should include PNG+action history intro", + errors, + ) else: _assert("Recent history (last 3 steps, oldest first):" in second_text, f"{label}: last3 should include full history", 
errors) From 1363a93b1055eab9c85256d750cd0f332d1bcd63 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Wed, 6 May 2026 04:23:22 +0000 Subject: [PATCH 13/14] Swap X and Y axis in rendering --- src/v2/nlu_pipeline/nlu_benchmark/config.py | 5 +- src/v2/nlu_pipeline/nlu_benchmark/env.py | 24 +++--- src/v2/nlu_pipeline/nlu_benchmark/loader.py | 74 +++++++++++++------ .../nlu_benchmark/prompt_strategies.py | 8 +- src/v2/nlu_pipeline/nlu_benchmark/renderer.py | 60 +++++++++------ .../sample mazes/V01_empty_room.json | 2 +- .../sample mazes/V04_single_key.json | 2 +- .../smoke_prompting_observation_querying.py | 23 +++--- .../smoke_tests/solver_plan_trace.py | 1 + 9 files changed, 127 insertions(+), 72 deletions(-) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py index 43ef023..43acacb 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/config.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -8,6 +8,9 @@ class ExperimentConfig: """Selects one implementation along each experimental axis. + Task JSON uses ``maze.dimensions = [rows, cols]`` and 1-based ``[x, y]`` cells (east, south from **top-left** ``(1,1)``), + i.e. ``[column, row]``. Env tuples are ``(row, column)``. 
+ prompting minimal – goal + action list only (system prompt) standard – adds ``MECHANISM_LIST`` to the system prompt @@ -40,7 +43,7 @@ class ExperimentConfig: """ prompting: Literal["minimal", "standard", "verbose"] = "standard" - observation: Literal["text_only", "image_text", "image_only"] = "image_only" + observation: Literal["text_only", "image_text", "image_only"] = "text_only" context_window: Literal["current", "last3"] = "current" querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" chat_history: Literal["stateless", "rolling", "full"] = "rolling" diff --git a/src/v2/nlu_pipeline/nlu_benchmark/env.py b/src/v2/nlu_pipeline/nlu_benchmark/env.py index 9e01473..247effc 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/env.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/env.py @@ -1,14 +1,16 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Set, Tuple, Optional +# Grid positions are 1-based ``(row, column)``: origin **top-left** ``(1, 1)``; row increases **southward**, +# column increases eastward. Movement deltas are ``(Δrow, Δcolumn)``. 
Pos = Tuple[int, int] FACING_ORDER = ["NORTH", "EAST", "SOUTH", "WEST"] FACING_TO_DELTA: Dict[str, Tuple[int, int]] = { "NORTH": (-1, 0), - "EAST": ( 0, 1), - "SOUTH": ( 1, 0), + "EAST": ( 0, +1), + "SOUTH": (+1, 0), "WEST": ( 0, -1), } @@ -18,7 +20,7 @@ class GridState: rows: int cols: int walls: Set[Pos] - start: Pos + start: Pos # (row, column) 1-based goal: Pos agent_pos: Pos facing: str = "NORTH" @@ -115,7 +117,7 @@ def step(self, action: str) -> tuple[GridState, StepEvent]: # --- Move one step forward --- if verb == "MOVE_FORWARD": dr, dc = FACING_TO_DELTA[self.state.facing] - r, c = self.state.agent_pos + r, c = self.state.agent_pos nr, nc = r + dr, c + dc reason = self._blocked(nr, nc) if reason: @@ -147,7 +149,7 @@ def step(self, action: str) -> tuple[GridState, StepEvent]: # --- Toggle facing switch only (opens/closes linked gates; doors and gates are not toggled directly) --- if verb == "TOGGLE": dr, dc = FACING_TO_DELTA[self.state.facing] - r, c = self.state.agent_pos + r, c = self.state.agent_pos target = (r + dr, c + dc) sw = self._switch_at(target) if sw: @@ -173,16 +175,16 @@ def step(self, action: str) -> tuple[GridState, StepEvent]: # Helpers # ------------------------------------------------------------------ - def _blocked(self, nr: int, nc: int) -> Optional[str]: - """Return a reason string if (nr, nc) is impassable, else None.""" - if nr < 1 or nr > self.state.rows or nc < 1 or nc > self.state.cols: + def _blocked(self, nrow: int, ncol: int) -> Optional[str]: + """Return a reason string if ``(nrow, ncol)`` is impassable, else ``None``. 
Indices are ``(row, column)``.""" + if nrow < 1 or nrow > self.state.rows or ncol < 1 or ncol > self.state.cols: return "out of bounds" - if (nr, nc) in self.state.walls: + if (nrow, ncol) in self.state.walls: return "wall" - door = self._door_at((nr, nc)) + door = self._door_at((nrow, ncol)) if door and door["requires_key"] not in self.state.inventory: return f"locked {door['requires_key']} door" - gate = self._gate_at((nr, nc)) + gate = self._gate_at((nrow, ncol)) if gate and gate.get("state", gate.get("initial_state", "closed")) == "closed": return "closed gate" return None diff --git a/src/v2/nlu_pipeline/nlu_benchmark/loader.py b/src/v2/nlu_pipeline/nlu_benchmark/loader.py index 5864dbf..1211553 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/loader.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/loader.py @@ -11,14 +11,45 @@ from nlu_benchmark.env import GridState, GridWorldEnv +def _swap_validation_v04_dimensions_if_raw(maze: dict[str, Any], task_id: str) -> None: + """``validation_10_v04_single_key.json`` lists ``dimensions`` as ``[cols, rows]`` = ``[14, 12]``; normalize once.""" + if str(task_id) != "validation_10_v04_single_key": + return + dims = maze.get("dimensions") + if isinstance(dims, list) and len(dims) == 2 and dims[0] == 14 and dims[1] == 12: + maze["dimensions"] = [12, 14] + + +def _json_cell_to_pos(pair: list | tuple) -> tuple[int, int]: + """JSON cell ``[x, y]`` with origin at **top-left** ``(1, 1)``: ``x`` east (column), ``y`` south (row). + + Same as ``[column, row]``. Internal env tuple is ``(row, column)``. 
+ """ + col, row = int(pair[0]), int(pair[1]) + return (row, col) + + +def _normalize_mechanisms_from_json(mechs: dict[str, Any] | None) -> dict[str, Any]: + """Deep-copy mechanisms; JSON ``position`` is ``[x, y]`` / ``[column, row]``, stored as ``[row, column]`` internally.""" + m = copy.deepcopy(mechs or {}) + for name in ("keys", "doors", "switches", "gates"): + for item in m.get(name, []): + pos = item.get("position") + if isinstance(pos, (list, tuple)) and len(pos) == 2: + r, c = _json_cell_to_pos(pos) + item["position"] = [r, c] + return m + + def _task_dict_to_env(data: dict[str, Any]) -> GridWorldEnv: maze = data["maze"] + _swap_validation_v04_dimensions_if_raw(maze, str(data.get("task_id", ""))) rows, cols = maze["dimensions"] - walls = {tuple(w) for w in maze["walls"]} - start = tuple(maze["start"]) - goal = tuple(maze["goal"]) + walls = {_json_cell_to_pos(w) for w in maze["walls"]} + start = _json_cell_to_pos(maze["start"]) + goal = _json_cell_to_pos(maze["goal"]) max_steps = data.get("max_steps", 100) - mechanisms = data.get("mechanisms", {}) + mechanisms = _normalize_mechanisms_from_json(data.get("mechanisms", {})) return GridWorldEnv( rows=rows, cols=cols, @@ -42,9 +73,9 @@ def load_maze_from_dict(data: dict[str, Any]) -> GridWorldEnv: def grid_state_to_maze_instance(st: GridState) -> MazeInstance: def rc_to_xy(pos): - r, c = pos - # NLU grids are 1-based (row, col); mazegen solver uses 0-based (x, y). - return (c - 1, r - 1) + row, col = pos + # Mazegen ``y`` increases south from the north edge; NLU row 1 is north (top) → ``y = row - 1``. + return (col - 1, row - 1) return MazeInstance( width=st.cols, @@ -103,29 +134,30 @@ def task_dict_shrink_dimensions_minus_two(data: dict[str, Any]) -> dict[str, Any """ Return a deep copy whose ``maze.dimensions`` are each reduced by 2 (e.g. ``[10, 10] -> [8, 8]``). - Row/column coordinates are **unchanged**. 
Use when the JSON names a larger grid than the - coordinates actually use (common in the ogbench-style exports this repo ingests). + JSON coordinates are 1-based ``[x, y]`` with origin at the **top-left** cell ``(1, 1)``: ``x`` east (column), + ``y`` south (row). Same as ``[column, row]``. They are not rewritten—only ``dimensions`` shrink. Raises ``ValueError`` if the new size would be <2 or any coordinate lies outside the shrunk grid. """ out = copy.deepcopy(data) maze = out["maze"] + _swap_validation_v04_dimensions_if_raw(maze, str(out.get("task_id", ""))) rows, cols = maze["dimensions"] if rows < 2 or cols < 2: raise ValueError("maze dimensions must be at least 2 to shrink by 2") nr, nc = rows - 2, cols - 2 - def bad_rc(r: int, c: int) -> bool: - return not (1 <= r <= nr and 1 <= c <= nc) + def bad_cell(col: int, row: int) -> bool: + return not (1 <= row <= nr and 1 <= col <= nc) - sr, sc = int(maze["start"][0]), int(maze["start"][1]) - gr, gc = int(maze["goal"][0]), int(maze["goal"][1]) - if bad_rc(sr, sc) or bad_rc(gr, gc): - raise ValueError(f"start/goal outside shrunk grid 1..{nr} x 1..{nc}: start={maze['start']} goal={maze['goal']}") + scol, srow = int(maze["start"][0]), int(maze["start"][1]) + gcol, grow = int(maze["goal"][0]), int(maze["goal"][1]) + if bad_cell(scol, srow) or bad_cell(gcol, grow): + raise ValueError(f"start/goal outside shrunk grid x 1..{nc}, y 1..{nr}: start={maze['start']} goal={maze['goal']}") for w in maze["walls"]: - r, c = int(w[0]), int(w[1]) - if bad_rc(r, c): + wc, wr = int(w[0]), int(w[1]) + if bad_cell(wc, wr): raise ValueError(f"wall {w} outside shrunk grid ({nr}x{nc})") mech = out.get("mechanisms", {}) @@ -134,16 +166,16 @@ def bad_rc(r: int, c: int) -> bool: pos = item.get("position") if pos is None: continue - r, c = int(pos[0]), int(pos[1]) - if bad_rc(r, c): + wc, wr = int(pos[0]), int(pos[1]) + if bad_cell(wc, wr): raise ValueError(f"{name} position {pos} outside shrunk grid ({nr}x{nc})") g = out.get("goal") if 
isinstance(g, dict) and g.get("type") == "reach_position": t = g.get("target") if isinstance(t, (list, tuple)) and len(t) == 2: - r, c = int(t[0]), int(t[1]) - if bad_rc(r, c): + tc, tr = int(t[0]), int(t[1]) + if bad_cell(tc, tr): raise ValueError(f"goal.target {t} outside shrunk grid ({nr}x{nc})") maze["dimensions"] = [nr, nc] diff --git a/src/v2/nlu_pipeline/nlu_benchmark/prompt_strategies.py b/src/v2/nlu_pipeline/nlu_benchmark/prompt_strategies.py index 73aa321..90f8e2e 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/prompt_strategies.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/prompt_strategies.py @@ -138,9 +138,9 @@ def build_user_prompt( if steps_left <= max(5, state.max_steps // 5) else "" ) - r, c = state.agent_pos - gr, gc = state.goal - manhattan = abs(r - gr) + abs(c - gc) + row, col = state.agent_pos + grow, gcol = state.goal + manhattan = abs(row - grow) + abs(col - gcol) facing_idx = FACING_ORDER.index(state.facing) rel_dirs = [ @@ -152,7 +152,7 @@ def build_user_prompt( neighbour_lines = [] for rel, cardinal in rel_dirs: dr, dc = FACING_TO_DELTA[cardinal] - nr, nc = r + dr, c + dc + nr, nc = row + dr, col + dc if nr < 1 or nr > state.rows or nc < 1 or nc > state.cols: desc = "out of bounds" elif (nr, nc) in state.walls: diff --git a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py index 0eb306b..e869acc 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/renderer.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/renderer.py @@ -28,21 +28,34 @@ def _render_dataset_module(): return _RENDER_DATASET_MOD +def _internal_pos_to_json_list(pos: tuple[int, int]) -> list[int]: + """Env ``(row, column)`` → JSON ``[x, y]`` = ``[column, row]`` (standard Cartesian order).""" + row, col = pos + return [col, row] + + +def _mechanism_dict_for_payload(item: dict[str, Any]) -> dict[str, Any]: + out = dict(item) + if "position" in out: + out["position"] = _internal_pos_to_json_list(tuple(out["position"])) + return out + + def 
_grid_state_to_maze_payload(state: GridState, *, task_id: str = "") -> dict: """JSON-shaped maze dict for ``render_maze_payload`` / ``render_maze_payload_bytes``.""" out: dict[str, Any] = { "maze": { - # Unified convention: payloads are always (row, col). + # Task JSON ``[x, y]`` = ``[column, row]`` (``dimensions`` are ``[rows, cols]``). "dimensions": [state.rows, state.cols], - "walls": [list(w) for w in sorted(state.walls)], - "start": list(state.start), - "goal": list(state.goal), + "walls": [_internal_pos_to_json_list(w) for w in sorted(state.walls)], + "start": _internal_pos_to_json_list(state.start), + "goal": _internal_pos_to_json_list(state.goal), }, "mechanisms": { - "keys": [dict(k) for k in state.keys], - "doors": [dict(d) for d in state.doors], - "switches": [dict(s) for s in state.switches], - "gates": [dict(g) for g in state.gates], + "keys": [_mechanism_dict_for_payload(k) for k in state.keys], + "doors": [_mechanism_dict_for_payload(d) for d in state.doors], + "switches": [_mechanism_dict_for_payload(s) for s in state.switches], + "gates": [_mechanism_dict_for_payload(g) for g in state.gates], }, } if task_id: @@ -51,11 +64,12 @@ def _grid_state_to_maze_payload(state: GridState, *, task_id: str = "") -> dict: def _static_layout_lines(state: GridState) -> list[str]: - wall_str = ", ".join(f"({r},{c})" for r, c in sorted(state.walls)) or "none" + wall_str = ", ".join(f"({row},{col})" for row, col in sorted(state.walls)) or "none" return [ f"The world is a {state.rows} by {state.cols} grid.", - "Coordinates are given as (row, column).", - "The top-left corner is (1,1).", + "Coordinates: JSON lists use ``[x, y]`` (east, south) from the **top-left** corner ``(1, 1)``;" + " tuples in this text use ``(row, column)`` matching env state (row southward, column east)." + " So ``x`` = column index, ``y`` = row index (e.g. 
goal ``[2, 12]`` is the cell ``(12, 2)``).", f"The start is at {state.start}.", f"The goal is at {state.goal}.", f"The following cells are walls: {wall_str}.", @@ -65,30 +79,30 @@ def _static_layout_lines(state: GridState) -> list[str]: def _mechanism_lines(state: GridState) -> list[str]: parts: list[str] = [] for key in state.keys: - r, c = key["position"] - parts.append(f"There is a {key['color']} key at ({r},{c}).") + row, col = key["position"] + parts.append(f"There is a {key['color']} key at ({row},{col}).") for door in state.doors: - r, c = door["position"] + row, col = door["position"] parts.append( - f"There is a locked {door['requires_key']} door at ({r},{c})." + f"There is a locked {door['requires_key']} door at ({row},{col})." f" It requires the {door['requires_key']} key to open." ) for switch in state.switches: - r, c = switch["position"] + row, col = switch["position"] controls = ", ".join(switch.get("controls", [])) on_off = "on" if switch.get("on") else "off" parts.append( - f"There is a {switch.get('switch_type', 'toggle')} switch at ({r},{c}) (currently {on_off})." + f"There is a {switch.get('switch_type', 'toggle')} switch at ({row},{col}) (currently {on_off})." f" It controls: {controls}." ) for gate in state.gates: - r, c = gate["position"] + row, col = gate["position"] cur = gate.get("state", gate.get("initial_state", "closed")) parts.append( - f"There is a gate ({gate['id']}) at ({r},{c})." + f"There is a gate ({gate['id']}) at ({row},{col})." f" It is currently {cur} (initially {gate.get('initial_state', 'closed')})." 
) return parts @@ -128,11 +142,11 @@ def render_maze_image_png_bytes(state: GridState, *, task_id: str = "") -> bytes """ mod = _render_dataset_module() payload = _grid_state_to_maze_payload(state, task_id=task_id) - ar, ac = state.agent_pos + row, col = state.agent_pos return mod.render_maze_payload_bytes( payload, dpi=150, - agent_pos=(ar, ac), + agent_pos=(col, row), facing=state.facing, ) @@ -148,12 +162,12 @@ def render_task_json_with_solver_path_png( ``solver_path_xy`` is ``solve_maze(...)["path"]`` (mazegen 0-based ``(x, y)``; ``x`` = column index, ``y`` = row index). """ - optimal_path_rc = [[y + 1, x + 1] for (x, y) in solver_path_xy] + optimal_path_cells = [[x + 1, y + 1] for (x, y) in solver_path_xy] payload = { **task_data, "validation": { **task_data.get("validation", {}), - "optimal_path": optimal_path_rc, + "optimal_path": optimal_path_cells, }, } mod = _render_dataset_module() diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V01_empty_room.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V01_empty_room.json index 7da7b35..fcb8890 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V01_empty_room.json +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V01_empty_room.json @@ -49,4 +49,4 @@ "wall_topology": "open" }, "max_steps": 100 -} +} \ No newline at end of file diff --git a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json index de290aa..9744382 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json +++ b/src/v2/nlu_pipeline/nlu_benchmark/sample mazes/V04_single_key.json @@ -93,4 +93,4 @@ "wall_topology": "room_chain_with_key_branch" }, "max_steps": 140 -} +} \ No newline at end of file diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py index 29ccccd..4266fe3 100644 --- 
a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/smoke_prompting_observation_querying.py @@ -49,10 +49,12 @@ def _parse_prompt_state(user_text: str): gm = _GOAL_RE.search(user_text) if not (pm and fm and gm): return None - pos = (int(pm.group(1)), int(pm.group(2))) + row = int(pm.group(1)) + col = int(pm.group(2)) facing = fm.group(1) - goal = (int(gm.group(1)), int(gm.group(2))) - return pos, facing, goal + grow = int(gm.group(1)) + gcol = int(gm.group(2)) + return (row, col), facing, (grow, gcol) def _turn_to_face(cur: str, target: str) -> list[str]: @@ -72,22 +74,23 @@ def _plan_to_goal_from_prompt(user_text: str, budget: int = 6) -> list[str]: parsed = _parse_prompt_state(user_text) if parsed is None: return ["TURN_RIGHT"] - (r, c), facing, (gr, gc) = parsed + (row, col), facing, (grow, gcol) = parsed actions: list[str] = [] - if c != gc: - target = "EAST" if gc > c else "WEST" + if col != gcol: + target = "EAST" if gcol > col else "WEST" actions.extend(_turn_to_face(facing, target)) - actions.extend(["MOVE_FORWARD"] * min(abs(gc - c), max(1, budget - len(actions)))) - elif r != gr: - target = "SOUTH" if gr > r else "NORTH" + actions.extend(["MOVE_FORWARD"] * min(abs(gcol - col), max(1, budget - len(actions)))) + elif row != grow: + target = "SOUTH" if grow > row else "NORTH" actions.extend(_turn_to_face(facing, target)) - actions.extend(["MOVE_FORWARD"] * min(abs(gr - r), max(1, budget - len(actions)))) + actions.extend(["MOVE_FORWARD"] * min(abs(grow - row), max(1, budget - len(actions)))) else: actions.append("DONE") return actions[:budget] if actions else ["DONE"] def _xy_path_to_rc(path_xy) -> list[tuple[int, int]]: + """mazegen 0-based ``(x, y)`` (y south) → NLU 1-based ``(row, column)`` (row southward).""" return [(y + 1, x + 1) for (x, y) in path_xy] diff --git a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py 
b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py index 8344357..b344da1 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/smoke_tests/solver_plan_trace.py @@ -42,6 +42,7 @@ def path_to_actions(path: list[tuple[int, int]], start_facing: str = "NORTH") -> def xy_path_to_rc(path_xy: list[tuple[int, int]]) -> list[tuple[int, int]]: + """mazegen 0-based ``(x, y)`` (y south from north edge) → NLU ``(row, column)`` with row southward.""" return [(y + 1, x + 1) for (x, y) in path_xy] From be38a58dffd95f18356211a196bf946f3ca9d217 Mon Sep 17 00:00:00 2001 From: Arushi Jain Date: Wed, 6 May 2026 04:44:26 +0000 Subject: [PATCH 14/14] Fix rendering --- .../render_dataset.py | 45 ++++++++++--------- src/v2/nlu_pipeline/nlu_benchmark/config.py | 2 +- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/v2/automatic_maze_generation/render_dataset.py b/src/v2/automatic_maze_generation/render_dataset.py index c156af4..7fb4098 100644 --- a/src/v2/automatic_maze_generation/render_dataset.py +++ b/src/v2/automatic_maze_generation/render_dataset.py @@ -35,43 +35,46 @@ def _extract_payload_fields(payload: dict): def _row_col_payload_to_xy_payload(payload: dict) -> dict: - """Convert a row/col payload to renderer-space (x/y) without mutating input.""" + """Convert NLU maze JSON (1-based ``[x, y]`` = ``[column, row]``, top-left origin) to renderer indices.""" out = deepcopy(payload) maze = out.get("maze", {}) mechs = out.get("mechanisms", {}) - def rc_to_xy(pos): - r, c = pos - # NLU JSON: 1-based (row, col). Matplotlib cells are 0..cols-1 / 0..rows-1 after the dimension swap below. 
- return [c - 1, r - 1] - dims = maze.get("dimensions") if dims and len(dims) == 2: - rows, cols = dims + rows, cols = dims[0], dims[1] + else: + rows, cols = 0, 0 + + def cr_to_xy(pos): + col, row = pos[0], pos[1] + return [col - 1, row - 1] + + if dims and len(dims) == 2: maze["dimensions"] = [cols, rows] - maze["walls"] = [rc_to_xy(w) for w in maze.get("walls", [])] + maze["walls"] = [cr_to_xy(w) for w in maze.get("walls", [])] if "start" in maze: - maze["start"] = rc_to_xy(maze["start"]) + maze["start"] = cr_to_xy(maze["start"]) if "goal" in maze: - maze["goal"] = rc_to_xy(maze["goal"]) + maze["goal"] = cr_to_xy(maze["goal"]) for k in mechs.get("keys", []): if "position" in k: - k["position"] = rc_to_xy(k["position"]) + k["position"] = cr_to_xy(k["position"]) for d in mechs.get("doors", []): if "position" in d: - d["position"] = rc_to_xy(d["position"]) + d["position"] = cr_to_xy(d["position"]) for s in mechs.get("switches", []): if "position" in s: - s["position"] = rc_to_xy(s["position"]) + s["position"] = cr_to_xy(s["position"]) for g in mechs.get("gates", []): if "position" in g: - g["position"] = rc_to_xy(g["position"]) + g["position"] = cr_to_xy(g["position"]) validation = out.get("validation", {}) if "optimal_path" in validation: - validation["optimal_path"] = [rc_to_xy(p) for p in validation.get("optimal_path", [])] + validation["optimal_path"] = [cr_to_xy(p) for p in validation.get("optimal_path", [])] return out @@ -196,9 +199,9 @@ def _draw_gate(ax, x: int, y: int, height: int, label: str): _AGENT_ARROW_CELL_FRAC = 0.5 -def _draw_agent(ax, ar: int, ac: int, height: int, facing: str) -> None: - """Agent overlay; ``ar, ac`` are NLU 1-based ``(row, col)``. 
Draw space matches ``rc_to_xy`` (0-based column, row).""" - sx, sy = ac - 1, ar - 1 +def _draw_agent(ax, col: int, row: int, height: int, facing: str) -> None: + """Agent overlay; NLU 1-based ``column``, ``row`` (origin top-left, row increases downward).""" + sx, sy = col - 1, row - 1 cx = sx + 0.5 cy = height - 1 - sy + 0.5 ax.plot( @@ -353,13 +356,13 @@ def render_maze_payload_bytes( ) -> bytes: """Same layout as ``render_maze_payload``, PNG bytes (e.g. NLU live observations). - ``agent_pos`` is NLU 1-based ``(row, col)``; aligned with ``rc_to_xy`` (0-based draw indices). + ``agent_pos`` is NLU 1-based ``(column_index, row_index)`` for matplotlib overlay (same as JSON ``x``, ``y``). """ title = str(payload.get("task_id", "") or "") fig, ax, height = _figure_from_maze_payload(payload, title) if agent_pos is not None: - r1, c1 = int(agent_pos[0]), int(agent_pos[1]) - _draw_agent(ax, r1, c1, height, facing) + col1, row1 = int(agent_pos[0]), int(agent_pos[1]) + _draw_agent(ax, col1, row1, height, facing) plt.tight_layout() buf = BytesIO() fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight", pad_inches=0.08) diff --git a/src/v2/nlu_pipeline/nlu_benchmark/config.py b/src/v2/nlu_pipeline/nlu_benchmark/config.py index 43acacb..c7c6b23 100644 --- a/src/v2/nlu_pipeline/nlu_benchmark/config.py +++ b/src/v2/nlu_pipeline/nlu_benchmark/config.py @@ -43,7 +43,7 @@ class ExperimentConfig: """ prompting: Literal["minimal", "standard", "verbose"] = "standard" - observation: Literal["text_only", "image_text", "image_only"] = "text_only" + observation: Literal["text_only", "image_text", "image_only"] = "image_only" context_window: Literal["current", "last3"] = "current" querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step" chat_history: Literal["stateless", "rolling", "full"] = "rolling"