Add the OGBench submodule, a VS Code debug configuration, and the `prompts`
package of prompt templates for the maze evaluation experiments.

Layout of the `prompts` package
  * condition_set_1_prompt.py ... condition_set_6_in_context_learning.py
    Each module defines its template strings, a CONDITION_SET dict with the
    keys "name", "comparisons", "decision" and "prompts" (set 6 also has
    "constraint"), and PROMPTS, an alias for CONDITION_SET["prompts"].
  * exp_design.py builds CONDITION_SETS, mapping each set's "name" to its
    CONDITION_SET dict.
  * __init__.py re-exports CONDITION_SETS.

Templates carry {UPPER_CASE} placeholders. Nothing in this patch fills them
in (presumably the caller uses str.format; confirm against the caller).

Left out on purpose
  * prompts/__pycache__/*.pyc. Compiled bytecode is a build artifact, and a
    "Binary files ... differ" stub carries no data for `git apply` to use.
  * The optional `index` header line of condition_set_1_prompt.py,
    condition_set_3_context_window.py and condition_set_4_action_space.py.

Review notes
  * NOTE(review): .gitmodules uses the relative URL "./ogbench", which git
    resolves against the superproject's remote; confirm that a repository
    really exists at that location.
  * NOTE(review): IMAGE_ONLY_PROMPT has the same text as STANDARD_PROMPT, so
    the two arms of condition set 2 can only differ in how
    {DOMAIN_SPECIFIC_OBSERVATION} is filled in; confirm this is intended.
  * NOTE(review): condition sets 3 and 5 list a baseline in "comparisons"
    ("0 history", "Step-by-step") that has no entry under "prompts";
    presumably the baseline reuses STANDARD_PROMPT; confirm.

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0f82088
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "ogbench"]
+	path = ogbench
+	url = ./ogbench
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..690053b
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,21 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            // add python paths
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}/ogbench:${workspaceFolder}/ogbench/ogbench:${workspaceFolder}/ogbench/ogbench/procgen"
+            }
+
+        },
+
+    ]
+}
\ No newline at end of file
diff --git a/ogbench b/ogbench
new file mode 160000
index 0000000..d750941
--- /dev/null
+++ b/ogbench
@@ -0,0 +1 @@
+Subproject commit d7509418e7a2243ebea3c5fc081ed57557e89719
diff --git a/prompts/__init__.py b/prompts/__init__.py
new file mode 100644
index 0000000..80835a2
--- /dev/null
+++ b/prompts/__init__.py
@@ -0,0 +1,5 @@
+"""Prompt condition sets for maze evaluation experiments."""
+
+from .exp_design import CONDITION_SETS
+
+__all__ = ["CONDITION_SETS"]
diff --git a/prompts/condition_set_1_prompt.py b/prompts/condition_set_1_prompt.py
new file mode 100644
--- /dev/null
+++ b/prompts/condition_set_1_prompt.py
@@ -0,0 +1,65 @@
+"""Condition set 1: prompt verbosity."""
+
+# Note on {DOMAIN_SPECIFIC_OBSERVATION}: image for 2D and 3D; NL for NL.
+# Kept out of the template so the note is not sent to the model.
+STANDARD_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+EXPLICIT_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Detailed rules:
+1. Keys only open doors with matching colors.
+2. Keys are consumed immediately after opening a matching door.
+3. Opened doors remain open permanently.
+4. Switches toggle associated gates between open and closed states.
+5. Walls and closed doors cannot be crossed.
+6. The agent occupies exactly one cell at a time.
+7. Invalid actions do not help progress toward the goal.
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "Prompt",
+    "comparisons": [
+        "Standard: goal + mechanism descriptions + action list",
+        "Verbose: Standard + explicit rules",
+    ],
+    "decision": "If delta < 5%, use Standard. If > 5%, use Verbose.",
+    "prompts": {
+        "standard": STANDARD_PROMPT,
+        "explicit": EXPLICIT_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/condition_set_2_observation_format.py b/prompts/condition_set_2_observation_format.py
new file mode 100644
index 0000000..070aaad
--- /dev/null
+++ b/prompts/condition_set_2_observation_format.py
@@ -0,0 +1,39 @@
+"""Condition set 2: observation format."""
+
+from .condition_set_1_prompt import STANDARD_PROMPT
+
+IMAGE_PLUS_TEXT_PROMPT = STANDARD_PROMPT
+
+IMAGE_ONLY_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "Observation format",
+    "comparisons": [
+        "Image + text prompt",
+        "Image only (no text)",
+    ],
+    "decision": "Does text add meaningful signal?",
+    "prompts": {
+        "image_plus_text": IMAGE_PLUS_TEXT_PROMPT,
+        "image_only": IMAGE_ONLY_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/condition_set_3_context_window.py b/prompts/condition_set_3_context_window.py
new file mode 100644
--- /dev/null
+++ b/prompts/condition_set_3_context_window.py
@@ -0,0 +1,70 @@
+"""Condition set 3: context window."""
+
+HISTORY_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Observation history:
+
+Frame t-2:
+{FRAME_T_MINUS_2}
+
+Frame t-1:
+{FRAME_T_MINUS_1}
+
+Current frame:
+{CURRENT_FRAME}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+# Example of a filled-in exploration summary (kept out of the template so
+# it is not sent to the model as if it were real history):
+#   You've interacted with the yellow key and the yellow door. You've opened
+#   the yellow door. In the last 10 frames, you've traveled from [1, 1] to [1, 10].
+TEXT_SUMMARY_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{CURRENT_FRAME}
+
+Exploration summary:
+{MECHANISMS_INTERACTED_WITH}.{SUBGOALS_ACHIEVED}.{PATH_IN_LAST_10_FRAMES}.
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "Context window",
+    "comparisons": [
+        "0 history (current frame only)",
+        "Last 3 frames",
+        "Current frame + text summary of prior actions",
+    ],
+    "decision": "Is there a cheap alternative to feeding multiple frames?",
+    "prompts": {
+        "history": HISTORY_PROMPT,
+        "text_summary": TEXT_SUMMARY_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/condition_set_4_action_space.py b/prompts/condition_set_4_action_space.py
new file mode 100644
--- /dev/null
+++ b/prompts/condition_set_4_action_space.py
@@ -0,0 +1,61 @@
+"""Condition set 4: action space."""
+
+EGOCENTRIC_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+- TURN_LEFT
+- TURN_RIGHT
+- MOVE_FORWARD
+- INTERACT
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CARDINAL_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+- MOVE_NORTH
+- MOVE_SOUTH
+- MOVE_EAST
+- MOVE_WEST
+- INTERACT
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "Action space",
+    "comparisons": [
+        "Egocentric: TURN_LEFT, TURN_RIGHT, MOVE_FORWARD, INTERACT",
+        "Cardinal: MOVE_NORTH/SOUTH/EAST/WEST, INTERACT",
+    ],
+    "decision": "If delta is trivial (<5%), go egocentric. If massive (>15%), reassess.",
+    "prompts": {
+        "egocentric": EGOCENTRIC_PROMPT,
+        "cardinal": CARDINAL_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/condition_set_5_querying_strategy.py b/prompts/condition_set_5_querying_strategy.py
new file mode 100644
index 0000000..cdcf7bf
--- /dev/null
+++ b/prompts/condition_set_5_querying_strategy.py
@@ -0,0 +1,60 @@
+"""Condition set 5: querying strategy."""
+
+SUBGOAL_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Before acting, produce a short high-level plan.
+
+Your plan should identify:
+1. important keys
+2. important doors or gates
+3. likely exploration order
+4. important switches or bottlenecks
+
+Output concise numbered subgoals only.
+"""
+
+SUBGOAL_EXECUTION_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Current high-level plan:
+{SUBGOAL_PLAN}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "Querying strategy",
+    "comparisons": [
+        "Step-by-step: one action per query",
+        "Subgoal planning: model outputs plan first, then executes per-subgoal",
+    ],
+    "decision": "Does planning help? If yes, benchmark tests planning or execution?",
+    "prompts": {
+        "subgoal": SUBGOAL_PROMPT,
+        "subgoal_execution": SUBGOAL_EXECUTION_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/condition_set_6_in_context_learning.py b/prompts/condition_set_6_in_context_learning.py
new file mode 100644
index 0000000..962ea21
--- /dev/null
+++ b/prompts/condition_set_6_in_context_learning.py
@@ -0,0 +1,80 @@
+"""Condition set 6: in-context learning."""
+
+from .condition_set_1_prompt import STANDARD_PROMPT
+
+ONE_SHOT_PROMPT = """Example maze interaction:
+
+mechanisms present:
+{ICL_MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Observation:
+{ICL_OBSERVATION_1}
+
+Inventory:
+{ICL_INVENTORY_1}
+
+Action:
+{ICL_ACTION_1}
+
+Observation:
+{ICL_OBSERVATION_2}
+
+Inventory:
+{ICL_INVENTORY_2}
+
+Action:
+{ICL_ACTION_2}
+
+Observation:
+{ICL_OBSERVATION_3}
+
+Inventory:
+{ICL_INVENTORY_3}
+
+Action:
+{ICL_ACTION_3}
+
+End of example.
+
+Now solve the following maze.
+
+You are the red triangular agent solving a maze. Your mission is to navigate to the green square.
+
+mechanisms present:
+{MAZE_SPECIFIC_MECHS}
+
+Available actions:
+{DOMAIN_SPECIFIC_ACTION_LIST}
+
+Current observation:
+{DOMAIN_SPECIFIC_OBSERVATION}
+
+Inventory:
+{INVENTORY}
+
+Choose exactly ONE action to take from the available actions.
+
+Output only the action name.
+"""
+
+CONDITION_SET = {
+    "name": "In-context learning",
+    "comparisons": [
+        "Zero-shot: no examples",
+        "1-shot: one example trajectory (different maze, same mechanism type)",
+    ],
+    "decision": (
+        "If 1-shot dramatically improves performance, bottleneck is task "
+        "understanding, not capability. Determines zero-shot vs few-shot."
+    ),
+    "constraint": "ICL examples must not use evaluation mazes.",
+    "prompts": {
+        "zero_shot": STANDARD_PROMPT,
+        "one_shot": ONE_SHOT_PROMPT,
+    },
+}
+
+PROMPTS = CONDITION_SET["prompts"]
diff --git a/prompts/exp_design.py b/prompts/exp_design.py
new file mode 100644
index 0000000..3bd3260
--- /dev/null
+++ b/prompts/exp_design.py
@@ -0,0 +1,17 @@
+"""Experiment 3 prompt condition-set registry."""
+
+from .condition_set_1_prompt import CONDITION_SET as CONDITION_SET_1
+from .condition_set_2_observation_format import CONDITION_SET as CONDITION_SET_2
+from .condition_set_3_context_window import CONDITION_SET as CONDITION_SET_3
+from .condition_set_4_action_space import CONDITION_SET as CONDITION_SET_4
+from .condition_set_5_querying_strategy import CONDITION_SET as CONDITION_SET_5
+from .condition_set_6_in_context_learning import CONDITION_SET as CONDITION_SET_6
+
+CONDITION_SETS = {
+    CONDITION_SET_1["name"]: CONDITION_SET_1,
+    CONDITION_SET_2["name"]: CONDITION_SET_2,
+    CONDITION_SET_3["name"]: CONDITION_SET_3,
+    CONDITION_SET_4["name"]: CONDITION_SET_4,
+    CONDITION_SET_5["name"]: CONDITION_SET_5,
+    CONDITION_SET_6["name"]: CONDITION_SET_6,
+}