Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Registers the ogbench checkout at ./ogbench as a git submodule.
# NOTE(review): a url beginning with "./" is interpreted by git relative to the
# superproject's origin repository, not as an absolute location. Confirm this
# resolves to the intended ogbench repository for fresh clones.
[submodule "ogbench"]
path = ogbench
url = ./ogbench
21 changes: 21 additions & 0 deletions .vscode/launch.json
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add this to .gitignore

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
// Extend PYTHONPATH for the debugged file with three directories inside the
// ogbench checkout: ogbench/, ogbench/ogbench/ and ogbench/ogbench/procgen/.
// NOTE(review): entries are joined with ':' — confirm this is acceptable on
// every platform the team debugs on.
"env": {
"PYTHONPATH": "${workspaceFolder}/ogbench:${workspaceFolder}/ogbench/ogbench:${workspaceFolder}/ogbench/ogbench/procgen"
}

},

]
}
1 change: 1 addition & 0 deletions ogbench
Submodule ogbench added at d75094
5 changes: 5 additions & 0 deletions prompts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Prompt condition sets for maze evaluation experiments."""

# Re-export CONDITION_SETS from the sibling ``exp_design`` module so it can be
# imported directly from the ``prompts`` package.
from .exp_design import CONDITION_SETS

# CONDITION_SETS is the only name exposed by ``from prompts import *``.
__all__ = ["CONDITION_SETS"]
Binary file added prompts/__pycache__/__init__.cpython-312.pyc
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please delete this and add .pyc files to .gitignore

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please delete this and add .pyc files to .gitignore

Binary file not shown.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please delete this and add .pyc files to .gitignore

Binary file not shown.
63 changes: 63 additions & 0 deletions prompts/condition_set_1_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Condition set 1: prompt verbosity."""

# Brace-delimited placeholders ({MAZE_SPECIFIC_MECHS}, {INVENTORY}, ...) are
# left in the templates for the caller to fill in.
#
# {DOMAIN_SPECIFIC_OBSERVATION} is an image for the 2D and 3D domains and
# natural-language text for the NL domain. This note is deliberately a Python
# comment: written inside the string literal it becomes part of the prompt
# text that is sent to the model.
STANDARD_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Available actions:
{DOMAIN_SPECIFIC_ACTION_LIST}

Current observation:
{DOMAIN_SPECIFIC_OBSERVATION}

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Same sections as STANDARD_PROMPT, plus a numbered "Detailed rules" list
# inserted between the mechanism description and the action list.
EXPLICIT_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Detailed rules:
1. Keys only open doors with matching colors.
2. Keys are consumed immediately after opening a matching door.
3. Opened doors remain open permanently.
4. Switches toggle associated gates between open and closed states.
5. Walls and closed doors cannot be crossed.
6. The agent occupies exactly one cell at a time.
7. Invalid actions do not help progress toward the goal.

Available actions:
{DOMAIN_SPECIFIC_ACTION_LIST}

Current observation:
{DOMAIN_SPECIFIC_OBSERVATION}

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Description of this experiment: the conditions being compared, the rule for
# acting on the result, and the prompt template for each condition.
# NOTE: the human-readable strings call the second condition "Verbose"; its
# key under "prompts" is "explicit".
CONDITION_SET = {
    "name": "Prompt",
    "comparisons": [
        "Standard: goal + mechanism descriptions + action list",
        "Verbose: Standard + explicit rules",
    ],
    "decision": "If delta < 5%, use Standard. If > 5%, use Verbose.",
    "prompts": {
        "standard": STANDARD_PROMPT,
        "explicit": EXPLICIT_PROMPT,
    },
}

# Shortcut to the condition-key -> template mapping (same dict object).
PROMPTS = CONDITION_SET["prompts"]
39 changes: 39 additions & 0 deletions prompts/condition_set_2_observation_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Condition set 2: observation format."""

from .condition_set_1_prompt import STANDARD_PROMPT

# Template for the image-only condition, assembled line by line. Each piece
# ends with its own newline so the result matches a plain multi-line literal.
IMAGE_ONLY_PROMPT = (
    "You are the red triangular agent solving a maze. "
    "Your mission is to navigate to the green square.\n"
    "\n"
    "mechanisms present:\n"
    "{MAZE_SPECIFIC_MECHS}\n"
    "\n"
    "Available actions:\n"
    "{DOMAIN_SPECIFIC_ACTION_LIST}\n"
    "\n"
    "Current observation:\n"
    "{DOMAIN_SPECIFIC_OBSERVATION}\n"
    "\n"
    "Inventory:\n"
    "{INVENTORY}\n"
    "\n"
    "Choose exactly ONE action to take from the available actions.\n"
    "\n"
    "Output only the action name.\n"
)

# The image + text condition reuses the standard template from condition set 1.
IMAGE_PLUS_TEXT_PROMPT = STANDARD_PROMPT

# Condition key -> prompt template.
PROMPTS = {
    "image_plus_text": IMAGE_PLUS_TEXT_PROMPT,
    "image_only": IMAGE_ONLY_PROMPT,
}

# Experiment description; "prompts" is the very same dict object as PROMPTS.
CONDITION_SET = {
    "name": "Observation format",
    "comparisons": [
        "Image + text prompt",
        "Image only (no text)",
    ],
    "decision": "Does text add meaningful signal?",
    "prompts": PROMPTS,
}
69 changes: 69 additions & 0 deletions prompts/condition_set_3_context_window.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Condition set 3: context window."""

# Template that shows the model the two previous frames plus the current one.
# Brace-delimited placeholders are left for the caller to fill in.
HISTORY_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Available actions:
{DOMAIN_SPECIFIC_ACTION_LIST}

Observation history:

Frame t-2:
{FRAME_T_MINUS_2}

Frame t-1:
{FRAME_T_MINUS_1}

Current frame:
{CURRENT_FRAME}

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Template that shows only the current frame plus a text summary of what has
# happened so far. The summary line is three placeholders, each followed by a
# period.
#
# Example of a filled-in summary (kept as a Python comment so it is NOT sent to
# the model, where it could be mistaken for the agent's actual history):
#   you've interacted with the yellow key and the yellow door.
#   You've opened the yellow door.
#   In the last 10 frames, you've traveled from [1, 1] to [1, 10]
TEXT_SUMMARY_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Available actions:
{DOMAIN_SPECIFIC_ACTION_LIST}

Current observation:
{CURRENT_FRAME}

Exploration summary:
{MECHANISMS_INTERACTED_WITH}.{SUBGOALS_ACHIEVED}.{PATH_IN_LAST_10_FRAMES}.

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Experiment description and the prompt template for each condition.
# NOTE(review): three conditions are listed under "comparisons" but only two
# templates are registered; the "0 history" condition has no entry here.
# Presumably it reuses a single-frame template defined elsewhere — confirm.
CONDITION_SET = {
    "name": "Context window",
    "comparisons": [
        "0 history (current frame only)",
        "Last 3 frames",
        "Current frame + text summary of prior actions",
    ],
    "decision": "Is there a cheap alternative to feeding multiple frames?",
    "prompts": {
        "history": HISTORY_PROMPT,
        "text_summary": TEXT_SUMMARY_PROMPT,
    },
}

# Shortcut to the condition-key -> template mapping (same dict object).
PROMPTS = CONDITION_SET["prompts"]
62 changes: 62 additions & 0 deletions prompts/condition_set_4_action_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Condition set 4: action space."""

# The two templates below are meant to differ ONLY in their "Available
# actions" list, so that any performance delta can be attributed to the action
# space. Keep every other line identical between them.

# Agent-relative actions: turn in place, step forward, interact.
EGOCENTRIC_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Available actions:
- TURN_LEFT
- TURN_RIGHT
- MOVE_FORWARD
- INTERACT

Current observation:
{DOMAIN_SPECIFIC_OBSERVATION}

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Compass-direction actions: move north/south/east/west, interact.
CARDINAL_PROMPT = """You are the red triangular agent solving a maze. Your mission is to navigate to the green square.

mechanisms present:
{MAZE_SPECIFIC_MECHS}

Available actions:
- MOVE_NORTH
- MOVE_SOUTH
- MOVE_EAST
- MOVE_WEST
- INTERACT

Current observation:
{DOMAIN_SPECIFIC_OBSERVATION}

Inventory:
{INVENTORY}

Choose exactly ONE action to take from the available actions.

Output only the action name.
"""

# Experiment description and the prompt template for each condition.
CONDITION_SET = {
    "name": "Action space",
    "comparisons": [
        "Egocentric: TURN_LEFT, TURN_RIGHT, MOVE_FORWARD, INTERACT",
        "Cardinal: MOVE_NORTH/SOUTH/EAST/WEST, INTERACT",
    ],
    "decision": "If delta is trivial (<5%), go egocentric. If massive (>15%), reassess.",
    "prompts": {
        "egocentric": EGOCENTRIC_PROMPT,
        "cardinal": CARDINAL_PROMPT,
    },
}

# Shortcut to the condition-key -> template mapping (same dict object).
PROMPTS = CONDITION_SET["prompts"]
60 changes: 60 additions & 0 deletions prompts/condition_set_5_querying_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Condition set 5: querying strategy."""

# Planning template: asks for a numbered high-level plan and lists no actions.
# Built line by line; every piece carries its own trailing newline.
SUBGOAL_PROMPT = (
    "You are the red triangular agent solving a maze. "
    "Your mission is to navigate to the green square.\n"
    "\n"
    "mechanisms present:\n"
    "{MAZE_SPECIFIC_MECHS}\n"
    "\n"
    "Current observation:\n"
    "{DOMAIN_SPECIFIC_OBSERVATION}\n"
    "\n"
    "Inventory:\n"
    "{INVENTORY}\n"
    "\n"
    "Before acting, produce a short high-level plan.\n"
    "\n"
    "Your plan should identify:\n"
    "1. important keys\n"
    "2. important doors or gates\n"
    "3. likely exploration order\n"
    "4. important switches or bottlenecks\n"
    "\n"
    "Output concise numbered subgoals only.\n"
)

# Execution template: includes a {SUBGOAL_PLAN} section and asks for exactly
# one action from the available list.
SUBGOAL_EXECUTION_PROMPT = (
    "You are the red triangular agent solving a maze. "
    "Your mission is to navigate to the green square.\n"
    "\n"
    "mechanisms present:\n"
    "{MAZE_SPECIFIC_MECHS}\n"
    "\n"
    "Current high-level plan:\n"
    "{SUBGOAL_PLAN}\n"
    "\n"
    "Available actions:\n"
    "{DOMAIN_SPECIFIC_ACTION_LIST}\n"
    "\n"
    "Current observation:\n"
    "{DOMAIN_SPECIFIC_OBSERVATION}\n"
    "\n"
    "Inventory:\n"
    "{INVENTORY}\n"
    "\n"
    "Choose exactly ONE action to take from the available actions.\n"
    "\n"
    "Output only the action name.\n"
)

# Condition key -> prompt template.
PROMPTS = {
    "subgoal": SUBGOAL_PROMPT,
    "subgoal_execution": SUBGOAL_EXECUTION_PROMPT,
}

# Experiment description; "prompts" is the very same dict object as PROMPTS.
CONDITION_SET = {
    "name": "Querying strategy",
    "comparisons": [
        "Step-by-step: one action per query",
        "Subgoal planning: model outputs plan first, then executes per-subgoal",
    ],
    "decision": "Does planning help? If yes, benchmark tests planning or execution?",
    "prompts": PROMPTS,
}
Loading