ProsusAI · kevinngo1304 · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@ venv/
 # IDEs
 .vscode/
 .idea/
+.claude/
 
 # Build files
 dist/

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,18 @@ All notable changes to Murphy will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [1.2.0] - 2026-06-02
+
+### Added
+- Lite mode (`--lite` CLI flag / `lite: true` in the REST API) for a faster, simpler run aimed at quick product feedback: Murphy builds a compact persona plan directly from the goal or available analysis, then runs a lighter browser-agent prompt
+- Lite runs return a structured `LiteResult` per scenario with a 1–10 `grade` plus `flaws`, `improvements`, `fixes`, and `other_feedback`, summarised in a dedicated terminal output
+- `lite` field on the `/generate-plan`, `/evaluate`, and `/execute` REST API request models
+- `LITE_MODE.md` documentation describing the mode, what it skips, and how to run it
+
+### Changed
+- Lite mode skips LLM test generation, the Murphy judge, full JSON/Markdown report generation, and interactive review pauses
+- Disabled the unused `write_file` tool in Murphy runs
+
 ## [1.1.0] - 2026-04-07
 
 ### Added

diff --git a/LITE_MODE.md b/LITE_MODE.md
@@ -0,0 +1,54 @@
+# Murphy Lite Mode
+
+Lite mode is a faster, simpler Murphy run for quick product feedback. It is enabled with `--lite` in the CLI or `lite: true` in the REST API.
+
+## What It Skips
+
+- LLM test generation
+- Interactive feature and test-plan review pauses
+- Murphy judge calls
+- Full JSON and Markdown report generation
+
+## What It Returns
+
+Each scenario returns a structured `LiteResult`:
+
+- `grade`: 1-10 overall experience score
+- `flaws`: concrete problems or blockers
+- `improvements`: product or UX improvements
+- `fixes`: implementation-level fixes
+- `other_feedback`: additional useful observations
+
+## CLI
+
+```bash
+uv run murphy --url https://example.com --goal "Test agent creation flow" --lite
+```
+
+You can still use `--max-tests`, `--parallel`, `--provider`, `--model`, `--auth`, `--no-auth`, `--features`, and `--plan`. When `--plan` is given, the scenarios are loaded from that file and no lite plan is built.
+
+## REST
+
+Set `lite: true` on `/generate-plan`, `/evaluate`, or `/execute`.
+
+```json
+{
+  "url": "https://example.com",
+  "goal": "Test agent creation flow",
+  "max_tests": 1,
+  "lite": true
+}
+```
+
+## Speed Experiment
+
+Use the manual experiment runner:
+
+```bash
+uv run python exp_2/lite_speed/run_compare.py \
+  --url https://work.toqan.ai \
+  --goal "Test agent creation flow" \
+  --max-tests 1 \
+  --parallel 1 \
+  --repetitions 1
+```
diff --git a/README.md b/README.md
@@ -78,6 +78,9 @@ uv run murphy --url https://example.com
 # Goal-directed: explores with focus, skips feature discovery, generates plan directly
 uv run murphy --url https://example.com --goal "test the checkout flow"
 
+# Lite mode: faster, simpler run that skips test generation, judge, and reports
+uv run murphy --url https://example.com --goal "test the checkout flow" --lite
+
 # Site requires login — opens browser for manual auth first (local only, not Docker)
 uv run murphy --url https://example.com --auth
 
@@ -115,6 +118,8 @@ An AI agent explores the site with the given goal in mind, then synthesizes a te
 
 **Execution (both strategies):** An AI agent runs each test scenario in a real browser, and a separate judge LLM evaluates pass/fail. Saves `evaluation_report.json` and `evaluation_report.md`.
 
+**Lite mode (`--lite`):** Murphy builds a compact persona plan directly from the goal or available analysis, then runs a lighter browser-agent prompt that returns a structured `LiteResult` per scenario: a 1–10 `grade` plus `flaws`, `improvements`, `fixes`, and `other_feedback`. It skips LLM test generation, the judge, report generation, and interactive review pauses.
+
 You can resume from any point by passing `--features` or `--plan` with a previously generated (and optionally edited) file.
 
 ## Output

diff --git a/murphy/api/cli.py b/murphy/api/cli.py
@@ -7,6 +7,7 @@
     murphy --url https://example.com --features features.md     # skip analysis, load features from file
     murphy --url https://example.com --plan plan.yaml           # skip analysis + generation, load test plan
     murphy --url https://example.com --goal "test the checkout flow"
+    murphy --url https://example.com --goal "test the checkout flow" --lite
 """
 
 from __future__ import annotations
@@ -26,7 +27,7 @@
 
 if TYPE_CHECKING:
 	from murphy.api.server import ServerState
-	from murphy.models import TestPlan, TestResult
+	from murphy.models import TestPlan, TestResult, TokenUsage, WebsiteAnalysis
 
 load_dotenv()
 
@@ -40,6 +41,35 @@
 )
 
 
+def _write_reports_and_log_results(
+	url: str,
+	analysis: WebsiteAnalysis | None,
+	results: list[TestResult],
+	output_dir: Path,
+	*,
+	use_lite: bool,
+	persona_discovery_tokens: TokenUsage | None = None,
+	murphy_tokens: TokenUsage | None = None,
+) -> None:
+	"""Call write_reports_and_print when analysis is truthy, else log the standard summary (non-lite only); lite runs always end with the lite summary."""
+	if analysis:  # NOTE(review): also taken in lite mode when analysis is set — confirm this matches the "lite skips report generation" docs
+		from murphy.core.summary import write_reports_and_print  # imported locally, only on the report-writing path
+
+		write_reports_and_print(
+			url,
+			analysis,
+			results,
+			output_dir,
+			persona_discovery_tokens=persona_discovery_tokens,
+			murphy_tokens=murphy_tokens,
+		)
+	elif not use_lite:  # no analysis and not lite: fall back to _log_results_summary
+		_log_results_summary(results)
+
+	if use_lite:  # independent of the branch above: lite runs get the lite summary with or without reports
+		_log_lite_summary(results)
+
+
 def main() -> int:
 	parser = argparse.ArgumentParser(
 		prog='murphy',
@@ -51,6 +81,7 @@ def main() -> int:
 	parser.add_argument('--no-auth', action='store_true', help='Skip auth detection entirely, treat site as public')
 	parser.add_argument('--features', help='Path to existing features markdown (skips analysis, goes to test generation)')
 	parser.add_argument('--plan', help='Path to existing YAML test plan (skips analysis + test generation)')
+	parser.add_argument('--lite', action='store_true', help='Run faster lite mode: skip test generation and judge')
 	parser.add_argument('--max-tests', type=int, default=None, help='Max test scenarios (default: number of personas)')
 	parser.add_argument(
 		'--provider', default='openai', help='LLM provider (default: openai). e.g. google, anthropic, azure, mistral'
@@ -119,7 +150,7 @@ async def _async_main(args: argparse.Namespace) -> None:
 	from murphy.browser.patches import apply as apply_patches
 	from murphy.core.analysis import analyze_website
 	from murphy.core.execution import execute_tests_with_session
-	from murphy.core.generation import explore_and_generate_plan, generate_tests
+	from murphy.core.generation import explore_and_generate_plan, generate_tests, make_lite_plan
 	from murphy.core.summary import build_summary, write_reports_and_print
 	from murphy.io.features_io import read_features_markdown, write_features_markdown
 	from murphy.io.fixtures import ensure_dummy_fixture_files
@@ -222,6 +253,7 @@ async def _async_main(args: argparse.Namespace) -> None:
 
 		# ── Phase 1–2: Discover features & generate plan ──
 		use_exploration_first = bool(args.goal and not args.features and not args.plan)
+		use_lite = bool(args.lite)
 
 		if args.plan:
 			# Skip both analysis and test generation
@@ -231,6 +263,27 @@ async def _async_main(args: argparse.Namespace) -> None:
 			if url != args.url:
 				logger.warning('Plan URL (%s) differs from --url (%s). Using --url.', url, args.url)
 			logger.info('Loaded %d scenarios from %s', len(test_plan.scenarios), plan_path)
+		elif use_lite:
+			if args.features:
+				features_path = Path(args.features)
+				assert features_path.exists(), f'Features file not found: {features_path}'
+				analysis = read_features_markdown(features_path)
+				logger.info('Loaded %d features from %s', len(analysis.features), features_path)
+			elif not args.goal:
+				analysis = await analyze_website(args.url, llm, goal=args.goal, browser_session=browser_session)
+				features_path = write_features_markdown(analysis, output_dir)
+				logger.info('\n  Features saved: %s', features_path)
+
+			test_plan = make_lite_plan(
+				args.url,
+				goal=args.goal,
+				analysis=analysis,
+				max_tests=args.max_tests,
+				discovered_personas=discovered_personas,
+			)
+			plan_path = save_test_plan(args.url, test_plan, output_dir)
+			logger.info('\n  Lite plan saved: %s', plan_path)
+			logger.info('  Using %d lite scenarios.\n', len(test_plan.scenarios))
 		elif use_exploration_first:
 			# Exploration-first path: explore → summarize → synthesize plan
 			test_plan = await explore_and_generate_plan(
@@ -350,18 +403,18 @@ def _on_test_complete(results: list[TestResult]) -> None:
 				judge_llm=judge_llm,
 				output_dir=output_dir,
 				discovered_personas=discovered_personas,
+				use_lite=use_lite,
+				analysis=analysis,
+			)
+			_write_reports_and_log_results(
+				args.url,
+				analysis,
+				results,
+				output_dir,
+				use_lite=use_lite,
+				persona_discovery_tokens=persona_discovery_tokens,
+				murphy_tokens=_get_murphy_tokens(),
 			)
-			if analysis:
-				write_reports_and_print(
-					args.url,
-					analysis,
-					results,
-					output_dir,
-					persona_discovery_tokens=persona_discovery_tokens,
-					murphy_tokens=_get_murphy_tokens(),
-				)
-			else:
-				_log_results_summary(results)
 			return
 
 		# ── Server UI mode (--ui) ──
@@ -384,6 +437,8 @@ async def _execute_fn(plan: TestPlan, state: ServerState) -> list[TestResult]:
 				judge_llm=judge_llm,
 				output_dir=output_dir,
 				discovered_personas=discovered_personas,
+				use_lite=use_lite,
+				analysis=analysis,
 			)
 
 		state = ServerState(
@@ -402,17 +457,15 @@ async def _execute_fn(plan: TestPlan, state: ServerState) -> list[TestResult]:
 			while True:
 				await asyncio.sleep(1)
 				if state.done and state.results and not getattr(state, '_reports_written', False):
-					if analysis:
-						write_reports_and_print(
-							args.url,
-							analysis,
-							state.results,
-							output_dir,
-							persona_discovery_tokens=persona_discovery_tokens,
-							murphy_tokens=_get_murphy_tokens(),
-						)
-					else:
-						_log_results_summary(state.results)
+					_write_reports_and_log_results(
+						args.url,
+						analysis,
+						state.results,
+						output_dir,
+						use_lite=use_lite,
+						persona_discovery_tokens=persona_discovery_tokens,
+						murphy_tokens=_get_murphy_tokens(),
+					)
 					state._reports_written = True  # type: ignore[attr-defined]
 		except KeyboardInterrupt:
 			pass
@@ -467,5 +520,24 @@ def _log_results_summary(results: list[TestResult]) -> None:
 	logger.info('\n  Pass rate: %s%% (%d/%d)', summary.pass_rate, summary.passed, summary.total)
 
 
+def _log_lite_summary(results: list[TestResult]) -> None:  # Log a banner, then one info line per result.
+	logger.info('\n%s', '=' * 60)
+	logger.info('Lite Mode Complete — %d scenario(s)', len(results))
+	logger.info('%s', '=' * 60)
+	for result in results:
+		lite_result = result.lite_result
+		if lite_result is None:  # no lite payload on this result: log result.reason instead of a grade
+			logger.info('  [%s] no lite result — %s', result.scenario.test_persona, result.reason)
+			continue
+		logger.info(  # grade plus list lengths only; other_feedback is not included in this line
+			'  [%s] grade=%d flaws=%d improvements=%d fixes=%d',
+			result.scenario.test_persona,
+			lite_result.grade,
+			len(lite_result.flaws),
+			len(lite_result.improvements),
+			len(lite_result.fixes),
+		)
+
+
 if __name__ == '__main__':
 	sys.exit(main())
diff --git a/murphy/api/request_models.py b/murphy/api/request_models.py
@@ -36,6 +36,7 @@ class GeneratePlanRequest(BaseModel):
 	analysis: Annotated[WebsiteAnalysis, BeforeValidator(_parse_json_string)]
 	max_tests: int = 8
 	goal: str | None = None
+	lite: bool = False
 	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
 	webhook_url: str | None = None
@@ -55,6 +56,7 @@ class ExecuteRequest(BaseModel):
 	judge_model: str | None = None
 	max_steps: int = 15
 	max_concurrent: int = 3
+	lite: bool = False
 	webhook_url: str | None = None
 	async_mode: bool = Field(False, alias='async')
 
@@ -65,6 +67,7 @@ class EvaluateRequest(BaseModel):
 	url: str
 	goal: str | None = None
 	max_tests: int = 8
+	lite: bool = False
 	provider: str = 'openai'
 	model: str = 'gpt-5-mini'
 	judge_provider: str | None = None

diff --git a/murphy/api/rest.py b/murphy/api/rest.py
@@ -70,7 +70,7 @@ async def _core_generate_plan(req: GeneratePlanRequest) -> dict[str, Any]:
 	from murphy.core.pipeline import run_generate_plan
 
 	test_plan = await run_generate_plan(
-		req.url, req.analysis, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal
+		req.url, req.analysis, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal, lite=req.lite
 	)
 	return test_plan.model_dump()
 
@@ -101,6 +101,7 @@ async def _core_execute(req: ExecuteRequest) -> dict[str, Any]:
 		goal=req.goal,
 		max_steps=req.max_steps,
 		max_concurrent=req.max_concurrent,
+		lite=req.lite,
 	)
 	return ExecuteResult(results=results, summary=summary).model_dump()
 
@@ -109,7 +110,9 @@ async def _core_evaluate(req: EvaluateRequest) -> dict[str, Any]:
 	"""Run exploration-first evaluation: explore site → generate test plan."""
 	from murphy.core.pipeline import run_evaluate
 
-	test_plan = await run_evaluate(req.url, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal)
+	test_plan = await run_evaluate(
+		req.url, req.model, provider=req.provider, max_tests=req.max_tests, goal=req.goal, lite=req.lite
+	)
 	return test_plan.model_dump()
 
 

diff --git a/murphy/api/templates.py b/murphy/api/templates.py
@@ -45,6 +45,7 @@
 .badge-medium { background: var(--text); color: #fff; }
 .badge-low { background: var(--gray); color: #fff; }
 .badge-pass { background: var(--green); color: #fff; }
+.badge-fail { background: var(--red); color: #fff; }
 .badge-fail-website { background: var(--red); color: #fff; }
 .badge-fail-test { background: var(--orange); color: #fff; }
 .test-name { font-weight: 600; flex: 1; font-size: .95rem; }
@@ -381,6 +382,7 @@ def render_results_html(
 
 	sections = [
 		('Passed', [r for r in results if r.success]),
+		('Failed', [r for r in results if r.success is not True and r.failure_category is None]),
 		('Failed — Website Issue', [r for r in results if r.failure_category == 'website_issue']),
 		('Failed — Test Limitation', [r for r in results if r.failure_category == 'test_limitation']),
 	]
@@ -395,6 +397,9 @@ def render_results_html(
 			if r.success:
 				badge_cls = 'badge-pass'
 				badge_text = 'PASS'
+			elif r.failure_category is None:
+				badge_cls = 'badge-fail'
+				badge_text = 'FAILED'
 			elif r.failure_category == 'website_issue':
 				badge_cls = 'badge-fail-website'
 				badge_text = 'WEBSITE ISSUE'

diff --git a/murphy/browser/cleanup.py b/murphy/browser/cleanup.py
@@ -140,6 +140,8 @@ def _find_stale_browser_pids() -> list[int]:
 		if not cmdline:
 			continue
 		if any(marker in cmdline for marker in profile_markers) and any(marker in cmdline for marker in browser_markers):
-			pids.add(proc.pid)
+			pid = proc.info.get('pid')
+			if isinstance(pid, int):
+				pids.add(pid)
 
 	return sorted(pids)
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,7 @@ venv/ @@
     # IDEs
     .vscode/
     .idea/
+    .claude/
     # Build files
     dist/
@@ Expand Down @@