diff --git a/loop.sh b/loop.sh index a34e327..ebefa46 100755 --- a/loop.sh +++ b/loop.sh @@ -191,19 +191,17 @@ fi # Two modes: # Interactive (default when TTY available): runs claude in full interactive mode. # The user sees the complete CC session (tool calls, file edits, etc.) in the terminal. -# Output is captured via `script` for verdict parsing. +# No output capture — state is tracked via prd.json and .verdict file. # Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation. +# Output is captured to a temp file for verdict parsing. # -# The function prints the captured output to stdout for the caller to capture. +# The function prints captured output to stdout (headless) or nothing (interactive). run_agent() { local prompt="$1" - local output_file - output_file=$(mktemp) - LOOP_AGENT_TMPFILE="$output_file" # exposed for trap cleanup + local role="${2:-}" # "generator" or "evaluator" — used for verdict file - local prompt_file - prompt_file=$(mktemp) - printf '%s\n' "$prompt" > "$prompt_file" + # Clean up any previous verdict file + rm -f "$LOOP_DIR/.verdict" # Determine whether we can run interactively local has_tty=false @@ -213,52 +211,61 @@ run_agent() { # Run in subshell so a non-zero exit from the AI tool doesn't kill the loop. local agent_exit=0 - ( - case "$TOOL" in - claude) - if [ "$has_tty" = true ]; then - # Interactive mode: full CC session visible in terminal. - # Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI). - # Use script to capture output while showing it live. - script -q "$output_file" \ - claude --dangerously-skip-permissions "$(cat "$prompt_file")" - else - # Headless mode: --print for autonomous operation + if [ "$has_tty" = true ]; then + # --- Interactive mode --- + # Run claude directly in the terminal — full interactive UI visible. + # No output capture. State tracked via files (prd.json, .verdict). + ( + case "$TOOL" in + claude) + claude --dangerously-skip-permissions "$prompt" + ;; + amp) + amp --dangerously-allow-all "$prompt" + ;; + *) + log "ERROR: Unknown tool '$TOOL'" + exit 1 + ;; + esac + ) || agent_exit=$? + + # In interactive mode, read verdict from file if evaluator wrote one + if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then + cat "$LOOP_DIR/.verdict" + fi + else + # --- Headless mode --- + local output_file + output_file=$(mktemp) + LOOP_AGENT_TMPFILE="$output_file" + + ( + case "$TOOL" in + claude) printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \ claude --dangerously-skip-permissions --output-format text \ --print 2>&1 > "$output_file" - fi - ;; - amp) - if [ "$has_tty" = true ]; then - script -q "$output_file" \ - amp --dangerously-allow-all "$(cat "$prompt_file")" - else + ;; + amp) printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \ amp --dangerously-allow-all 2>&1 > "$output_file" - fi - ;; - *) - log "ERROR: Unknown tool '$TOOL'" - exit 1 - ;; - esac - ) || agent_exit=$? + ;; + *) + log "ERROR: Unknown tool '$TOOL'" + exit 1 + ;; + esac + ) || agent_exit=$? - rm -f "$prompt_file" + if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then + log "WARNING: Agent exited with code $agent_exit and produced no output." + fi - if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then - log "WARNING: Agent exited with code $agent_exit and produced no output." - fi - - # Strip ANSI escape codes for clean verdict parsing - if command -v sed &>/dev/null; then - sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file" - else cat "$output_file" + rm -f "$output_file" + LOOP_AGENT_TMPFILE="" fi - rm -f "$output_file" - LOOP_AGENT_TMPFILE="" } # --- Parse evaluator verdict --- @@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do exit 0 fi - GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT") + GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator") - if [ -z "$GENERATOR_OUTPUT" ]; then + # In interactive mode, generator output is empty (displayed in terminal, not captured). + # State is tracked via prd.json — the generator updates it directly. + if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration." continue fi @@ -390,11 +399,16 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do fi EVAL_PROMPT=$(build_prompt "evaluator" "$MODE") - EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT") + EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator") if [ -z "$EVAL_OUTPUT" ]; then - log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT." - EVAL_OUTPUT="REJECTEvaluator produced no output" + # In interactive mode, check the verdict file + if [ -f "$LOOP_DIR/.verdict" ]; then + EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict") + else + log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT." + EVAL_OUTPUT="REJECTEvaluator produced no output" + fi fi VERDICT=$(parse_verdict "$EVAL_OUTPUT") diff --git a/prompts/evaluator/_base.md b/prompts/evaluator/_base.md index 9ab7165..465590d 100644 --- a/prompts/evaluator/_base.md +++ b/prompts/evaluator/_base.md @@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just ## Verdict Format -You MUST end your response with EXACTLY ONE of these verdict blocks: +You MUST do TWO things when delivering your verdict: -### If the story genuinely passes all criteria: +### 1. Write the verdict to a file +Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision. + +**If PASS:** ``` PASS ``` -### If any criterion is not met or issues are found: - +**If REJECT:** ``` REJECT @@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg ``` +### 2. Also include the verdict in your response + +End your response with the same verdict block so it's visible in the terminal output. + ## What Warrants Rejection - ANY acceptance criterion not actually met (not "mostly met" — MET)