feat: true interactive mode — run claude directly, verdict via file, no script/capture

2026-03-27 13:07:25 -04:00
parent 5e456cff6d
commit 1e7f7ea6ed
2 changed files with 74 additions and 54 deletions
--- a/loop.sh
+++ b/loop.sh
@@ -191,19 +191,17 @@ fi
 # Two modes:
 #   Interactive (default when TTY available): runs claude in full interactive mode.
 #     The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
-#     Output is captured via `script` for verdict parsing.
+#     No output capture — state is tracked via prd.json and .verdict file.
 #   Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation.
+#     Output is captured to a temp file for verdict parsing.
 #
-# The function prints the captured output to stdout for the caller to capture.
+# The function prints captured output to stdout (headless); in interactive mode it prints only the evaluator's .verdict file contents, if one was written.
 run_agent() {
    local prompt="$1"
-    local output_file
-    output_file=$(mktemp)
-    LOOP_AGENT_TMPFILE="$output_file"  # exposed for trap cleanup
+    local role="${2:-}"  # "generator" or "evaluator" — used for verdict file

-    local prompt_file
-    prompt_file=$(mktemp)
-    printf '%s\n' "$prompt" > "$prompt_file"
+    # Clean up any previous verdict file
+    rm -f "$LOOP_DIR/.verdict"

    # Determine whether we can run interactively
    local has_tty=false
@@ -213,30 +211,17 @@ run_agent() {

    # Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
    local agent_exit=0
+    if [ "$has_tty" = true ]; then
+        # --- Interactive mode ---
+        # Run claude directly in the terminal — full interactive UI visible.
+        # No output capture. State tracked via files (prd.json, .verdict).
        (
            case "$TOOL" in
                claude)
-                if [ "$has_tty" = true ]; then
-                    # Interactive mode: full CC session visible in terminal.
-                    # Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI).
-                    # Use script to capture output while showing it live.
-                    script -q "$output_file" \
-                        claude --dangerously-skip-permissions "$(cat "$prompt_file")"
-                else
-                    # Headless mode: --print for autonomous operation
-                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
-                        claude --dangerously-skip-permissions --output-format text \
-                        --print 2>&1 > "$output_file"
-                fi
+                    claude --dangerously-skip-permissions "$prompt"
                    ;;
                amp)
-                if [ "$has_tty" = true ]; then
-                    script -q "$output_file" \
-                        amp --dangerously-allow-all "$(cat "$prompt_file")"
-                else
-                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
-                        amp --dangerously-allow-all 2>&1 > "$output_file"
-                fi
+                    amp --dangerously-allow-all "$prompt"
                    ;;
                *)
                    log "ERROR: Unknown tool '$TOOL'"
@@ -245,20 +230,42 @@ run_agent() {
            esac
        ) || agent_exit=$?

-    rm -f "$prompt_file"
+        # In interactive mode, read verdict from file if evaluator wrote one
+        if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
+            cat "$LOOP_DIR/.verdict"
+        fi
+    else
+        # --- Headless mode ---
+        local output_file
+        output_file=$(mktemp)
+        LOOP_AGENT_TMPFILE="$output_file"
+
+        (
+            case "$TOOL" in
+                claude)
+                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
+                        claude --dangerously-skip-permissions --output-format text \
+                        --print 2>&1 > "$output_file"
+                    ;;
+                amp)
+                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
+                        amp --dangerously-allow-all 2>&1 > "$output_file"
+                    ;;
+                *)
+                    log "ERROR: Unknown tool '$TOOL'"
+                    exit 1
+                    ;;
+            esac
+        ) || agent_exit=$?

        if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
            log "WARNING: Agent exited with code $agent_exit and produced no output."
        fi

-    # Strip ANSI escape codes for clean verdict parsing
-    if command -v sed &>/dev/null; then
-        sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file"
-    else
        cat "$output_file"
-    fi
        rm -f "$output_file"
        LOOP_AGENT_TMPFILE=""
+    fi
 }

 # --- Parse evaluator verdict ---
@@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
        exit 0
    fi

-    GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT")
+    GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")

-    if [ -z "$GENERATOR_OUTPUT" ]; then
+    # In interactive mode, generator output is empty (shown in the terminal, not captured);
+    # state lives in prd.json, which the generator updates directly. NOTE(review): only LOOP_HEADLESS=true is tested here, so no-TTY headless runs skip this guard.
+    if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then
        log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
        continue
    fi
@@ -390,12 +399,17 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
        fi

        EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
-        EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT")
+        EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")

        if [ -z "$EVAL_OUTPUT" ]; then
-            log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT."
+            # In interactive mode, check the verdict file
+            if [ -f "$LOOP_DIR/.verdict" ]; then
+                EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
+            else
+                log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT."
                EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
            fi
+        fi

        VERDICT=$(parse_verdict "$EVAL_OUTPUT")

--- a/prompts/evaluator/_base.md
+++ b/prompts/evaluator/_base.md
@@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just

 ## Verdict Format

-You MUST end your response with EXACTLY ONE of these verdict blocks:
+You MUST do TWO things when delivering your verdict:

-### If the story genuinely passes all criteria:
+### 1. Write the verdict to a file

+Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision.
+
+**If PASS:**
 ```
 <verdict>PASS</verdict>
 ```

-### If any criterion is not met or issues are found:
-
+**If REJECT:**
 ```
 <verdict>REJECT</verdict>
 <rejection_reason>
@@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg
 </rejection_reason>
 ```

+### 2. Also include the verdict in your response
+
+End your response with the same verdict block so it's visible in the terminal output.
+
 ## What Warrants Rejection

 - ANY acceptance criterion not actually met (not "mostly met" — MET)