feat: true interactive mode — run claude directly, verdict via file, no script/capture

2026-03-27 13:07:25 -04:00
parent 5e456cff6d
commit 1e7f7ea6ed
2 changed files with 74 additions and 54 deletions
--- a/loop.sh
+++ b/loop.sh
@@ -191,19 +191,17 @@ fi
 # Two modes:
 #   Interactive (default when TTY available): runs claude in full interactive mode.
 #     The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
-#     Output is captured via `script` for verdict parsing.
+#     No output capture — state is tracked via prd.json and .verdict file.
 #   Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation.
 #     Output is captured to a temp file for verdict parsing.
 #
-# The function prints the captured output to stdout for the caller to capture.
+# The function prints captured output to stdout (headless); in interactive mode it prints only the evaluator's .verdict file contents (if present), otherwise nothing.
 run_agent() {
    local prompt="$1"
-    local output_file
+    local role="${2:-}"  # "generator" or "evaluator" — used for verdict file
    output_file=$(mktemp)
    LOOP_AGENT_TMPFILE="$output_file"  # exposed for trap cleanup
-    local prompt_file
+    # Clean up any previous verdict file
-    prompt_file=$(mktemp)
+    rm -f "$LOOP_DIR/.verdict"
    printf '%s\n' "$prompt" > "$prompt_file"
    # Determine whether we can run interactively
    local has_tty=false
@@ -213,52 +211,61 @@ run_agent() {
    # Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
    local agent_exit=0
-    (
+    if [ "$has_tty" = true ]; then
-        case "$TOOL" in
+        # --- Interactive mode ---
-            claude)
+        # Run claude directly in the terminal — full interactive UI visible.
-                if [ "$has_tty" = true ]; then
+        # No output capture. State tracked via files (prd.json, .verdict).
-                    # Interactive mode: full CC session visible in terminal.
+        (
-                    # Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI).
+            case "$TOOL" in
-                    # Use script to capture output while showing it live.
+                claude)
-                    script -q "$output_file" \
+                    claude --dangerously-skip-permissions "$prompt"
-                        claude --dangerously-skip-permissions "$(cat "$prompt_file")"
+                    ;;
-                else
+                amp)
-                    # Headless mode: --print for autonomous operation
+                    amp --dangerously-allow-all "$prompt"
                    ;;
                *)
                    log "ERROR: Unknown tool '$TOOL'"
                    exit 1
                    ;;
            esac
        ) || agent_exit=$?
        # In interactive mode, read verdict from file if evaluator wrote one
        if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
            cat "$LOOP_DIR/.verdict"
        fi
    else
        # --- Headless mode ---
        local output_file
        output_file=$(mktemp)
        LOOP_AGENT_TMPFILE="$output_file"
        (
            case "$TOOL" in
                claude)
                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
                        claude --dangerously-skip-permissions --output-format text \
                        --print 2>&1 > "$output_file"
-                fi
+                    ;;
-                ;;
+                amp)
            amp)
                if [ "$has_tty" = true ]; then
                    script -q "$output_file" \
                        amp --dangerously-allow-all "$(cat "$prompt_file")"
                else
                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
                        amp --dangerously-allow-all 2>&1 > "$output_file"
-                fi
+                    ;;
-                ;;
+                *)
-            *)
+                    log "ERROR: Unknown tool '$TOOL'"
-                log "ERROR: Unknown tool '$TOOL'"
+                    exit 1
-                exit 1
+                    ;;
-                ;;
+            esac
-        esac
+        ) || agent_exit=$?
    ) || agent_exit=$?
-    rm -f "$prompt_file"
+        if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
            log "WARNING: Agent exited with code $agent_exit and produced no output."
        fi
    if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
        log "WARNING: Agent exited with code $agent_exit and produced no output."
    fi
    # Strip ANSI escape codes for clean verdict parsing
    if command -v sed &>/dev/null; then
        sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file"
    else
        cat "$output_file"
        rm -f "$output_file"
        LOOP_AGENT_TMPFILE=""
    fi
    rm -f "$output_file"
    LOOP_AGENT_TMPFILE=""
 }
 # --- Parse evaluator verdict ---
@@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
        exit 0
    fi
-    GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT")
+    GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")
-    if [ -z "$GENERATOR_OUTPUT" ]; then
+    # In interactive mode, generator output is empty (displayed in terminal, not captured).
    # State is tracked via prd.json — the generator updates it directly, so empty output is only treated as a failure when LOOP_HEADLESS=true.
    if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then
        log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
        continue
    fi
@@ -390,11 +399,16 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
        fi
        EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
-        EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT")
+        EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")
        if [ -z "$EVAL_OUTPUT" ]; then
-            log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT."
+            # In interactive mode, check the verdict file
-            EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
+            if [ -f "$LOOP_DIR/.verdict" ]; then
                EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
            else
                log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT."
                EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
            fi
        fi
        VERDICT=$(parse_verdict "$EVAL_OUTPUT")
--- a/prompts/evaluator/_base.md
+++ b/prompts/evaluator/_base.md
@@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just
 ## Verdict Format
-You MUST end your response with EXACTLY ONE of these verdict blocks:
+You MUST do TWO things when delivering your verdict:
-### If the story genuinely passes all criteria:
+### 1. Write the verdict to a file
 Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision.
 **If PASS:**
 ```
 <verdict>PASS</verdict>
 ```
-### If any criterion is not met or issues are found:
+**If REJECT:**
 ```
 <verdict>REJECT</verdict>
 <rejection_reason>
@@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg
 </rejection_reason>
 ```
 ### 2. Also include the verdict in your response
 End your response with the same verdict block so it's visible in the terminal output.
 ## What Warrants Rejection
 - ANY acceptance criterion not actually met (not "mostly met" — MET)