feat: true interactive mode — run claude directly, verdict via file, no script/capture

This commit is contained in:
2026-03-27 13:07:25 -04:00
parent 5e456cff6d
commit 1e7f7ea6ed
2 changed files with 74 additions and 54 deletions

114
loop.sh
View File

@@ -191,19 +191,17 @@ fi
# Two modes: # Two modes:
# Interactive (default when TTY available): runs claude in full interactive mode. # Interactive (default when TTY available): runs claude in full interactive mode.
# The user sees the complete CC session (tool calls, file edits, etc.) in the terminal. # The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
# Output is captured via `script` for verdict parsing. # No output capture — state is tracked via prd.json and .verdict file.
# Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation. # Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation.
# Output is captured to a temp file for verdict parsing.
# #
# The function prints the captured output to stdout for the caller to capture. # The function prints captured output to stdout (headless); in interactive mode it prints only the evaluator's .verdict file contents, if that file exists.
run_agent() { run_agent() {
local prompt="$1" local prompt="$1"
local output_file local role="${2:-}" # "generator" or "evaluator" — used for verdict file
output_file=$(mktemp)
LOOP_AGENT_TMPFILE="$output_file" # exposed for trap cleanup
local prompt_file # Clean up any previous verdict file
prompt_file=$(mktemp) rm -f "$LOOP_DIR/.verdict"
printf '%s\n' "$prompt" > "$prompt_file"
# Determine whether we can run interactively # Determine whether we can run interactively
local has_tty=false local has_tty=false
@@ -213,52 +211,61 @@ run_agent() {
# Run in subshell so a non-zero exit from the AI tool doesn't kill the loop. # Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
local agent_exit=0 local agent_exit=0
( if [ "$has_tty" = true ]; then
case "$TOOL" in # --- Interactive mode ---
claude) # Run claude directly in the terminal — full interactive UI visible.
if [ "$has_tty" = true ]; then # No output capture. State tracked via files (prd.json, .verdict).
# Interactive mode: full CC session visible in terminal. (
# Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI). case "$TOOL" in
# Use script to capture output while showing it live. claude)
script -q "$output_file" \ claude --dangerously-skip-permissions "$prompt"
claude --dangerously-skip-permissions "$(cat "$prompt_file")" ;;
else amp)
# Headless mode: --print for autonomous operation amp --dangerously-allow-all "$prompt"
;;
*)
log "ERROR: Unknown tool '$TOOL'"
exit 1
;;
esac
) || agent_exit=$?
# In interactive mode, read verdict from file if evaluator wrote one
if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
cat "$LOOP_DIR/.verdict"
fi
else
# --- Headless mode ---
local output_file
output_file=$(mktemp)
LOOP_AGENT_TMPFILE="$output_file"
(
case "$TOOL" in
claude)
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \ printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
claude --dangerously-skip-permissions --output-format text \ claude --dangerously-skip-permissions --output-format text \
--print 2>&1 > "$output_file" --print 2>&1 > "$output_file"
fi ;;
;; amp)
amp)
if [ "$has_tty" = true ]; then
script -q "$output_file" \
amp --dangerously-allow-all "$(cat "$prompt_file")"
else
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \ printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
amp --dangerously-allow-all 2>&1 > "$output_file" amp --dangerously-allow-all 2>&1 > "$output_file"
fi ;;
;; *)
*) log "ERROR: Unknown tool '$TOOL'"
log "ERROR: Unknown tool '$TOOL'" exit 1
exit 1 ;;
;; esac
esac ) || agent_exit=$?
) || agent_exit=$?
rm -f "$prompt_file" if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
log "WARNING: Agent exited with code $agent_exit and produced no output."
fi
if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
log "WARNING: Agent exited with code $agent_exit and produced no output."
fi
# Strip ANSI escape codes for clean verdict parsing
if command -v sed &>/dev/null; then
sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file"
else
cat "$output_file" cat "$output_file"
rm -f "$output_file"
LOOP_AGENT_TMPFILE=""
fi fi
rm -f "$output_file"
LOOP_AGENT_TMPFILE=""
} }
# --- Parse evaluator verdict --- # --- Parse evaluator verdict ---
@@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
exit 0 exit 0
fi fi
GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT") GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")
if [ -z "$GENERATOR_OUTPUT" ]; then # In interactive mode, generator output is empty (displayed in terminal, not captured).
# State is tracked via prd.json — the generator updates it directly.
if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then
log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration." log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
continue continue
fi fi
@@ -390,11 +399,16 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
fi fi
EVAL_PROMPT=$(build_prompt "evaluator" "$MODE") EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT") EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")
if [ -z "$EVAL_OUTPUT" ]; then if [ -z "$EVAL_OUTPUT" ]; then
log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT." # In interactive mode, check the verdict file
EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>" if [ -f "$LOOP_DIR/.verdict" ]; then
EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
else
log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT."
EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
fi
fi fi
VERDICT=$(parse_verdict "$EVAL_OUTPUT") VERDICT=$(parse_verdict "$EVAL_OUTPUT")

View File

@@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just
## Verdict Format ## Verdict Format
You MUST end your response with EXACTLY ONE of these verdict blocks: You MUST do TWO things when delivering your verdict:
### If the story genuinely passes all criteria: ### 1. Write the verdict to a file
Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision.
**If PASS:**
``` ```
<verdict>PASS</verdict> <verdict>PASS</verdict>
``` ```
### If any criterion is not met or issues are found: **If REJECT:**
``` ```
<verdict>REJECT</verdict> <verdict>REJECT</verdict>
<rejection_reason> <rejection_reason>
@@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg
</rejection_reason> </rejection_reason>
``` ```
### 2. Also include the verdict in your response
End your response with the same verdict block so it's visible in the terminal output.
## What Warrants Rejection ## What Warrants Rejection
- ANY acceptance criterion not actually met (not "mostly met" — MET) - ANY acceptance criterion not actually met (not "mostly met" — MET)