feat: true interactive mode — run claude directly, verdict via file, no script/capture

This commit is contained in:
2026-03-27 13:07:25 -04:00
parent 5e456cff6d
commit 1e7f7ea6ed
2 changed files with 74 additions and 54 deletions

88
loop.sh
View File

@@ -191,19 +191,17 @@ fi
# Two modes:
# Interactive (default when TTY available): runs the selected tool (claude or amp) in full interactive mode.
# The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
# Output is captured via `script` for verdict parsing.
# No output capture — state is tracked via prd.json and .verdict file.
# Headless (no TTY or LOOP_HEADLESS=true): pipes the prompt to the tool non-interactively (claude runs with --print) for fully autonomous operation.
# Output is captured to a temp file for verdict parsing.
#
# The function prints the captured output to stdout for the caller to capture.
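# The function prints captured output to stdout in headless mode. In interactive mode it prints
# nothing, except for the "evaluator" role, where it prints the contents of the .verdict file if one exists.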
run_agent() {
local prompt="$1"
local output_file
output_file=$(mktemp)
LOOP_AGENT_TMPFILE="$output_file" # exposed for trap cleanup
local role="${2:-}" # "generator" or "evaluator" — used for verdict file
local prompt_file
prompt_file=$(mktemp)
printf '%s\n' "$prompt" > "$prompt_file"
# Clean up any previous verdict file
rm -f "$LOOP_DIR/.verdict"
# Determine whether we can run interactively
local has_tty=false
@@ -213,30 +211,17 @@ run_agent() {
# Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
local agent_exit=0
if [ "$has_tty" = true ]; then
# --- Interactive mode ---
# Run claude directly in the terminal — full interactive UI visible.
# No output capture. State tracked via files (prd.json, .verdict).
(
case "$TOOL" in
claude)
if [ "$has_tty" = true ]; then
# Interactive mode: full CC session visible in terminal.
# Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI).
# Use script to capture output while showing it live.
script -q "$output_file" \
claude --dangerously-skip-permissions "$(cat "$prompt_file")"
else
# Headless mode: --print for autonomous operation
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
claude --dangerously-skip-permissions --output-format text \
--print 2>&1 > "$output_file"
fi
claude --dangerously-skip-permissions "$prompt"
;;
amp)
if [ "$has_tty" = true ]; then
script -q "$output_file" \
amp --dangerously-allow-all "$(cat "$prompt_file")"
else
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
amp --dangerously-allow-all 2>&1 > "$output_file"
fi
amp --dangerously-allow-all "$prompt"
;;
*)
log "ERROR: Unknown tool '$TOOL'"
@@ -245,20 +230,42 @@ run_agent() {
esac
) || agent_exit=$?
rm -f "$prompt_file"
# In interactive mode, read verdict from file if evaluator wrote one
if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
cat "$LOOP_DIR/.verdict"
fi
else
# --- Headless mode ---
local output_file
output_file=$(mktemp)
LOOP_AGENT_TMPFILE="$output_file"
(
case "$TOOL" in
claude)
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
claude --dangerously-skip-permissions --output-format text \
--print 2>&1 > "$output_file"
;;
amp)
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
amp --dangerously-allow-all 2>&1 > "$output_file"
;;
*)
log "ERROR: Unknown tool '$TOOL'"
exit 1
;;
esac
) || agent_exit=$?
if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
log "WARNING: Agent exited with code $agent_exit and produced no output."
fi
# Strip ANSI escape codes for clean verdict parsing
if command -v sed &>/dev/null; then
sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file"
else
cat "$output_file"
fi
rm -f "$output_file"
LOOP_AGENT_TMPFILE=""
fi
}
# --- Parse evaluator verdict ---
@@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
exit 0
fi
GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT")
GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")
if [ -z "$GENERATOR_OUTPUT" ]; then
# In interactive mode, generator output is empty (displayed in terminal, not captured).
# State is tracked via prd.json — the generator updates it directly.
if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then
log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
continue
fi
@@ -390,12 +399,17 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
fi
EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT")
EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")
if [ -z "$EVAL_OUTPUT" ]; then
log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT."
# In interactive mode, check the verdict file
if [ -f "$LOOP_DIR/.verdict" ]; then
EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
else
log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT."
EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
fi
fi
VERDICT=$(parse_verdict "$EVAL_OUTPUT")

View File

@@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just
## Verdict Format
You MUST end your response with EXACTLY ONE of these verdict blocks:
You MUST do TWO things when delivering your verdict:
### If the story genuinely passes all criteria:
### 1. Write the verdict to a file
Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision.
**If PASS:**
```
<verdict>PASS</verdict>
```
### If any criterion is not met or issues are found:
**If REJECT:**
```
<verdict>REJECT</verdict>
<rejection_reason>
@@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg
</rejection_reason>
```
### 2. Also include the verdict in your response
End your response with the same verdict block so it's visible in the terminal output.
## What Warrants Rejection
- ANY acceptance criterion not actually met (not "mostly met" — MET)