feat: true interactive mode — run claude directly, verdict via file, no script/capture
This commit is contained in:
114
loop.sh
114
loop.sh
@@ -191,19 +191,17 @@ fi
|
|||||||
# Two modes:
|
# Two modes:
|
||||||
# Interactive (default when TTY available): runs claude in full interactive mode.
|
# Interactive (default when TTY available): runs claude in full interactive mode.
|
||||||
# The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
|
# The user sees the complete CC session (tool calls, file edits, etc.) in the terminal.
|
||||||
# Output is captured via `script` for verdict parsing.
|
# No output is captured — state is tracked via prd.json and the .verdict file.
|
||||||
# Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation.
|
# Headless (no TTY or LOOP_HEADLESS=true): uses claude --print for fully autonomous operation.
|
||||||
|
# Output is captured to a temp file for verdict parsing.
|
||||||
#
|
#
|
||||||
# The function prints the captured output to stdout for the caller to capture.
|
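# The function prints captured output to stdout (headless); in interactive mode it prints only the contents of the .verdict file, and only for the evaluator role when that file exists.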
|
||||||
run_agent() {
|
run_agent() {
|
||||||
local prompt="$1"
|
local prompt="$1"
|
||||||
local output_file
|
local role="${2:-}" # "generator" or "evaluator" — used for verdict file
|
||||||
output_file=$(mktemp)
|
|
||||||
LOOP_AGENT_TMPFILE="$output_file" # exposed for trap cleanup
|
|
||||||
|
|
||||||
local prompt_file
|
# Clean up any previous verdict file
|
||||||
prompt_file=$(mktemp)
|
rm -f "$LOOP_DIR/.verdict"
|
||||||
printf '%s\n' "$prompt" > "$prompt_file"
|
|
||||||
|
|
||||||
# Determine whether we can run interactively
|
# Determine whether we can run interactively
|
||||||
local has_tty=false
|
local has_tty=false
|
||||||
@@ -213,52 +211,61 @@ run_agent() {
|
|||||||
|
|
||||||
# Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
|
# Run in subshell so a non-zero exit from the AI tool doesn't kill the loop.
|
||||||
local agent_exit=0
|
local agent_exit=0
|
||||||
(
|
if [ "$has_tty" = true ]; then
|
||||||
case "$TOOL" in
|
# --- Interactive mode ---
|
||||||
claude)
|
# Run claude directly in the terminal — full interactive UI visible.
|
||||||
if [ "$has_tty" = true ]; then
|
# No output capture. State tracked via files (prd.json, .verdict).
|
||||||
# Interactive mode: full CC session visible in terminal.
|
(
|
||||||
# Pass prompt as CLI argument so stdin stays as TTY (required for interactive UI).
|
case "$TOOL" in
|
||||||
# Use script to capture output while showing it live.
|
claude)
|
||||||
script -q "$output_file" \
|
claude --dangerously-skip-permissions "$prompt"
|
||||||
claude --dangerously-skip-permissions "$(cat "$prompt_file")"
|
;;
|
||||||
else
|
amp)
|
||||||
# Headless mode: --print for autonomous operation
|
amp --dangerously-allow-all "$prompt"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
log "ERROR: Unknown tool '$TOOL'"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
) || agent_exit=$?
|
||||||
|
|
||||||
|
# In interactive mode, read verdict from file if evaluator wrote one
|
||||||
|
if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
|
||||||
|
cat "$LOOP_DIR/.verdict"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# --- Headless mode ---
|
||||||
|
local output_file
|
||||||
|
output_file=$(mktemp)
|
||||||
|
LOOP_AGENT_TMPFILE="$output_file"
|
||||||
|
|
||||||
|
(
|
||||||
|
case "$TOOL" in
|
||||||
|
claude)
|
||||||
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
|
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
|
||||||
claude --dangerously-skip-permissions --output-format text \
|
claude --dangerously-skip-permissions --output-format text \
|
||||||
--print 2>&1 > "$output_file"
|
--print 2>&1 > "$output_file"
|
||||||
fi
|
;;
|
||||||
;;
|
amp)
|
||||||
amp)
|
|
||||||
if [ "$has_tty" = true ]; then
|
|
||||||
script -q "$output_file" \
|
|
||||||
amp --dangerously-allow-all "$(cat "$prompt_file")"
|
|
||||||
else
|
|
||||||
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
|
printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
|
||||||
amp --dangerously-allow-all 2>&1 > "$output_file"
|
amp --dangerously-allow-all 2>&1 > "$output_file"
|
||||||
fi
|
;;
|
||||||
;;
|
*)
|
||||||
*)
|
log "ERROR: Unknown tool '$TOOL'"
|
||||||
log "ERROR: Unknown tool '$TOOL'"
|
exit 1
|
||||||
exit 1
|
;;
|
||||||
;;
|
esac
|
||||||
esac
|
) || agent_exit=$?
|
||||||
) || agent_exit=$?
|
|
||||||
|
|
||||||
rm -f "$prompt_file"
|
if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
|
||||||
|
log "WARNING: Agent exited with code $agent_exit and produced no output."
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
|
|
||||||
log "WARNING: Agent exited with code $agent_exit and produced no output."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Strip ANSI escape codes for clean verdict parsing
|
|
||||||
if command -v sed &>/dev/null; then
|
|
||||||
sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' "$output_file"
|
|
||||||
else
|
|
||||||
cat "$output_file"
|
cat "$output_file"
|
||||||
|
rm -f "$output_file"
|
||||||
|
LOOP_AGENT_TMPFILE=""
|
||||||
fi
|
fi
|
||||||
rm -f "$output_file"
|
|
||||||
LOOP_AGENT_TMPFILE=""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- Parse evaluator verdict ---
|
# --- Parse evaluator verdict ---
|
||||||
@@ -346,9 +353,11 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT")
|
GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")
|
||||||
|
|
||||||
if [ -z "$GENERATOR_OUTPUT" ]; then
|
# In interactive mode, generator output is empty (displayed in terminal, not captured).
|
||||||
|
# State is tracked via prd.json — the generator updates it directly.
|
||||||
|
if [ "${LOOP_HEADLESS:-false}" = "true" ] && [ -z "$GENERATOR_OUTPUT" ]; then
|
||||||
log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
|
log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
@@ -390,11 +399,16 @@ while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
|
EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
|
||||||
EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT")
|
EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")
|
||||||
|
|
||||||
if [ -z "$EVAL_OUTPUT" ]; then
|
if [ -z "$EVAL_OUTPUT" ]; then
|
||||||
log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT."
|
# run_agent printed nothing — fall back to the verdict file if the evaluator wrote one
|
||||||
EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
|
if [ -f "$LOOP_DIR/.verdict" ]; then
|
||||||
|
EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
|
||||||
|
else
|
||||||
|
log "WARNING: Evaluator produced no output and no verdict file. Treating as REJECT."
|
||||||
|
EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
VERDICT=$(parse_verdict "$EVAL_OUTPUT")
|
VERDICT=$(parse_verdict "$EVAL_OUTPUT")
|
||||||
|
|||||||
@@ -42,16 +42,18 @@ Evaluate story **`{{CURRENT_STORY_ID}}`**. This is the story the generator just
|
|||||||
|
|
||||||
## Verdict Format
|
## Verdict Format
|
||||||
|
|
||||||
You MUST end your response with EXACTLY ONE of these verdict blocks:
|
You MUST do TWO things when delivering your verdict:
|
||||||
|
|
||||||
### If the story genuinely passes all criteria:
|
### 1. Write the verdict to a file
|
||||||
|
|
||||||
|
Write your verdict to `{{LOOP_DIR}}/.verdict` using the Write tool. This file is how the loop harness reads your decision.
|
||||||
|
|
||||||
|
**If PASS:**
|
||||||
```
|
```
|
||||||
<verdict>PASS</verdict>
|
<verdict>PASS</verdict>
|
||||||
```
|
```
|
||||||
|
|
||||||
### If any criterion is not met or issues are found:
|
**If REJECT:**
|
||||||
|
|
||||||
```
|
```
|
||||||
<verdict>REJECT</verdict>
|
<verdict>REJECT</verdict>
|
||||||
<rejection_reason>
|
<rejection_reason>
|
||||||
@@ -61,6 +63,10 @@ Be concrete — "the function doesn't handle null input" not "there might be edg
|
|||||||
</rejection_reason>
|
</rejection_reason>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 2. Also include the verdict in your response
|
||||||
|
|
||||||
|
End your response with the same verdict block so it's visible in the terminal output.
|
||||||
|
|
||||||
## What Warrants Rejection
|
## What Warrants Rejection
|
||||||
|
|
||||||
- ANY acceptance criterion not actually met (not "mostly met" — MET)
|
- ANY acceptance criterion not actually met (not "mostly met" — MET)
|
||||||
|
|||||||
Reference in New Issue
Block a user