feat: agent loop harness with Claude Code plugin support
Generator-evaluator architecture with iterative context-reset for long-running coding tasks. Ships as a Claude Code plugin — install with /plugin and use /agent-loop:init, /agent-loop:plan, /agent-loop:run.
This commit is contained in:
403
loop.sh
Executable file
403
loop.sh
Executable file
@@ -0,0 +1,403 @@
|
||||
#!/bin/bash
# Autonomous AI agent loop orchestrator
# Combines generator-evaluator architecture with iterative context-reset pattern.
#
# Usage:
#   ./loop.sh [options] [N]
#
# Options:
#   --mode <implement|explore|fix>  Operating mode (default: from config.json)
#   --max <N>                       Maximum iterations (default: from config.json)
#   --skip-eval                     Skip evaluator pass
#   --tool <claude|amp>             AI tool to use (default: from config.json)
#   --no-hooks                      Don't install stop hooks
#   --dry-run                       Print assembled prompts without running agents
#   --resume                        Skip already-passed stories (explicit mode)
#   --replan                        (reserved — not yet implemented)
#   N                               Bare number, same effect as --max N
#
# --mode, --max and --tool also accept the --option=value spelling.
#
# Each iteration:
#   1. Generator: picks highest-priority incomplete story, does the work
#   2. Evaluator: verifies the work, can PASS or REJECT
# Both get fresh context windows. Loop continues until all stories pass or max iterations.

set -euo pipefail

# --- Exit codes ---
EXIT_OK=0              # All stories complete
EXIT_ERROR=1           # Configuration or runtime error
EXIT_MAX_ITERATIONS=2  # Max iterations reached, work remains
EXIT_ALL_BLOCKED=3     # All remaining stories blocked for human review

# --- Resolve paths ---
# LOOP_DIR is the directory holding this script; PROJECT_ROOT is its parent.
LOOP_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$LOOP_DIR/.." && pwd)"
export LOOP_DIR PROJECT_ROOT

# --- Lockfile (prevent concurrent runs) ---
# Despite the name this is a directory (created with mkdir) that holds a `pid` file.
LOCKFILE="$LOOP_DIR/.loop.lock"

# Take the single-instance lock, or exit if a live loop already holds it.
#
# The lock is the directory $LOCKFILE containing a `pid` file with the owner's
# PID. A lock whose PID is missing or no longer alive is treated as stale and
# reclaimed.
#
# Globals: LOCKFILE (read)
# Outputs: error text on stderr when the lock is held by a live process
# Exits:   1 when another instance is running
acquire_lock() {
  # mkdir is atomic on POSIX — prevents race between check and create
  if ! mkdir "$LOCKFILE" 2>/dev/null; then
    local old_pid=""
    # The directory can exist without a pid file (owner died between mkdir and
    # the write below). The fallback keeps `set -e` from aborting on cat's
    # failure so the stale lock is reclaimed instead.
    old_pid=$(cat "$LOCKFILE/pid" 2>/dev/null) || old_pid=""
    if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
      echo "[loop] ERROR: Another loop instance is running (PID $old_pid)." >&2
      echo "[loop] If this is stale, remove $LOCKFILE and retry." >&2
      exit 1
    fi
    # Stale lockfile — previous run crashed without cleanup
    rm -rf -- "${LOCKFILE:?}"
    mkdir "$LOCKFILE"
  fi
  echo $$ > "$LOCKFILE/pid"
}

# Remove the lock directory. Safe to call when the lock is already gone.
# Globals: LOCKFILE (read)
release_lock() {
  # ${LOCKFILE:?} refuses to run rm -rf on an empty path.
  rm -rf -- "${LOCKFILE:?}"
}

acquire_lock
# From here on we own the lock: release it on every exit path (failed preflight,
# bad CLI arguments, a library that fails to load). The trap is set only after
# acquire_lock succeeds so a refused start never deletes another instance's lock.
# A later `trap ... EXIT` in this script replaces this handler.
trap release_lock EXIT

# --- Source libraries ---
source "$LOOP_DIR/lib/hooks.sh"
source "$LOOP_DIR/lib/state.sh"
source "$LOOP_DIR/lib/archive.sh"
source "$LOOP_DIR/lib/prompt.sh"

# --- Logging ---
# Print all arguments, joined by spaces, on stdout behind a "[loop] " prefix.
# Callers that capture a function's stdout must redirect this to stderr.
log() { printf '[loop] %s\n' "$*"; }
# Print a banner on stdout: blank line, rule, the message, rule, blank line.
# Arguments: the message words (joined by spaces)
log_header() {
  local rule="═══════════════════════════════════════════════════════"
  printf '\n%s\n %s\n%s\n\n' "$rule" "$*" "$rule"
}

# --- Preflight checks ---
# At least one of jq / python3 must be on PATH.
# NOTE(review): presumably the sourced libraries use them to read JSON — confirm.
if ! command -v jq >/dev/null 2>&1 && ! command -v python3 >/dev/null 2>&1; then
  log "ERROR: Either jq or python3 is required. Install one and retry." >&2
  exit "${EXIT_ERROR:-1}"
fi

# --- Load config defaults ---
CONFIG_FILE="$LOOP_DIR/config.json"

# Look up a config key with a fallback value.
# Arguments: $1 - key path (e.g. ".tool"), $2 - value to use when unset
# Delegates to get_config_value, which is not defined in this file
# (presumably provided by one of the sourced libraries — confirm).
config_default() {
  get_config_value "$1" "$2"
}

TOOL=$(config_default ".tool" "claude")
MODE=$(config_default ".mode" "implement")
MAX_ITERATIONS=$(config_default ".maxIterations" "20")
SKIP_EVAL=$(config_default ".skipEval" "false")
EVAL_RETRIES=$(config_default ".evalRetries" "2")
AUTO_HOOKS=$(config_default ".autoHooks" "true")

# CLI-only switches (no config.json equivalent is read here).
DRY_RUN=false
RESUME=false
# --- Parse CLI args (override config) ---

# Abort with a readable message when a value-taking option has no value after
# it. Without this check `set -u` reports a bare "unbound variable" on "$2".
# Arguments: $1 - option name, $2 - number of arguments left (option included)
_loop_require_value() {
  if [ "$2" -lt 2 ]; then
    log "ERROR: $1 requires a value" >&2
    exit "${EXIT_ERROR:-1}"
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)      _loop_require_value "$1" "$#"; MODE="$2"; shift 2 ;;
    --mode=*)    MODE="${1#*=}"; shift ;;
    --max)       _loop_require_value "$1" "$#"; MAX_ITERATIONS="$2"; shift 2 ;;
    --max=*)     MAX_ITERATIONS="${1#*=}"; shift ;;
    --skip-eval) SKIP_EVAL=true; shift ;;
    --tool)      _loop_require_value "$1" "$#"; TOOL="$2"; shift 2 ;;
    --tool=*)    TOOL="${1#*=}"; shift ;;
    --no-hooks)  AUTO_HOOKS=false; shift ;;
    --dry-run)   DRY_RUN=true; shift ;;
    --resume)    RESUME=true; shift ;;
    --replan)
      log "ERROR: --replan is not yet implemented. Use /loop-plan interactively." >&2
      exit "${EXIT_ERROR:-1}"
      ;;
    *)
      # A bare number is shorthand for --max. It must be all digits: the glob
      # `[0-9]*` would also accept strings such as "5abc".
      if [[ "$1" =~ ^[0-9]+$ ]]; then
        MAX_ITERATIONS="$1"
        shift
      else
        log "Unknown option: $1" >&2
        exit "${EXIT_ERROR:-1}"
      fi
      ;;
  esac
done

export ITERATION=0 MAX_ITERATIONS MODE

# --- Validate ---
if [[ ! "$MODE" =~ ^(implement|explore|fix)$ ]]; then
  log "ERROR: Invalid mode '$MODE'. Must be: implement, explore, fix" >&2
  exit "${EXIT_ERROR:-1}"
fi

if [[ ! "$TOOL" =~ ^(claude|amp)$ ]]; then
  log "ERROR: Invalid tool '$TOOL'. Must be: claude, amp" >&2
  exit "${EXIT_ERROR:-1}"
fi

# Both values are used in integer tests later ([ ... -lt ... ], [ ... -ge ... ]).
# A non-numeric value would make those tests error out and read as "false",
# silently skipping the loop or disabling the retry limit.
if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
  log "ERROR: Invalid max iterations '$MAX_ITERATIONS'. Must be a non-negative integer" >&2
  exit "${EXIT_ERROR:-1}"
fi

if [[ ! "$EVAL_RETRIES" =~ ^[0-9]+$ ]]; then
  log "ERROR: Invalid evalRetries '$EVAL_RETRIES'. Must be a non-negative integer" >&2
  exit "${EXIT_ERROR:-1}"
fi

# --- Setup ---
# Everything below (git commands, init script) runs from the project root.
cd "$PROJECT_ROOT" || {
  log "ERROR: Cannot change directory to $PROJECT_ROOT" >&2
  exit "${EXIT_ERROR:-1}"
}

# Exit handler: delete the agent capture file (if one is recorded), uninstall
# hooks when this run manages them, and always release the lock.
#
# Globals: LOOP_AGENT_TMPFILE (read), AUTO_HOOKS (read)
cleanup() {
  if [ -n "${LOOP_AGENT_TMPFILE:-}" ]; then
    rm -f -- "$LOOP_AGENT_TMPFILE"
  fi
  if [ "${AUTO_HOOKS:-}" = true ]; then
    # Best effort: under `set -e` a failing remove_hooks would otherwise end
    # the handler here and leave the lock behind.
    remove_hooks || true
  fi
  release_lock
}
LOOP_AGENT_TMPFILE=""

# Register cleanup before installing hooks so a failed or interrupted install
# is still undone.
# NOTE(review): assumes remove_hooks tolerates hooks that were never or only
# partly installed — confirm in lib/hooks.sh.
trap cleanup EXIT
# A signal handler that just returns lets the script carry on after Ctrl-C with
# the lock and hooks already removed. Exit instead; the EXIT trap then runs
# cleanup exactly once. 130/143 are the conventional 128+signal statuses.
trap 'exit 130' INT
trap 'exit 143' TERM

if [ "$AUTO_HOOKS" = true ]; then
  install_hooks
fi

check_archive

# Validate prd.json exists (AFTER archive check, which may delete it on branch change)
if [ ! -f "$LOOP_DIR/prd.json" ]; then
  log "ERROR: No prd.json found. Run /loop-plan first to create one." >&2
  exit "${EXIT_ERROR:-1}"
fi

validate_prd

# Run project init script if it exists.
# It runs in a child bash; a non-zero exit aborts the loop via `set -e`.
if [ -f "$LOOP_DIR/init.sh" ]; then
  log "Running init.sh..."
  bash "$LOOP_DIR/init.sh"
fi

# Ensure correct git branch.
# BRANCH comes from prd_branch_name (defined outside this file); when it yields
# nothing the current branch is left alone.
BRANCH=$(prd_branch_name 2>/dev/null || echo "")
if [ -n "$BRANCH" ]; then
  CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
  if [ "$CURRENT_BRANCH" != "$BRANCH" ]; then
    log "Switching to branch: $BRANCH"
    # Try in order: existing local branch, new branch tracking origin, brand
    # new branch from HEAD. Only the last attempt shows git's error output.
    git checkout "$BRANCH" 2>/dev/null \
      || git checkout -b "$BRANCH" "origin/$BRANCH" 2>/dev/null \
      || git checkout -b "$BRANCH"
  fi
fi

# --- Agent runner ---
# Pipes a prompt into the selected AI tool and prints everything the tool wrote
# (stdout and stderr combined) on stdout, so callers can capture it with $(...).
# When /dev/tty is writable the output is also shown live through tee.
#
# Globals:
#   TOOL                (read)    "claude" or "amp"
#   LOOP_AGENT_TIMEOUT  (read)    duration handed to timeout(1); default 600
#   LOOP_AGENT_TMPFILE  (written) capture file path while the agent runs
# Arguments:
#   $1 - prompt text, sent to the tool on stdin
# Outputs:
#   stdout: captured agent output only; stderr: this function's diagnostics
# Returns:
#   0 even when the agent fails or times out (the caller detects that through
#   empty output); 1 when TOOL is not a known tool.
#
# NOTE(review): this script calls run_agent inside $(...), i.e. in a subshell,
# so the LOOP_AGENT_TMPFILE assignment is not visible to the parent's cleanup
# trap. The file is removed here on the normal path.
run_agent() {
  local prompt="$1"

  # Build the tool command once instead of repeating it per output mode.
  local -a agent_cmd
  case "$TOOL" in
    claude)
      agent_cmd=(claude --dangerously-skip-permissions --output-format text --print)
      ;;
    amp)
      agent_cmd=(amp --dangerously-allow-all)
      ;;
    *)
      log "ERROR: Unknown tool '$TOOL'" >&2
      return 1
      ;;
  esac

  local output_file
  output_file=$(mktemp)
  LOOP_AGENT_TMPFILE="$output_file" # exposed for trap cleanup

  # Determine whether we can display live output
  local has_tty=false
  if { true > /dev/tty; } 2>/dev/null; then
    has_tty=true
  fi

  # `|| agent_exit=$?` records a failure without letting `set -e` end the loop.
  local agent_exit=0
  if [ "$has_tty" = true ]; then
    printf '%s\n' "$prompt" \
      | timeout "${LOOP_AGENT_TIMEOUT:-600}" "${agent_cmd[@]}" 2>&1 \
      | tee /dev/tty > "$output_file" || agent_exit=$?
  else
    # Redirection order matters: point stdout at the file first, then send
    # stderr to the same place. (`2>&1 > file` would leave stderr out of it.)
    printf '%s\n' "$prompt" \
      | timeout "${LOOP_AGENT_TIMEOUT:-600}" "${agent_cmd[@]}" \
        > "$output_file" 2>&1 || agent_exit=$?
  fi

  if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
    # Must go to stderr: stdout is what the caller captures, and a warning
    # there would make an empty result look non-empty.
    log "WARNING: Agent exited with code $agent_exit and produced no output." >&2
  fi

  cat "$output_file"
  rm -f -- "$output_file"
  LOOP_AGENT_TMPFILE=""
}

# --- Parse evaluator verdict ---
# Reduce evaluator output to a single verdict line on stdout:
#   "PASS"             when <verdict>PASS</verdict> is present
#   "REJECT:<reason>"  when <verdict>REJECT</verdict> is present, or when no
#                      verdict tag is found (fail-safe)
# REJECT wins if both tags appear. The reason is the text between the first
# <rejection_reason> and the next </rejection_reason>, with newlines turned
# into spaces, runs of spaces collapsed, and one leading/trailing space trimmed.
#
# Arguments: $1 - full evaluator output
# Outputs:   verdict on stdout; warnings on stderr (stdout is captured by the
#            caller and must contain nothing but the verdict)
parse_verdict() {
  local output="$1"
  local open_tag='<rejection_reason>'
  local close_tag='</rejection_reason>'

  # Substring matches in the shell: `echo | grep -q` can fail with SIGPIPE
  # under pipefail when grep exits before echo has written a large output.
  if [[ "$output" == *"<verdict>REJECT</verdict>"* ]]; then
    # Extract rejection reason (supports multiline)
    local reason=""
    if [[ "$output" == *"$open_tag"* ]]; then
      reason="${output#*"$open_tag"}"
      reason="${reason%%"$close_tag"*}"
    fi
    reason="${reason//$'\n'/ }"
    while [[ "$reason" == *"  "* ]]; do
      reason="${reason//  / }"
    done
    reason="${reason# }"
    reason="${reason% }"
    if [ -z "$reason" ]; then
      reason="Rejected without specific reason"
    fi
    printf '%s\n' "REJECT:${reason}"
  elif [[ "$output" == *"<verdict>PASS</verdict>"* ]]; then
    echo "PASS"
  else
    # No explicit verdict — fail-safe: treat as reject so broken evaluators don't silently approve
    log "WARNING: No verdict tag found in evaluator output. Treating as REJECT (fail-safe)." >&2
    echo "REJECT:Evaluator produced no verdict tag — output may be malformed"
  fi
}

# --- Main loop ---
# Startup summary of the effective settings.
if [[ "$SKIP_EVAL" == true ]]; then
  eval_state='off'
else
  eval_state='on'
fi
if [[ "$DRY_RUN" == true ]]; then
  dry_run_state='yes'
else
  dry_run_state='no'
fi

log_header "Loop Starting"
log "Mode: $MODE"
log "Tool: $TOOL"
log "Max iter: $MAX_ITERATIONS"
log "Eval: $eval_state"
log "Dry run: $dry_run_state"
log "Project: $PROJECT_ROOT"
log "Stories: $(story_counts 2>/dev/null || echo 'N/A')"
echo ""

# One pass per iteration: pick a story, run the generator, then (unless
# --skip-eval) run the evaluator and record its verdict. The loop exits the
# script when all stories pass, nothing actionable is left, the generator
# signals completion, or a dry run has printed its prompts; otherwise it ends
# once ITERATION reaches MAX_ITERATIONS.
while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
  ITERATION=$((ITERATION + 1))
  export ITERATION

  # Check if all stories already pass
  if all_stories_pass 2>/dev/null; then
    log_header "All Stories Complete! ($(story_counts))"
    snapshot_for_archive
    exit "$EXIT_OK"
  fi

  # Capture which story the generator will work on (highest-priority incomplete)
  CURRENT_STORY_ID=$(next_story_id 2>/dev/null || echo "")
  export CURRENT_STORY_ID

  # No actionable story — all remaining are passed or blocked
  if [ -z "$CURRENT_STORY_ID" ]; then
    if [ "$RESUME" = true ]; then
      log "Resume mode: no actionable stories remaining."
    else
      log "No actionable stories remaining (all passed or blocked)."
    fi
    snapshot_for_archive
    if any_stories_blocked 2>/dev/null; then
      log "Some stories are blocked and need human review. Run /loop-triage for details."
      exit "$EXIT_ALL_BLOCKED"
    fi
    exit "$EXIT_OK"
  fi
  # From here on CURRENT_STORY_ID is guaranteed non-empty.

  # Capture git state before generator runs (for evaluator diff)
  PRE_GENERATOR_SHA=$(git rev-parse HEAD 2>/dev/null || echo "")
  export PRE_GENERATOR_SHA

  # --- Generator pass ---
  log_header "Iteration $ITERATION / $MAX_ITERATIONS — GENERATOR${CURRENT_STORY_ID:+ ($CURRENT_STORY_ID)}"

  GENERATOR_PROMPT=$(build_prompt "generator" "$MODE")

  # --dry-run: print prompts and exit without running agents
  if [ "$DRY_RUN" = true ]; then
    log "=== GENERATOR PROMPT ==="
    printf '%s\n' "$GENERATOR_PROMPT"
    echo ""
    if [ "$SKIP_EVAL" != true ]; then
      EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
      log "=== EVALUATOR PROMPT ==="
      printf '%s\n' "$EVAL_PROMPT"
    fi
    log "Dry run complete. Showing prompts for story: ${CURRENT_STORY_ID:-unknown}"
    exit "$EXIT_OK"
  fi

  GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT")

  if [ -z "$GENERATOR_OUTPUT" ]; then
    log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
    continue
  fi

  # --- Scope budget check ---
  # Compare what the generator committed since PRE_GENERATOR_SHA against the
  # configured limits (files modified, lines inserted). This script only logs a
  # warning; the counts are exported.
  # NOTE(review): presumably the evaluator prompt consumes the exported counts —
  # confirm in lib/prompt.sh.
  if [ -n "$PRE_GENERATOR_SHA" ]; then
    # The fallbacks keep a git failure from aborting the loop under
    # `set -e` + pipefail.
    SCOPE_FILES_MODIFIED=$(git diff --name-only "$PRE_GENERATOR_SHA" HEAD 2>/dev/null | wc -l | tr -d ' ') \
      || SCOPE_FILES_MODIFIED=0
    SCOPE_LINES_WRITTEN=$(git diff --shortstat "$PRE_GENERATOR_SHA" HEAD 2>/dev/null \
      | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")

    MAX_MODIFY=$(config_default ".scopeBudgets.${MODE}.maxFilesToModify" "10")
    MAX_WRITE=$(config_default ".scopeBudgets.${MODE}.maxLinesToWrite" "500")

    if [ "${SCOPE_FILES_MODIFIED:-0}" -gt "$MAX_MODIFY" ]; then
      log "WARNING: Scope budget exceeded — modified $SCOPE_FILES_MODIFIED files (limit: $MAX_MODIFY)"
    fi
    if [ "${SCOPE_LINES_WRITTEN:-0}" -gt "$MAX_WRITE" ]; then
      log "WARNING: Scope budget exceeded — wrote $SCOPE_LINES_WRITTEN lines (limit: $MAX_WRITE)"
    fi

    export SCOPE_FILES_MODIFIED SCOPE_LINES_WRITTEN
  fi

  # Check for completion sentinel.
  # Substring match in the shell: `echo | grep -q` can fail with SIGPIPE under
  # pipefail when grep exits before echo has written a large output.
  if [[ "$GENERATOR_OUTPUT" == *"<promise>COMPLETE</promise>"* ]]; then
    log_header "Generator signaled COMPLETE ($(story_counts))"
    snapshot_for_archive
    exit "$EXIT_OK"
  fi

  # --- Evaluator pass ---
  if [ "$SKIP_EVAL" != true ]; then
    log_header "Iteration $ITERATION / $MAX_ITERATIONS — EVALUATOR${CURRENT_STORY_ID:+ ($CURRENT_STORY_ID)}"

    EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
    EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT")

    if [ -z "$EVAL_OUTPUT" ]; then
      log "WARNING: Evaluator produced empty output (timeout or crash). Treating as REJECT."
      EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
    fi

    VERDICT=$(parse_verdict "$EVAL_OUTPUT")
    # The verdict is the last line of parse_verdict's stdout; drop any log
    # lines that were printed ahead of it.
    VERDICT="${VERDICT##*$'\n'}"

    case "$VERDICT" in
      PASS)
        log "Evaluator: PASS"
        mark_story_pass "$CURRENT_STORY_ID"
        ;;
      REJECT:*)
        REASON="${VERDICT#REJECT:}"
        log "Evaluator: REJECT — $REASON"

        mark_story_reject "$CURRENT_STORY_ID" "$REASON"

        # Check retry limit — block story to prevent infinite retries
        REJECTIONS=$(story_rejections "$CURRENT_STORY_ID")
        REJECTIONS="${REJECTIONS:-0}"
        if [ "$REJECTIONS" -ge "$EVAL_RETRIES" ]; then
          log "WARNING: Story $CURRENT_STORY_ID rejected $REJECTIONS times (limit: $EVAL_RETRIES). Blocking for human review."
          mark_story_blocked "$CURRENT_STORY_ID" "Rejected $REJECTIONS times. Last: $REASON"
          append_progress "### BLOCKED: $CURRENT_STORY_ID

Rejected $REJECTIONS times. Needs human review. Last reason: $REASON

---"
        fi
        ;;
      *)
        # Should not happen; never approve on an unreadable verdict.
        log "WARNING: Unrecognized verdict '$VERDICT' for $CURRENT_STORY_ID. Story left incomplete."
        ;;
    esac
  fi
done

# --- Max iterations reached ---
# Reached only when the loop ran out of iterations with work remaining.
log_header "Max Iterations Reached ($MAX_ITERATIONS)"
log "Stories completed: $(story_counts)"
log "Run /loop-triage to generate a handoff brief."
snapshot_for_archive
exit "$EXIT_MAX_ITERATIONS"
Reference in New Issue
Block a user