loop-loop/loop.sh

#!/bin/bash
# Autonomous AI agent loop orchestrator
# Combines generator-evaluator architecture with iterative context-reset pattern.
#
# Usage:
#   ./loop.sh [options]
#
# Options:
#   --mode <implement|explore|fix>   Operating mode (default: from config.json)
#   --max <N>                        Maximum iterations (default: from config.json)
#   <N>                              Bare number; shorthand for --max <N>
#   --skip-eval                      Skip evaluator pass
#   --tool <claude|amp>              AI tool to use (default: from config.json)
#   --no-hooks                       Don't install stop hooks
#   --dry-run                        Print assembled prompts without running agents
#   --headless                       Run agents non-interactively (sets LOOP_HEADLESS=true)
#   --resume                         Skip already-passed stories (explicit mode)
#   --replan                         Not yet implemented; exits with an error
#
# Value-taking options also accept the --option=value form.
#
# Each iteration:
#   1. Generator: picks highest-priority incomplete story, does the work
#   2. Evaluator: verifies the work, can PASS or REJECT
# Both get fresh context windows. Loop continues until all stories pass or max iterations.

# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# --- Exit codes ---
# Symbolic names for the script's exit statuses. Several error paths further
# down use a literal `exit 1`, which has the same value as EXIT_ERROR.
EXIT_OK=0              # All stories complete
EXIT_ERROR=1           # Configuration or runtime error
EXIT_MAX_ITERATIONS=2  # Max iterations reached, work remains
EXIT_ALL_BLOCKED=3     # All remaining stories blocked for human review

# --- Resolve paths ---
# LOOP_DIR is the absolute directory containing this script; PROJECT_ROOT is
# its parent directory. Both are exported for child processes.
LOOP_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$LOOP_DIR/.." && pwd)"
export LOOP_DIR PROJECT_ROOT

# --- Lockfile (prevent concurrent runs) ---
# Despite the name, this path is used as a directory (see acquire_lock).
LOCKFILE="$LOOP_DIR/.loop.lock"

# Acquire the single-instance lock, or exit if another live loop holds it.
#
# The lock is the directory $LOCKFILE; it contains a `pid` file holding the
# owner's PID. A lock whose PID is missing, unreadable, or no longer alive is
# treated as stale and taken over.
#
# Globals:  LOCKFILE (read)
# Outputs:  an error message on stdout when another instance is running
# Exits:    1 when the recorded PID belongs to a live process
acquire_lock() {
    # mkdir is atomic on POSIX — prevents race between check and create
    if ! mkdir "$LOCKFILE" 2>/dev/null; then
        local old_pid=""
        # The pid file may be absent (e.g. a crash between mkdir and the pid
        # write). Without the fallback, `set -e` would abort the script right
        # here with no message and the stale lock would never be cleared.
        old_pid=$(cat "$LOCKFILE/pid" 2>/dev/null) || old_pid=""
        if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
            echo "[loop] ERROR: Another loop instance is running (PID $old_pid)."
            echo "[loop] If this is stale, remove $LOCKFILE and retry."
            exit 1
        fi
        # Stale lockfile — previous run crashed without cleanup
        rm -rf "$LOCKFILE"
        mkdir "$LOCKFILE"
    fi
    echo $$ > "$LOCKFILE/pid"
}

# Drop the single-instance lock by deleting the lock directory (and the pid
# file inside it). Succeeds even when the lock is already gone.
# Globals: LOCKFILE (read)
release_lock() {
    rm -r -f -- "${LOCKFILE}"
}

# Take the lock before doing anything else. The cleanup trap that releases it
# is registered later in this script, so an early exit before that point leaves
# the lock directory behind; acquire_lock recovers it on the next run once the
# recorded PID is no longer alive.
acquire_lock

# --- Source libraries ---
# Helper functions called throughout this script but not defined in it are
# expected to come from these files.
source "$LOOP_DIR/lib/hooks.sh"
source "$LOOP_DIR/lib/state.sh"
source "$LOOP_DIR/lib/archive.sh"
source "$LOOP_DIR/lib/prompt.sh"

# --- Logging ---
# log: print all arguments on one stdout line, prefixed with "[loop] ".
log() { printf '[loop] %s\n' "$*"; }

# log_header: print the arguments as an indented title framed by two rules,
# with one blank line before and after the frame. Writes to stdout.
log_header() {
    local rule="═══════════════════════════════════════════════════════"
    printf '\n%s\n  %s\n%s\n\n' "$rule" "$*" "$rule"
}

# --- Preflight checks ---
# At least one of jq / python3 must be on PATH; stop early when both are missing.
if ! { command -v jq >/dev/null 2>&1 || command -v python3 >/dev/null 2>&1; }; then
    log "ERROR: Either jq or python3 is required. Install one and retry."
    exit 1
fi

# --- macOS timeout compatibility ---
# Provide a `timeout` command when the system has none. Preference order:
#   1. an existing `timeout` on PATH (left untouched)
#   2. `gtimeout`, wrapped in a function of the same name
#   3. a perl one-liner that arms alarm() and then execs the command
if command -v timeout >/dev/null 2>&1; then
    :  # Already available — nothing to define.
elif command -v gtimeout >/dev/null 2>&1; then
    timeout() { gtimeout "$@"; }
else
    # Perl fallback: first argument is the duration, the rest is the command.
    timeout() {
        local secs="$1"
        shift
        perl -e 'alarm shift @ARGV; exec @ARGV;' "$secs" "$@"
    }
fi

# --- Load config defaults ---
# CONFIG_FILE is not read directly in this script; presumably get_config_value
# (from a sourced library) uses it — TODO confirm.
CONFIG_FILE="$LOOP_DIR/config.json"
# config_default <key> <fallback>: thin wrapper around get_config_value.
# NOTE(review): the key looks like a jq-style path and the second argument like
# the value returned when the key is absent — confirm against get_config_value.
config_default() { get_config_value "$1" "$2"; }

# Settings seeded from config; the CLI parser below may override them.
TOOL=$(config_default ".tool" "claude")
MODE=$(config_default ".mode" "implement")
MAX_ITERATIONS=$(config_default ".maxIterations" "20")
SKIP_EVAL=$(config_default ".skipEval" "false")
EVAL_RETRIES=$(config_default ".evalRetries" "3")
AUTO_HOOKS=$(config_default ".autoHooks" "true")
# CLI-only flags (no config key). RESUME is set by --resume but is not read
# anywhere else in this script.
DRY_RUN=false
RESUME=false
# --- Parse CLI args (override config) ---
# Value-taking options accept both `--opt value` and `--opt=value`.
# An argument starting with a digit is shorthand for --max.
# Unknown options and --replan terminate the script with status 1.
while [[ $# -gt 0 ]]; do
    # Guard the two-argument forms: without this, a trailing `--mode` (or
    # --max / --tool) dies on "$2: unbound variable" under `set -u` instead of
    # telling the user what is missing.
    case "$1" in
        --mode|--max|--tool)
            if [[ $# -lt 2 ]]; then
                log "ERROR: Option $1 requires a value."
                exit 1
            fi
            ;;
    esac

    case "$1" in
        --mode) MODE="$2"; shift 2 ;;
        --mode=*) MODE="${1#*=}"; shift ;;
        --max) MAX_ITERATIONS="$2"; shift 2 ;;
        --max=*) MAX_ITERATIONS="${1#*=}"; shift ;;
        --skip-eval) SKIP_EVAL=true; shift ;;
        --tool) TOOL="$2"; shift 2 ;;
        --tool=*) TOOL="${1#*=}"; shift ;;
        --no-hooks) AUTO_HOOKS=false; shift ;;
        --dry-run) DRY_RUN=true; shift ;;
        --headless) export LOOP_HEADLESS=true; shift ;;
        --resume) RESUME=true; shift ;;
        --replan) log "ERROR: --replan is not yet implemented. Use /agent-loop:stories interactively."; exit 1 ;;
        [0-9]*) MAX_ITERATIONS="$1"; shift ;;
        *) log "Unknown option: $1"; exit 1 ;;
    esac
done

# ITERATION starts at 0 and is incremented by the main loop. It is exported
# together with MAX_ITERATIONS and MODE for child processes.
export ITERATION=0 MAX_ITERATIONS MODE

# --- Validate ---
# Reject bad settings up front, whether they came from config or the CLI.
if [[ ! "$MODE" =~ ^(implement|explore|fix)$ ]]; then
    log "ERROR: Invalid mode '$MODE'. Must be: implement, explore, fix"
    exit 1
fi

if [[ ! "$TOOL" =~ ^(claude|amp)$ ]]; then
    log "ERROR: Invalid tool '$TOOL'. Must be: claude, amp"
    exit 1
fi

# Both values are later compared with `[ ... -lt/-ge ... ]`. A non-integer
# (e.g. `--max 5x`, which the bare-number CLI pattern also lets through) makes
# that test error out, which would silently skip the loop or disable the
# retry limit.
if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
    log "ERROR: Invalid max iterations '$MAX_ITERATIONS'. Must be a non-negative integer."
    exit 1
fi

if [[ ! "$EVAL_RETRIES" =~ ^[0-9]+$ ]]; then
    log "ERROR: Invalid evalRetries '$EVAL_RETRIES'. Must be a non-negative integer."
    exit 1
fi

# --- Setup ---
# Everything below runs from the project root, so relative paths and the git
# commands resolve against it. Under `set -e` a failed cd aborts the script.
cd "$PROJECT_ROOT"

# Trap handler: remove the agent temp file, uninstall the stop hooks, and
# release the lock.
#
# Every step before release_lock is best-effort. Under `set -e`, a failing
# remove_hooks would otherwise end the trap handler early and leave the lock
# directory behind.
#
# Globals: LOOP_AGENT_TMPFILE (read), AUTO_HOOKS (read)
# Returns: the status of release_lock
cleanup() {
    if [ -n "${LOOP_AGENT_TMPFILE:-}" ]; then
        rm -f "$LOOP_AGENT_TMPFILE" || true
    fi
    # Remove hooks in case we exit mid-agent (Ctrl+C during a claude session)
    if [ "$AUTO_HOOKS" = true ]; then
        remove_hooks 2>/dev/null || true
    fi
    release_lock
}

# Print the end-of-run summary, pause briefly, then terminate the script.
#
# Arguments: $1 - exit status to terminate with (default 0)
#            $2 - optional reason; shown on a "Reason:" line when non-empty
# Globals:   ITERATION, MAX_ITERATIONS (read)
# Outputs:   summary on stdout
# Exits:     always, with status $1
finish() {
    local status="${1:-0}"
    local why="${2:-}"
    local rule="═══════════════════════════════════════════════════════"

    printf '\n%s\n  LOOP FINISHED\n%s\n\n' "$rule" "$rule"

    # Falls back to N/A when story_counts fails.
    printf '  Stories:    %s\n' "$(story_counts 2>/dev/null || echo 'N/A')"
    printf '  Iterations: %s / %s\n' "$ITERATION" "$MAX_ITERATIONS"
    if [ -n "$why" ]; then
        printf '  Reason:     %s\n' "$why"
    fi

    printf '\n  Progress:   .loop/progress.md\n  Stories:    .loop/prd.json\n\n'

    if any_stories_blocked 2>/dev/null; then
        printf '  ⚠ Some stories are blocked. Run /agent-loop:triage\n\n'
    fi

    printf '%s\n' "  Closing in 30 seconds. Press Enter to close now, or Ctrl+C to keep open."
    # Auto-close after 30s so the tmux session exits and the background watcher
    # fires. A timeout or EOF on stdin is not an error.
    read -r -t 30 2>/dev/null || true
    exit "$status"
}
# Path of the headless agent's temporary output file; empty when none exists.
# cleanup() deletes the file if this is set.
LOOP_AGENT_TMPFILE=""

# NOTE: Stop hook is installed/removed per-agent in run_agent(), not globally.
# This prevents the hook from killing the orchestrating CC session.
#
# NOTE(review): cleanup() does not call exit, so when it runs as the INT or
# TERM handler the script resumes afterwards — with the lock already released —
# and cleanup runs again from the EXIT trap. The run_agent notes say the loop
# relies on SIGINT to regain control, so this may be intentional for INT;
# confirm whether TERM should terminate the loop instead.
trap cleanup EXIT INT TERM

# Archive handling comes from a sourced library. Per the note below it may
# delete prd.json when the branch changed, so it has to run before the
# existence check.
check_archive

# Validate prd.json exists (AFTER archive check, which may delete it on branch change)
if [ ! -f "$LOOP_DIR/prd.json" ]; then
    log "ERROR: No prd.json found. Run /agent-loop:stories first to create one."
    exit 1
fi

# Defined in a sourced library. Under `set -e` a non-zero return aborts the
# script here.
validate_prd

# Run project init script if it exists
# It runs in a separate bash process, so it cannot change this script's
# variables; a non-zero exit aborts the loop because of `set -e`.
if [ -f "$LOOP_DIR/init.sh" ]; then
    log "Running init.sh..."
    bash "$LOOP_DIR/init.sh"
fi

# Ensure correct git branch
# When prd_branch_name yields a branch and it is not the one checked out, try in
# order: the existing local branch, a new local branch based on origin/<branch>,
# and finally a brand-new branch from the current HEAD. Only a failure of the
# last attempt is fatal (and its error output is shown).
BRANCH=$(prd_branch_name 2>/dev/null || echo "")
if [[ -n "$BRANCH" ]]; then
    CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
    if [[ "$CURRENT_BRANCH" != "$BRANCH" ]]; then
        log "Switching to branch: $BRANCH"
        if ! git checkout "$BRANCH" 2>/dev/null; then
            if ! git checkout -b "$BRANCH" "origin/$BRANCH" 2>/dev/null; then
                git checkout -b "$BRANCH"
            fi
        fi
    fi
fi

# --- Agent runner ---
# Runs a prompt through the selected AI tool.
#
# Interactive (default): Pipes prompt to claude WITHOUT --print.
#   This gives the full interactive CC UI — tool calls, file edits, etc.
#   A Stop hook is installed right before the agent starts and removed right
#   after it exits (only when AUTO_HOOKS=true). Per the original design notes
#   the hook sends SIGINT to the loop when claude finishes, which returns
#   control to the while loop for the next iteration.
#   State is tracked via files (prd.json, .verdict), not stdout.
#
# Headless (LOOP_HEADLESS=true): Uses claude --print for CI/background.
#   Output captured to file for verdict parsing.
#
# Arguments: $1 - prompt text
#            $2 - role ("generator" or "evaluator"); optional
# Globals:   LOOP_DIR, TOOL, AUTO_HOOKS, LOOP_HEADLESS, LOOP_AGENT_TIMEOUT (read)
#            LOOP_AGENT_TMPFILE (written in headless mode)
# Outputs:   stdout - interactive: contents of .verdict when role is
#                     "evaluator" and the file exists; headless: everything the
#                     agent printed (stdout and stderr)
#            stderr - this function's own diagnostics. They must stay off
#                     stdout: headless callers capture stdout and test it for
#                     emptiness to detect a crashed or timed-out agent.
# Returns:   0 in the normal case; the agent's exit status is not propagated
run_agent() {
    local prompt="$1"
    local role="${2:-}"

    # Drop any verdict left by a previous agent so it can never be re-read.
    rm -f "$LOOP_DIR/.verdict"

    local agent_exit=0
    if [ "${LOOP_HEADLESS:-false}" != "true" ]; then
        # --- Interactive mode (Ralph pattern) ---
        # Install Stop hook just before claude starts, remove after it exits.
        # This scopes the hook to only affect the loop's claude sessions.
        [ "$AUTO_HOOKS" = true ] && install_hooks

        # Subshell + `|| agent_exit=$?` keeps a failing agent from tripping set -e.
        (
            case "$TOOL" in
                claude)
                    printf '%s\n' "$prompt" | claude --dangerously-skip-permissions
                    ;;
                amp)
                    printf '%s\n' "$prompt" | amp --dangerously-allow-all
                    ;;
                *)
                    log "ERROR: Unknown tool '$TOOL'" >&2
                    exit 1
                    ;;
            esac
        ) || agent_exit=$?

        [ "$AUTO_HOOKS" = true ] && remove_hooks
        sleep 2  # Brief pause between sessions

        # Read verdict from file if evaluator wrote one
        if [ "$role" = "evaluator" ] && [ -f "$LOOP_DIR/.verdict" ]; then
            cat "$LOOP_DIR/.verdict"
        fi
    else
        # --- Headless mode ---
        local output_file
        output_file=$(mktemp)
        # Recorded so cleanup() can delete the file if we are interrupted.
        LOOP_AGENT_TMPFILE="$output_file"

        (
            case "$TOOL" in
                claude)
                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
                        claude --dangerously-skip-permissions --output-format text \
                        --print > "$output_file" 2>&1
                    ;;
                amp)
                    printf '%s\n' "$prompt" | timeout "${LOOP_AGENT_TIMEOUT:-600}" \
                        amp --dangerously-allow-all > "$output_file" 2>&1
                    ;;
                *)
                    log "ERROR: Unknown tool '$TOOL'" >&2
                    exit 1
                    ;;
            esac
        ) || agent_exit=$?

        if [ "$agent_exit" -ne 0 ] && [ ! -s "$output_file" ]; then
            # To stderr: on stdout this text would become the captured "agent
            # output" and defeat the callers' empty-output checks.
            log "WARNING: Agent exited with code $agent_exit and produced no output." >&2
        fi

        cat "$output_file"
        rm -f "$output_file"
        LOOP_AGENT_TMPFILE=""
    fi
}

# --- Parse evaluator verdict ---
# Turn raw evaluator output into a single verdict line.
#
# Arguments: $1 - evaluator output (may span multiple lines)
# Outputs:   stdout - exactly one of:
#                       PASS
#                       REJECT:<reason>
#            stderr - a warning when no verdict tag is present. It must not go
#                     to stdout: callers capture stdout and match it against
#                     PASS / REJECT:*, so any extra leading line would make the
#                     fail-safe reject match nothing.
# Notes:     REJECT wins when both tags are present. The reason is the text
#            between the first <rejection_reason> and the next
#            </rejection_reason> (or end of output when the closing tag is
#            missing), with newlines and runs of spaces collapsed to single
#            spaces.
parse_verdict() {
    local output="$1"

    # Pattern matching instead of `echo | grep -q`: under pipefail, grep -q
    # exiting early on a large input can fail the pipeline and report a miss.
    if [[ "$output" == *"<verdict>REJECT</verdict>"* ]]; then
        local reason=""
        if [[ "$output" == *"<rejection_reason>"* ]]; then
            # Cut on the tags themselves rather than on line ranges, so a
            # one-line reason followed by more output stops at its closing tag.
            reason="${output#*<rejection_reason>}"
            reason="${reason%%</rejection_reason>*}"
            reason=$(printf '%s' "$reason" | tr '\n' ' ' | sed 's/  */ /g;s/^ //;s/ $//')
        fi
        if [ -z "$reason" ]; then
            reason="Rejected without specific reason"
        fi
        echo "REJECT:${reason}"
    elif [[ "$output" == *"<verdict>PASS</verdict>"* ]]; then
        echo "PASS"
    else
        # No explicit verdict — fail-safe: treat as reject so broken evaluators don't silently approve
        log "WARNING: No verdict tag found in evaluator output. Treating as REJECT (fail-safe)." >&2
        echo "REJECT:Evaluator produced no verdict tag — output may be malformed"
    fi
}

# --- Main loop ---
# Startup banner: echo the effective settings before the first iteration.
log_header "Loop Starting"
log "Mode:       $MODE"
log "Tool:       $TOOL"
log "Max iter:   $MAX_ITERATIONS"
log "Eval:       $(if [[ $SKIP_EVAL == true ]]; then echo 'off'; else echo 'on'; fi)"
log "Dry run:    $(if [[ $DRY_RUN == true ]]; then echo 'yes'; else echo 'no'; fi)"
log "Project:    $PROJECT_ROOT"
# Shows N/A when story_counts fails.
log "Stories:    $(story_counts 2>/dev/null || echo 'N/A')"
echo ""

# One pass per iteration: pick a story, run the generator, measure its diff
# against the scope budget, then (unless evaluation is skipped) run the
# evaluator and record its verdict. The loop ends through finish() — which
# exits the script — or when ITERATION reaches MAX_ITERATIONS.
while [ "$ITERATION" -lt "$MAX_ITERATIONS" ]; do
    # Incremented before the completion checks, so the count shown by finish()
    # includes the pass that detected completion.
    ITERATION=$((ITERATION + 1))
    export ITERATION

    # Check if all stories already pass
    if all_stories_pass 2>/dev/null; then
        snapshot_for_archive
        finish 0 "All stories complete"
    fi

    # Capture which story the generator will work on (highest-priority incomplete)
    CURRENT_STORY_ID=$(next_story_id 2>/dev/null || echo "")
    export CURRENT_STORY_ID

    # No actionable story — all remaining are passed or blocked
    if [ -z "$CURRENT_STORY_ID" ]; then
        snapshot_for_archive
        if any_stories_blocked 2>/dev/null; then
            finish $EXIT_ALL_BLOCKED "Some stories blocked — needs human review"
        else
            finish $EXIT_OK "No actionable stories remaining"
        fi
    fi

    # Capture git state before generator runs (for evaluator diff)
    # Empty when HEAD cannot be resolved; the scope check below is then skipped.
    PRE_GENERATOR_SHA=$(git rev-parse HEAD 2>/dev/null || echo "")
    export PRE_GENERATOR_SHA

    # --- Generator pass ---
    log_header "Iteration $ITERATION / $MAX_ITERATIONS — GENERATOR${CURRENT_STORY_ID:+ ($CURRENT_STORY_ID)}"

    GENERATOR_PROMPT=$(build_prompt "generator" "$MODE")

    # --dry-run: print prompts and exit without running agents
    # Only the first iteration's prompts are shown; the script exits 0 here.
    if [ "$DRY_RUN" = true ]; then
        log "=== GENERATOR PROMPT ==="
        printf '%s\n' "$GENERATOR_PROMPT"
        echo ""
        if [ "$SKIP_EVAL" != true ] && [ -n "$CURRENT_STORY_ID" ]; then
            EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")
            log "=== EVALUATOR PROMPT ==="
            printf '%s\n' "$EVAL_PROMPT"
        fi
        log "Dry run complete. Showing prompts for story: ${CURRENT_STORY_ID:-unknown}"
        exit 0
    fi

    if [ "${LOOP_HEADLESS:-false}" != "true" ]; then
        # Interactive: run directly, no capture. User sees full CC UI.
        run_agent "$GENERATOR_PROMPT" "generator"
        GENERATOR_OUTPUT=""
    else
        # Headless: capture output for parsing.
        GENERATOR_OUTPUT=$(run_agent "$GENERATOR_PROMPT" "generator")
        # An empty capture still consumes this iteration.
        if [ -z "$GENERATOR_OUTPUT" ]; then
            log "WARNING: Generator produced empty output (timeout or crash). Skipping to next iteration."
            continue
        fi
    fi

    # --- Scope budget check ---
    # Compare what the generator committed (diff from PRE_GENERATOR_SHA to HEAD)
    # with the per-mode limits from config. This block only logs warnings and
    # exports the two counts; it does not reject anything itself.
    # NOTE(review): presumably build_prompt passes the exported counts to the
    # evaluator — confirm in the sourced prompt library.
    if [ -n "$PRE_GENERATOR_SHA" ]; then
        # Number of changed paths.
        SCOPE_FILES_MODIFIED=$(git diff --name-only "$PRE_GENERATOR_SHA" HEAD 2>/dev/null | wc -l | tr -d ' ')
        # Insertion count parsed from the summary line of --stat; "0" when the
        # pipeline finds none.
        SCOPE_LINES_WRITTEN=$(git diff --stat "$PRE_GENERATOR_SHA" HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")

        MAX_MODIFY=$(config_default ".scopeBudgets.${MODE}.maxFilesToModify" "10")
        MAX_WRITE=$(config_default ".scopeBudgets.${MODE}.maxLinesToWrite" "500")

        if [ "${SCOPE_FILES_MODIFIED:-0}" -gt "$MAX_MODIFY" ]; then
            log "WARNING: Scope budget exceeded — modified $SCOPE_FILES_MODIFIED files (limit: $MAX_MODIFY)"
        fi
        if [ "${SCOPE_LINES_WRITTEN:-0}" -gt "$MAX_WRITE" ]; then
            log "WARNING: Scope budget exceeded — wrote $SCOPE_LINES_WRITTEN lines (limit: $MAX_WRITE)"
        fi

        export SCOPE_FILES_MODIFIED SCOPE_LINES_WRITTEN
    fi

    # NOTE: Do NOT check all_stories_pass here. The generator marks its own story
    # as passed, but the evaluator hasn't verified yet. Checking here would skip
    # evaluation on the last story. The completion check is at the top of the loop.

    # --- Evaluator pass ---
    if [ "$SKIP_EVAL" != true ]; then
        log_header "Iteration $ITERATION / $MAX_ITERATIONS — EVALUATOR${CURRENT_STORY_ID:+ ($CURRENT_STORY_ID)}"

        # Defensive: an empty story ID already ends the run via finish() near
        # the top of the loop, so this branch is not expected to trigger.
        if [ -z "$CURRENT_STORY_ID" ]; then
            log "WARNING: No actionable story ID found. Skipping evaluator."
            continue
        fi

        EVAL_PROMPT=$(build_prompt "evaluator" "$MODE")

        if [ "${LOOP_HEADLESS:-false}" != "true" ]; then
            # Interactive: run directly, read verdict from file.
            run_agent "$EVAL_PROMPT" "evaluator"
            if [ -f "$LOOP_DIR/.verdict" ]; then
                EVAL_OUTPUT=$(cat "$LOOP_DIR/.verdict")
            else
                # Fail-safe: synthesize a REJECT so a missing file never passes.
                log "WARNING: No verdict file found. Treating as REJECT."
                EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no verdict file</rejection_reason>"
            fi
        else
            # Headless: capture output for parsing.
            EVAL_OUTPUT=$(run_agent "$EVAL_PROMPT" "evaluator")
            if [ -z "$EVAL_OUTPUT" ]; then
                log "WARNING: Evaluator produced empty output. Treating as REJECT."
                EVAL_OUTPUT="<verdict>REJECT</verdict><rejection_reason>Evaluator produced no output</rejection_reason>"
            fi
        fi

        # parse_verdict is expected to yield "PASS" or "REJECT:<reason>"; any
        # other value matches no arm below and is ignored.
        VERDICT=$(parse_verdict "$EVAL_OUTPUT")

        case "$VERDICT" in
            PASS)
                log "Evaluator: PASS"
                if [ -n "$CURRENT_STORY_ID" ]; then
                    mark_story_pass "$CURRENT_STORY_ID"
                fi
                ;;
            REJECT:*)
                # Everything after the "REJECT:" prefix is the reason text.
                REASON="${VERDICT#REJECT:}"
                log "Evaluator: REJECT — $REASON"

                if [ -n "$CURRENT_STORY_ID" ]; then
                    mark_story_reject "$CURRENT_STORY_ID" "$REASON"

                    # Check retry limit — block story to prevent infinite retries
                    REJECTIONS=$(story_rejections "$CURRENT_STORY_ID")
                    REJECTIONS="${REJECTIONS:-0}"
                    if [ "$REJECTIONS" -ge "$EVAL_RETRIES" ]; then
                        log "WARNING: Story $CURRENT_STORY_ID rejected $REJECTIONS times (limit: $EVAL_RETRIES). Blocking for human review."
                        mark_story_blocked "$CURRENT_STORY_ID" "Rejected $REJECTIONS times. Last: $REASON"
                        append_progress "### BLOCKED: $CURRENT_STORY_ID

Rejected $REJECTIONS times. Needs human review. Last reason: $REASON

---"
                    fi
                fi
                ;;
        esac
    fi
done

# --- Iteration budget exhausted ---
# The in-loop completion checks only run at the START of an iteration, so work
# finished during the final allowed iteration has not been examined yet.
# Re-check here so a run that completed (or ended fully blocked) on its last
# iteration is not reported as "max iterations reached, work remains".
snapshot_for_archive
if all_stories_pass 2>/dev/null; then
    finish "$EXIT_OK" "All stories complete"
fi
if [ -z "$(next_story_id 2>/dev/null || echo "")" ] && any_stories_blocked 2>/dev/null; then
    finish "$EXIT_ALL_BLOCKED" "Some stories blocked — needs human review"
fi
# finish() exits, so this line is only reached when work genuinely remains.
finish "$EXIT_MAX_ITERATIONS" "Max iterations reached ($MAX_ITERATIONS)"