#!/bin/bash
set -e

# ==============================================================================
# BLUEPRINT: LOCAL LLM + PI AGENT SETUP (MAC MINI M4 24GB)
# Zero cloud API cost coding agent environment with active model rotation.
#
# One-off install:  curl -fsSL https://workswithagents.dev/static/setup-local-llm-pi.sh | bash
# (The swap commands below need a saved local copy of this script.)
# Swap to 9B:        ./setup-local-llm-pi.sh swap-to-9b
# Swap to 4B:        ./setup-local-llm-pi.sh swap-to-4b
# ==============================================================================

# Active model directory; rotate_models moves the selected model in here.
MODELS_DIR="${HOME}/.omlx/models"
# Holding area for the model that is rotated out of the active directory.
STAGING_DIR="${HOME}/.omlx/staging"

# Model locations, relative to MODELS_DIR / STAGING_DIR.
M4B_DIR="mlx-community/Qwen3.5-4B-OptiQ-4bit"
M9B_DIR="mlx-community/Qwable-9B-Claude-Fable-5-mlx-4Bit"

# --- MODEL ROTATION ENGINE ---
#######################################
# Swap which model sits in the active models directory, then restart the
# oMLX server process and print the model list it reports.
# Globals:   MODELS_DIR, STAGING_DIR, M9B_DIR, M4B_DIR (read)
# Arguments: $1 - rotation target, "9b" or "4b"
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 on success
#            1 if a move would collide with an existing path, or fails
#            2 if the target is not recognised (nothing is touched)
#######################################
rotate_models() {
    local target=${1:-}
    local incoming_dir incoming_name outgoing_dir outgoing_name

    # Resolve the target once so both directions share a single code path.
    case "$target" in
        9b)
            incoming_dir=$M9B_DIR
            incoming_name="Qwable-9B"
            outgoing_dir=$M4B_DIR
            outgoing_name="Qwen3.5-4B"
            ;;
        4b)
            incoming_dir=$M4B_DIR
            incoming_name="Qwen3.5-4B"
            outgoing_dir=$M9B_DIR
            outgoing_name="Qwable-9B"
            ;;
        *)
            # Do not restart the server for a request we cannot interpret.
            echo "❌ Unknown rotation target '$target' (expected '9b' or '4b')." >&2
            return 2
            ;;
    esac

    echo "🔄 Running Model Rotation sequence for the 24GB Memory Wall..."

    mkdir -p "$MODELS_DIR/mlx-community" "$STAGING_DIR/mlx-community" || return 1

    if [ -d "$STAGING_DIR/$incoming_dir" ]; then
        # `mv SRC DST` with an existing directory DST moves SRC *inside* DST.
        # Check both destinations before moving anything so a duplicate copy
        # can never leave the layout nested or half-rotated.
        if [ -e "$MODELS_DIR/$incoming_dir" ]; then
            echo "❌ $incoming_name exists in both staging and active models; remove one copy and retry." >&2
            return 1
        fi
        if [ -d "$MODELS_DIR/$outgoing_dir" ] && [ -e "$STAGING_DIR/$outgoing_dir" ]; then
            echo "❌ $outgoing_name exists in both staging and active models; remove one copy and retry." >&2
            return 1
        fi

        if [ -d "$MODELS_DIR/$outgoing_dir" ]; then
            echo "📦 Moving $outgoing_name out to staging..."
            mv "$MODELS_DIR/$outgoing_dir" "$STAGING_DIR/$outgoing_dir" || return 1
        fi
        echo "📥 Bringing $incoming_name into active models..."
        mv "$STAGING_DIR/$incoming_dir" "$MODELS_DIR/$incoming_dir" || return 1
    else
        echo "ℹ️ $incoming_name is already in place or needs downloading via HF CLI."
    fi

    echo "♻️ Cycling local oMLX inference backend..."
    # pkill exits non-zero when nothing matched; that is not an error here.
    pkill -f "omlx serve" || true
    sleep 3
    echo "✅ Cycle complete. Active model list:"
    curl -s http://127.0.0.1:8000/v1/models || echo "⌛ Server recycling..."
}

# --- CLI ARGUMENT ROUTING ---
# With no argument the script continues into the installer below. The two swap
# subcommands rotate models and exit. Anything else is rejected, so a mistyped
# subcommand cannot accidentally trigger a full (config-overwriting) install.
case "${1:-}" in
    swap-to-9b)
        rotate_models "9b"
        exit 0
        ;;
    swap-to-4b)
        rotate_models "4b"
        exit 0
        ;;
    "")
        # No subcommand: fall through to installation.
        ;;
    *)
        echo "❌ Unknown command: $1" >&2
        echo "Usage: $0 [swap-to-9b|swap-to-4b]" >&2
        exit 2
        ;;
esac

# --- CORE INSTALLATION & PROVISIONING ---
# Verifies the prerequisites (Homebrew, Node.js, npm), then installs oMLX, the
# Pi coding agent and scrapling when they are not already on PATH.
echo "🍏 Starting Local Agent Environment Setup..."

echo "📦 Checking core dependencies..."
# `command -v` is a shell builtin with a defined exit status, unlike `which`.
command -v brew >/dev/null 2>&1 || { echo "❌ Homebrew required. Install first." >&2; exit 1; }
command -v node >/dev/null 2>&1 || { echo "❌ Node.js/npm required. Install first." >&2; exit 1; }
# npm performs the Pi install below, so check it explicitly rather than
# assuming it ships alongside node.
command -v npm >/dev/null 2>&1 || { echo "❌ Node.js/npm required. Install first." >&2; exit 1; }

if ! command -v omlx >/dev/null 2>&1; then
    echo "📥 Installing oMLX via Homebrew tap..."
    brew install jundot/omlx/omlx
else
    echo "✅ oMLX already installed."
fi

if ! command -v pi >/dev/null 2>&1; then
    echo "📥 Installing Pi Coding Agent via npm..."
    npm install -g @earendil-works/pi-coding-agent
else
    echo "✅ Pi already installed ($(pi --version))."
fi

if ! command -v scrapling >/dev/null 2>&1; then
    echo "📥 Installing scrapling for MCP..."
    # Best effort: a failed scrapling install must not abort the whole setup.
    pip3 install scrapling || echo "⚠️ Non-critical: could not install scrapling."
fi

echo "📁 Structuring directories..."
# Create every directory the later steps write into; -p keeps reruns harmless.
mkdir -p \
    "$MODELS_DIR" \
    "$STAGING_DIR" \
    ~/.pi/agent/skills/pix-optimizer \
    ~/.hermes \
    ~/Library/LaunchAgents

echo "⚙️ Configuring oMLX performance tuning..."
# Per-model settings file; the quoted delimiter writes the JSON verbatim.
cat > ~/.omlx/model_settings.json << 'EOF'
{
  "Qwen3.5-4B-OptiQ-4bit": {
    "enable_thinking": false,
    "turboquant_kv_enabled": true,
    "specprefill_enabled": true
  },
  "Qwable-9B-Claude-Fable-5-mlx-4Bit": {
    "turboquant_kv_enabled": true,
    "specprefill_enabled": true
  }
}
EOF

# Write a per-user LaunchAgent that runs `omlx serve --port 8000` at load and
# keeps it alive, then (re)load it.
#
# Resolve the installed omlx binary rather than assuming the Apple Silicon
# Homebrew prefix. launchd needs an absolute path, so anything else falls back
# to the original default.
omlx_bin=$(command -v omlx 2>/dev/null || true)
case "$omlx_bin" in
    /*) ;;
    *) omlx_bin="/opt/homebrew/bin/omlx" ;;
esac

# Unquoted delimiter so ${omlx_bin} expands; the plist has no other shell
# metacharacters. The DOCTYPE carries the full PropertyList DTD URL.
cat << EOF > ~/Library/LaunchAgents/com.omlx.server.plist
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.omlx.server</string>
    <key>ProgramArguments</key>
    <array>
        <string>${omlx_bin}</string>
        <string>serve</string>
        <string>--port</string>
        <string>8000</string>
    </array>
    <key>RunAtLoad</key>
    <true/>
    <key>KeepAlive</key>
    <true/>
</dict>
</plist>
EOF

echo "🚀 Booting oMLX..."
# Unload first so a rerun picks up the rewritten plist; failure just means the
# agent was not loaded yet.
launchctl unload ~/Library/LaunchAgents/com.omlx.server.plist 2>/dev/null || true
launchctl load ~/Library/LaunchAgents/com.omlx.server.plist

echo "⚙️ Configuring Pi..."
# Both files below are rewritten on every run. Keep a timestamped copy of any
# existing file first so user edits (for example a real apiKey replacing the
# placeholder) are not lost on a rerun. Under `set -e` a failed backup aborts
# before anything is overwritten.
pi_backup_stamp=$(date +%Y%m%d%H%M%S)
for pi_cfg in ~/.pi/agent/settings.json ~/.pi/agent/models.json; do
    if [ -f "$pi_cfg" ]; then
        cp -p "$pi_cfg" "$pi_cfg.bak.$pi_backup_stamp"
        echo "   ↳ Existing $pi_cfg saved as $pi_cfg.bak.$pi_backup_stamp"
    fi
done

# Agent defaults: provider, default model and the package list.
cat << 'EOF' > ~/.pi/agent/settings.json
{
  "defaultProvider": "omlx",
  "defaultModel": "Qwen3.5-4B-OptiQ-4bit",
  "packages": [
    "npm:context-mode",
    "npm:pi-subagents",
    "npm:pi-workflow-engine",
    "npm:pi-mcp-adapter",
    "npm:@fgladisch/pi-caveman",
    "npm:@xynogen/pix-optimizer"
  ]
}
EOF

# Provider definition pointing Pi at the local server on port 8000.
# The apiKey value is a placeholder to be edited by the user.
cat << 'EOF' > ~/.pi/agent/models.json
{
  "providers": {
    "omlx": {
      "baseUrl": "http://127.0.0.1:8000/v1",
      "api": "openai-completions",
      "apiKey": "your-omlx-api-key",
      "authHeader": true,
      "models": [
        {
          "id": "Qwable-9B-Claude-Fable-5-mlx-4Bit",
          "name": "Qwable-9B-Claude-Fable-5-mlx-4Bit",
          "reasoning": false,
          "input": ["text"],
          "cost": { "input": 0, "output": 0 },
          "contextWindow": 262144,
          "maxTokens": 32768
        },
        {
          "id": "Qwen3.5-4B-OptiQ-4bit",
          "name": "Qwen3.5-4B-OptiQ-4bit",
          "reasoning": false,
          "input": ["text"],
          "cost": { "input": 0, "output": 0 },
          "contextWindow": 262144,
          "maxTokens": 32768
        }
      ]
    }
  }
}
EOF

echo "⚙️ Configuring Hermes Agent..."
# config.yaml is rewritten on every run. Keep a timestamped copy of an existing
# file first so user edits (the api_key / CONTEXT7_API_KEY placeholders are
# meant to be replaced) are not lost on a rerun. Under `set -e` a failed backup
# aborts before the file is overwritten.
hermes_cfg=~/.hermes/config.yaml
if [ -f "$hermes_cfg" ]; then
    hermes_backup="$hermes_cfg.bak.$(date +%Y%m%d%H%M%S)"
    cp -p "$hermes_cfg" "$hermes_backup"
    echo "   ↳ Existing $hermes_cfg saved as $hermes_backup"
fi

# Quoted delimiter: the YAML is written verbatim, with no shell expansion.
cat << 'EOF' > ~/.hermes/config.yaml
custom_providers:
  - name: omlx
    api_key: "your-omlx-api-key"
    api_mode: chat_completions
    base_url: http://127.0.0.1:8000/v1
    models:
      Qwen3.5-4B-OptiQ-4bit:
        context_length: 262144
      Qwable-9B-Claude-Fable-5-mlx-4Bit:
        context_length: 262144

delegation:
  model: Qwable-9B-Claude-Fable-5-mlx-4Bit
  provider: omlx
  context_length: 262144

mcp_servers:
  context7:
    url: https://mcp.context7.com/mcp
    headers:
      CONTEXT7_API_KEY: "your-key"
    timeout: 60
    connect_timeout: 30
  scrapling:
    command: scrapling
    args: [mcp]
EOF

echo "📡 Verifying inference connectivity..."
# Give launchd a moment to start the server before probing it once.
sleep 3
if curl -s http://127.0.0.1:8000/v1/models > /dev/null; then
    echo "✅ oMLX responded on port 8000."
else
    echo "⚠️ oMLX initialising via launchd..."
fi

# When the script is piped into a shell (curl … | sh), $0 is the interpreter
# name rather than a script path, so the printed rotation commands would be
# wrong. Fall back to the script's documented file name in that case.
script_ref=$0
[ -f "$script_ref" ] || script_ref="./setup-local-llm-pi.sh"

echo ""
echo "============================================================"
echo "🎉 Setup complete!"
echo ""
echo "🔄 ROTATION COMMANDS:"
echo "   $script_ref swap-to-9b    → activate Qwable-9B (coding)"
echo "   $script_ref swap-to-4b    → activate Qwen3.5-4B (fast)"
echo ""
echo "💡 Quick start:"
echo "   pi -p 'list files'"
echo "============================================================"