#!/usr/bin/env bash
# Ground Control (GCTRL) Installer
# Usage: curl -fsSL https://gctrl.tech/install | bash
# No license key required — enter your key at http://localhost:3001 after install
set -euo pipefail

# Installer settings. GCTRL_VERSION may be supplied through the environment and
# falls back to "latest"; the remaining values are fixed or derived from $HOME.
# NOTE(review): nothing visible in this file reads GCTRL_VERSION — confirm
# whether it is meant to select the image tags.
: "${GCTRL_VERSION:=latest}"
API_URL='https://api.gctrl.tech'
INSTALL_DIR="$HOME/gctrl"
CONFIG_DIR="$INSTALL_DIR/config"

# ── Colors ────────────────────────────────────────────────────────────────────
# ANSI colour escapes; stored as literal backslash sequences and interpreted by
# the helpers below when they print.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Logging helpers. Each prints "[GCTRL] <message>" with a coloured tag and
# interprets backslash escapes in the message (callers rely on "\n").
#   $1 - message text (optional; a missing argument prints just the tag instead
#        of aborting under `set -u`)
# info/success/warn write to stdout so multi-line guidance that mixes them stays
# on one stream. error writes to stderr and terminates the script with status 1.
info()    { printf '%b\n' "${BLUE}[GCTRL]${NC} ${1:-}"; }
success() { printf '%b\n' "${GREEN}[GCTRL]${NC} ${1:-}"; }
warn()    { printf '%b\n' "${YELLOW}[GCTRL]${NC} ${1:-}"; }
error()   { printf '%b\n' "${RED}[GCTRL]${NC} ${1:-}" >&2; exit 1; }

# ── Prerequisites ─────────────────────────────────────────────────────────────
# Verify the tools the installer needs and make sure the Docker daemon answers.
# Aborts through error() when a tool is missing or the daemon is still not
# reachable after the 60-second wait loop (preceded by a 2-second pause when a
# systemctl start is attempted).
check_prereqs() {
  info "Checking prerequisites..."

  # Collect every missing tool first so the user gets one complete list.
  local -a absent=()
  local tool
  for tool in docker curl openssl; do
    if ! command -v "$tool" &>/dev/null; then
      absent+=("$tool")
    fi
  done
  if ! docker compose version &>/dev/null; then
    absent+=("docker-compose-plugin")
  fi

  if (( ${#absent[@]} > 0 )); then
    error "Missing: ${absent[*]}\nInstall Docker: https://docs.docker.com/engine/install/"
  fi

  # Daemon already reachable: nothing more to do.
  if docker info &>/dev/null; then
    success "Prerequisites OK"
    return
  fi

  # Where systemctl is available, try to start the daemon ourselves (best-effort).
  if command -v systemctl &>/dev/null; then
    warn "Docker daemon not running — attempting to start..."
    sudo systemctl start docker 2>/dev/null || true
    sleep 2
  fi

  # Otherwise (or if that did not help) poll every 3s, giving up after 60s.
  if ! docker info &>/dev/null; then
    warn "Waiting for Docker to start — please open Docker Desktop..."
    local elapsed=0
    until docker info &>/dev/null; do
      sleep 3
      elapsed=$((elapsed + 3))
      printf "."
      if [ "$elapsed" -ge 60 ]; then
        echo ""
        error "Docker is not running after 60s. Please start Docker Desktop and re-run the installer."
      fi
    done
    echo ""
  fi

  success "Prerequisites OK"
}

# ── Service Detection ─────────────────────────────────────────────────────────
# Check whether something accepts TCP connections on 127.0.0.1:<port>.
#   $1 - port number
# Returns 0 when the connection succeeds, non-zero otherwise.
#
# The connect is wrapped in a 2-second timeout when `timeout` (GNU coreutils) or
# `gtimeout` (the name Homebrew's coreutils installs on macOS) is available.
# Stock macOS ships neither; previously that made every probe fail with
# "command not found", so existing services were never detected there. In that
# case we fall back to a plain bash /dev/tcp connect: a loopback connect either
# succeeds or is refused immediately, so the missing timeout is acceptable.
probe_tcp() {
  local port=$1
  local timeout_bin=""

  if command -v timeout &>/dev/null; then
    timeout_bin="timeout"
  elif command -v gtimeout &>/dev/null; then
    timeout_bin="gtimeout"
  fi

  if [ -n "$timeout_bin" ]; then
    "$timeout_bin" 2 bash -c ">/dev/tcp/127.0.0.1/$port" 2>/dev/null
  else
    # Subshell so the socket is closed again as soon as the check finishes.
    ( : >"/dev/tcp/127.0.0.1/${port}" ) 2>/dev/null
  fi
}

# Decide whether the Ollama listener on :11434 can be reached from containers.
# A native Ollama bound to 127.0.0.1 answers the host-side probe but is not
# reachable from the Dockerised GCTRL stack. The listening sockets are read from
# `ss` (preferred) or `netstat` on a best-effort basis.
# Returns:
#   0 - bound to all interfaces (0.0.0.0, * or [::]), or nothing could be
#       determined ("unknown" is treated as OK; the dashboard guides the user if
#       it turns out unreachable)
#   1 - only a 127.0.0.1:11434 listener was found
ollama_bound_all_ifaces() {
  local listeners=""

  # Column 4 of both tools' output is read as the local address:port.
  if command -v ss >/dev/null 2>&1; then
    listeners=$(ss -tlnH 2>/dev/null | awk '{print $4}')
  elif command -v netstat >/dev/null 2>&1; then
    listeners=$(netstat -tln 2>/dev/null | awk '{print $4}')
  fi

  # No data at all → unknown → assume reachable.
  if [ -z "$listeners" ]; then
    return 0
  fi

  # A wildcard bind wins even if a loopback bind is listed as well.
  if grep -qE '(0\.0\.0\.0|\*|\[::\]):11434$' <<<"$listeners"; then
    return 0
  fi

  if grep -qE '127\.0\.0\.1:11434$' <<<"$listeners"; then
    return 1
  fi

  return 0
}

# Shared installer state, filled in by the detection / selection steps below.

# Compose profiles to enable — one entry per bundled service to deploy.
declare -a PROFILES=()

# Service endpoints handed to the GCTRL containers; set by detect_services.
NEO4J_URI=''
QDRANT_URL=''
OLLAMA_BASE=''

# Neo4j credentials written to .env (generate_config replaces the password
# when the bundled Neo4j is used).
NEO4J_USER='neo4j'
NEO4J_PASSWORD='gctrl-neo4j-password'

# Outcome of detect_gpu.
GPU_ENABLED='false'
GPU_TYPE='none'   # none | nvidia | amd

# Generation model chosen by choose_model() ("" = skip / pick later).
CHAT_MODEL=''

# ── GPU Detection ─────────────────────────────────────────────────────────────
# Detection strategy:
#   NVIDIA — nvidia-smi reports only discrete NVIDIA GPUs (no Intel integrated).
#             Requires nvidia-container-toolkit for Docker GPU passthrough.
#   AMD    — rocm-smi or /dev/kfd presence signals an AMD GPU.
#             Requires ROCm and /dev/kfd + /dev/dri for Docker passthrough.
#   CPU    — fallback when no discrete GPU (or toolkit) is detected.
# Globals written: GPU_ENABLED ("true" only when passthrough looks usable) and
# GPU_TYPE ("nvidia" or "amd"); both are left untouched on the CPU fallback.
# Takes no arguments; reports its findings through info/warn/success.
detect_gpu() {
  # ── NVIDIA ────────────────────────────────────────────────────────────────
  if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
    local gpu_name
    # Best-effort name lookup. The query can fail OR succeed with no output, so
    # the fallback is applied whenever the result is empty.
    gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader 2>/dev/null | head -1) || true
    gpu_name="${gpu_name:-NVIDIA GPU}"

    # The container toolkit is what lets Docker hand the GPU to containers:
    # accept either its CLI on PATH or an nvidia mention in `docker info`.
    if command -v nvidia-ctk &>/dev/null || docker info 2>/dev/null | grep -qi "nvidia"; then
      GPU_ENABLED=true
      GPU_TYPE="nvidia"
      success "GPU detected: ${gpu_name} — KEX will use NVIDIA GPU acceleration"
      return
    else
      warn "NVIDIA GPU found (${gpu_name}) but nvidia-container-toolkit is not installed"
      warn "KEX will run on CPU. To enable GPU later:"
      info "  sudo apt-get install -y nvidia-container-toolkit"
      info "  sudo nvidia-ctk runtime configure --runtime=docker"
      info "  sudo systemctl restart docker"
      info "  curl -fsSL https://gctrl.tech/install | bash"
      return
    fi
  fi

  # ── AMD ───────────────────────────────────────────────────────────────────
  # rocm-smi is the AMD equivalent of nvidia-smi.
  # /dev/kfd is the AMD GPU kernel fusion driver — present when ROCm is installed.
  local amd_detected=false
  local amd_name=""

  if command -v rocm-smi &>/dev/null && rocm-smi &>/dev/null; then
    amd_detected=true
    # Take the text after the first "card series:" label (case-insensitive).
    # Done in awk rather than `grep -oP` with a variable-length lookbehind: that
    # construct is rejected by PCRE releases before PCRE2 10.43 and -P is
    # GNU-only, which made the lookup fall back to the generic name.
    # awk reads the whole stream (no early exit) so rocm-smi never sees SIGPIPE.
    amd_name=$(rocm-smi --showproductname 2>/dev/null | awk '
      !found {
        pos = index(tolower($0), "card series:")
        if (pos > 0) {
          name = substr($0, pos + 12)
          sub(/^[[:space:]]+/, "", name)
          if (name != "") { print name; found = 1 }
        }
      }') || true
  elif [ -e /dev/kfd ]; then
    # /dev/kfd present even without rocm-smi in PATH (e.g. system-level ROCm install)
    amd_detected=true
  fi
  amd_name="${amd_name:-AMD GPU}"

  if [ "${amd_detected}" = "true" ]; then
    # Both device nodes are what the compose override passes into the containers.
    if [ -e /dev/kfd ] && [ -e /dev/dri ]; then
      GPU_ENABLED=true
      GPU_TYPE="amd"
      success "GPU detected: ${amd_name} — KEX will use AMD ROCm acceleration"
    else
      warn "AMD GPU found but /dev/kfd or /dev/dri not accessible — KEX will run on CPU"
      warn "To enable AMD GPU, install ROCm: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
    fi
    return
  fi

  info "No discrete GPU detected — KEX will run on CPU"
}

# Probe the host for already-running Neo4j (:7687), Qdrant (:6333) and Ollama
# (:11434). For each service found, point GCTRL at it through
# host.docker.internal; for each one missing, add the matching "bundled-*"
# compose profile and point GCTRL at the bundled container instead.
# Globals written: PROFILES, NEO4J_URI, QDRANT_URL, OLLAMA_BASE.
detect_services() {
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  echo ""
  echo "$rule"
  info "Detecting existing infrastructure..."
  echo ""

  # Neo4j
  if ! probe_tcp 7687; then
    PROFILES+=("bundled-neo4j")
    NEO4J_URI="bolt://gctrl-neo4j:7687"
    info "Neo4j not found — will deploy bundled container"
  else
    NEO4J_URI="bolt://host.docker.internal:7687"
    success "Neo4j detected on :7687 — will connect to existing instance"
  fi

  # Qdrant
  if ! probe_tcp 6333; then
    PROFILES+=("bundled-qdrant")
    QDRANT_URL="http://gctrl-qdrant:6333"
    info "Qdrant not found — will deploy bundled container"
  else
    QDRANT_URL="http://host.docker.internal:6333"
    success "Qdrant detected on :6333 — will connect to existing instance"
  fi

  # Ollama: classify first, then act on the outcome.
  #   native   - listening and reachable from containers
  #   loopback - listening on localhost only, so containers cannot reach it
  #   absent   - nothing listening
  local ollama_state="absent"
  if probe_tcp 11434; then
    if ollama_bound_all_ifaces; then
      ollama_state="native"
    else
      ollama_state="loopback"
    fi
  fi

  case "$ollama_state" in
    native)
      OLLAMA_BASE="http://host.docker.internal:11434"
      success "Ollama detected on :11434 — connecting to your native instance (GPU-capable)"
      ;;
    loopback)
      # Bundle the CPU Ollama so the platform works out of the box; the user
      # can switch to the native (GPU) instance later by exposing it.
      PROFILES+=("bundled-ollama")
      OLLAMA_BASE="http://gctrl-ollama:11434"
      warn "Native Ollama found, but it listens on localhost only — Docker can't reach it."
      warn "→ Using the bundled (CPU) Ollama so GCTRL works out of the box."
      warn "  For GPU: set OLLAMA_HOST=0.0.0.0 on your Ollama, then switch in the"
      warn "  dashboard (Settings → AI Models). Guide: https://gctrl.tech/docs/gpu"
      ;;
    *)
      PROFILES+=("bundled-ollama")
      OLLAMA_BASE="http://gctrl-ollama:11434"
      info "Ollama not found — will deploy bundled container"
      ;;
  esac

  # Summary
  echo ""
  if (( ${#PROFILES[@]} == 0 )); then
    success "All infrastructure already present — GCTRL will connect to your existing stack"
  else
    info "Will deploy bundled: ${PROFILES[*]}"
  fi
  echo "$rule"
  echo ""
}

# ── Model Picker ──────────────────────────────────────────────────────────────
# Choose the local generation model KEX/RAG use and store it in CHAT_MODEL
# ("" means "skip — pick later in the dashboard").
#
# Resolution order:
#   1. GCTRL_MODEL from the environment (non-interactive installs).
#   2. No usable terminal → the small default, silently.
#   3. Otherwise an interactive menu read from /dev/tty (stdin is usually the
#      `curl | bash` pipe, so it cannot be used for prompts).
# The default is small/fast so the bundled (CPU) Ollama path works out of the box.
choose_model() {
  if [ -n "${GCTRL_MODEL:-}" ]; then
    CHAT_MODEL="$GCTRL_MODEL"
    info "Model (from GCTRL_MODEL): ${CHAT_MODEL}"
    return
  fi

  local default_choice="llama3.2:3b"

  # Every prompt below reads from /dev/tty, so that is what must be usable.
  # Testing for the node's existence is not enough: /dev/tty exists even when
  # the process has no controlling terminal (CI, cron, `docker build`), where
  # opening it fails. Try to open it instead, so those runs take the default
  # without printing the menu and a bash redirection error.
  if ! { true </dev/tty; } 2>/dev/null; then
    CHAT_MODEL="$default_choice"
    info "Non-interactive — default model ${CHAT_MODEL} (override with GCTRL_MODEL=...)"
    return
  fi

  # Total RAM in GB, shown as a hint only (best-effort, 0 if unknown). The raw
  # value is validated before the arithmetic so an empty or odd reading cannot
  # abort the installer under `set -e`.
  local ram_gb=0 mem_raw=""
  if [ "$(uname)" = "Darwin" ]; then
    mem_raw=$(sysctl -n hw.memsize 2>/dev/null || true)          # bytes
    if [[ "$mem_raw" =~ ^[0-9]+$ ]]; then
      ram_gb=$(( 10#$mem_raw / 1073741824 ))
    fi
  elif [ -r /proc/meminfo ]; then
    mem_raw=$(awk '/MemTotal/{print $2; exit}' /proc/meminfo 2>/dev/null || true)   # kB
    if [[ "$mem_raw" =~ ^[0-9]+$ ]]; then
      ram_gb=$(( 10#$mem_raw / 1048576 ))
    fi
  fi

  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  if [ "$ram_gb" -gt 0 ]; then
    info "Detected RAM: ${ram_gb} GB"
  fi
  echo "  Choose the local model GCTRL uses for extraction & RAG:"
  echo "    1) llama3.2:3b   — lightweight, fast         (~2 GB)   [default]"
  echo "    2) qwen2.5:7b    — best extraction quality   (~5 GB)"
  echo "    3) qwen2.5:14b   — higher quality            (~9 GB,  16 GB+ RAM)"
  echo "    4) qwen2.5:32b   — max quality               (~20 GB, 32 GB+ RAM)"
  echo "    5) custom        — enter any Ollama tag"
  echo "    6) skip          — choose later in the dashboard"
  echo "  (Tip: the bundled Ollama runs on CPU. For GPU speed, point GCTRL at a"
  echo "   native Ollama in Settings → Infrastructure, then pick a bigger model.)"

  # A failed read (EOF, terminal gone) falls back to the default choice.
  local choice=""
  read -rp "  Choice [1]: " choice </dev/tty || choice=""
  case "${choice:-1}" in
    1) CHAT_MODEL="llama3.2:3b" ;;
    2) CHAT_MODEL="qwen2.5:7b" ;;
    3) CHAT_MODEL="qwen2.5:14b" ;;
    4) CHAT_MODEL="qwen2.5:32b" ;;
    5) read -rp "  Ollama model tag: " CHAT_MODEL </dev/tty || CHAT_MODEL=""
       CHAT_MODEL="${CHAT_MODEL:-$default_choice}" ;;
    6) CHAT_MODEL="" ;;
    *) CHAT_MODEL="$default_choice" ;;
  esac
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  if [ -n "$CHAT_MODEL" ]; then
    info "Model: ${CHAT_MODEL}"
  else
    info "Skipping model download — pick one later in Settings → Models"
  fi
  echo ""
}

# ── Generate Config ───────────────────────────────────────────────────────────
# Create the install directory, download the compose file and the license
# public key, and write ${INSTALL_DIR}/.env (plus the GPU compose override when
# a GPU was detected).
# Globals read:    INSTALL_DIR, CONFIG_DIR, API_URL, NEO4J_URI, NEO4J_USER,
#                  QDRANT_URL, OLLAMA_BASE, CHAT_MODEL, GPU_ENABLED
# Globals written: NEO4J_PASSWORD (only for the bundled Neo4j)
# Any failing download aborts the installer (curl -f under `set -e`).
generate_config() {
  info "Creating ${INSTALL_DIR}..."
  mkdir -p "${INSTALL_DIR}" "${CONFIG_DIR}" "${INSTALL_DIR}/data"

  curl -fsSL "https://gctrl.tech/compose.yml" \
    -o "${INSTALL_DIR}/docker-compose.yml"

  curl -fsSL "${API_URL}/v1/public-key" -o "${CONFIG_DIR}/license.pub"
  chmod 600 "${CONFIG_DIR}/license.pub"

  # Secrets are generated per-install (unique passwords, not shared defaults), but
  # PRESERVED on re-install: a bundled DB bakes its auth into its data volume on
  # first start, so rotating the password on a re-run would lock us out. If an
  # .env already exists we keep its values; otherwise we generate fresh ones.
  local prev_env="${INSTALL_DIR}/.env"
  # read_prev KEY → prints KEY's value from the previous .env (empty if none).
  read_prev() { [ -f "$prev_env" ] && sed -n "s/^$1=//p" "$prev_env" | head -1 || true; }

  local jwt_secret pg_pw
  jwt_secret="$(read_prev JWT_SECRET)";  [ -n "$jwt_secret" ] || jwt_secret="$(openssl rand -hex 32)"
  pg_pw="$(read_prev POSTGRES_PASSWORD)"; [ -n "$pg_pw" ]     || pg_pw="$(openssl rand -hex 24)"

  # Bundled Neo4j → unique generated password (the fresh container initialises
  # with it). A reused/external Neo4j keeps its own configured auth (we can't
  # change a database we don't own).
  if [ "${NEO4J_URI}" = "bolt://gctrl-neo4j:7687" ]; then
    local neo_prev; neo_prev="$(read_prev NEO4J_PASSWORD)"
    NEO4J_PASSWORD="${neo_prev:-$(openssl rand -hex 24)}"
  fi

  # Model settings follow the same keep-on-re-install rule as the secrets: an
  # explicit CHAT_MODEL wins, then whatever the previous .env had, then the
  # built-in placeholders. Without this, a re-run where the user skips the
  # picker would reset a previously configured model.
  local relex_model auto_model
  relex_model="${CHAT_MODEL:-$(read_prev RELEX_MODEL)}"
  auto_model="${CHAT_MODEL:-$(read_prev AUTO_CLASSIFY_MODEL)}"
  relex_model="${relex_model:-qwen2.5:7b}"
  auto_model="${auto_model:-llama3.2}"

  # Write the file under a restrictive umask (in a subshell, so the caller's
  # umask is untouched): on a first install the secrets must never exist on
  # disk with default, world-readable permissions, not even briefly.
  (
    umask 077
    cat > "${INSTALL_DIR}/.env" <<EOF
GCTRL_API_URL=${API_URL}
GCTRL_DATA_DIR=${INSTALL_DIR}/data
JWT_SECRET=${jwt_secret}
NEO4J_URI=${NEO4J_URI}
NEO4J_USER=${NEO4J_USER}
NEO4J_PASSWORD=${NEO4J_PASSWORD}
QDRANT_URL=${QDRANT_URL}
OLLAMA_BASE=${OLLAMA_BASE}
RELEX_MODEL=${relex_model}
AUTO_CLASSIFY_MODEL=${auto_model}
POSTGRES_PASSWORD=${pg_pw}
EOF
  )
  # Also tighten a pre-existing file that was created with looser permissions.
  chmod 600 "${INSTALL_DIR}/.env"

  if [ "${GPU_ENABLED}" = "true" ]; then
    write_gpu_override
  fi

  success "Config generated"
}

# Write ${INSTALL_DIR}/docker-compose.override.yml with GPU passthrough settings
# for the vendor in GPU_TYPE ("nvidia" or "amd"); any other value writes nothing.
# Docker Compose merges an override file with docker-compose.yml automatically
# only when it is run from the install directory WITHOUT -f; an invocation that
# passes -f has to list this file explicitly for it to take effect.
# Users can delete the file to revert to CPU mode.
# NOTE(review): the AMD variant pins gctrl-fuse to the same `latest-cuda` image
# tag as the NVIDIA variant and leaves gctrl-kex on its default image — confirm
# this is intended rather than carried over from the NVIDIA block.
write_gpu_override() {
  local target="${INSTALL_DIR}/docker-compose.override.yml"

  case "${GPU_TYPE}" in
    nvidia)
      info "Writing NVIDIA GPU override (docker-compose.override.yml)..."
      cat > "$target" <<'OVERRIDE'
# Auto-generated by GCTRL installer — NVIDIA GPU detected.
# Delete this file to switch back to CPU mode, then run: docker compose up -d
services:
  gctrl-fuse:
    image: ghcr.io/gctrl-tech/fuse:latest-cuda
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
  gctrl-kex:
    image: ghcr.io/gctrl-tech/kex:latest-cuda
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
  gctrl-ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
OVERRIDE
      ;;
    amd)
      info "Writing AMD ROCm GPU override (docker-compose.override.yml)..."
      cat > "$target" <<'OVERRIDE'
# Auto-generated by GCTRL installer — AMD GPU detected.
# Delete this file to switch back to CPU mode, then run: docker compose up -d
services:
  gctrl-fuse:
    image: ghcr.io/gctrl-tech/fuse:latest-cuda
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - video
      - render
  gctrl-kex:
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - video
      - render
  gctrl-ollama:
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - video
      - render
OVERRIDE
      ;;
  esac
}

# ── Start Stack ───────────────────────────────────────────────────────────────
# Pull the images, start the stack, then wait (up to 120s) for the web UI on
# http://localhost:3001. Aborts through error() on timeout; a failing
# `docker compose` call aborts the installer under `set -e`.
# Globals read: INSTALL_DIR, PROFILES, GPU_ENABLED
start_stack() {
  info "Starting GCTRL..."

  # Build the compose arguments once so `pull` and `up` always agree.
  local -a compose_args=(-f "${INSTALL_DIR}/docker-compose.yml")

  # Compose skips docker-compose.override.yml as soon as -f is given, so the
  # GPU override has to be listed explicitly or it is never applied. It is
  # added only when this run detected a usable GPU, so a stale override left by
  # an earlier run cannot break a CPU-only start.
  local override="${INSTALL_DIR}/docker-compose.override.yml"
  if [ "${GPU_ENABLED:-false}" = "true" ] && [ -f "$override" ]; then
    compose_args+=(-f "$override")
  fi
  compose_args+=(--env-file "${INSTALL_DIR}/.env")

  # ${arr[@]+"${arr[@]}"}: expanding an EMPTY array under `set -u` is an
  # "unbound variable" error on bash < 4.4 (e.g. the 3.2 shipped with macOS),
  # which is exactly the case when every service already exists on the host.
  local p
  for p in ${PROFILES[@]+"${PROFILES[@]}"}; do
    compose_args+=(--profile "$p")
  done

  docker compose "${compose_args[@]}" pull
  docker compose "${compose_args[@]}" up -d

  info "Waiting for the web UI to come up (up to 120s)..."
  local max_wait=120 waited=0
  while ! curl -sf http://localhost:3001 &>/dev/null; do
    sleep 3
    waited=$((waited + 3))
    if [ "$waited" -ge "$max_wait" ]; then
      error "Timeout. Check: docker compose -f ${INSTALL_DIR}/docker-compose.yml logs"
    fi
  done
  echo ""
  success "Web UI is live → http://localhost:3001"
  echo ""
}

# ── Finalize: apply the chosen model, pull local models, final message ─────────
# Runs AFTER the stack (incl. web) is already up, so the model question never
# blocks the user from seeing the UI. When a model was chosen, the .env is
# patched with it and gctrl-kex / gctrl-fuse are brought up again so they pick
# up the new value. Then the closing summary is printed and, if the bundled
# Ollama was deployed, the required models are pulled into it.
# Globals read: CHAT_MODEL, INSTALL_DIR, PROFILES, GPU_ENABLED, GPU_TYPE,
#               OLLAMA_BASE
finalize() {
  # Same compose arguments the stack was started with. The GPU override must
  # be listed explicitly (Compose ignores docker-compose.override.yml once -f
  # is used); otherwise recreating kex/fuse would drop their GPU settings.
  local -a compose_args=(-f "${INSTALL_DIR}/docker-compose.yml")
  local override="${INSTALL_DIR}/docker-compose.override.yml"
  if [ "${GPU_ENABLED:-false}" = "true" ] && [ -f "$override" ]; then
    compose_args+=(-f "$override")
  fi
  compose_args+=(--env-file "${INSTALL_DIR}/.env")

  # ${arr[@]+"${arr[@]}"} keeps an empty PROFILES from tripping `set -u` on
  # bash < 4.4. The same pass records whether the bundled Ollama was deployed.
  local p uses_bundled_ollama=false
  for p in ${PROFILES[@]+"${PROFILES[@]}"}; do
    compose_args+=(--profile "$p")
    if [ "$p" = "bundled-ollama" ]; then
      uses_bundled_ollama=true
    fi
  done

  if [ -n "${CHAT_MODEL}" ]; then
    # The model tag can be free-form user input: escape the characters that are
    # special in a sed replacement (\ and &) and the | used as delimiter.
    local model_esc
    model_esc=$(printf '%s' "${CHAT_MODEL}" | sed -e 's/[\\&|]/\\&/g')
    if sed -i.bak \
      -e "s|^RELEX_MODEL=.*|RELEX_MODEL=${model_esc}|" \
      -e "s|^AUTO_CLASSIFY_MODEL=.*|AUTO_CLASSIFY_MODEL=${model_esc}|" \
      "${INSTALL_DIR}/.env" 2>/dev/null; then
      rm -f "${INSTALL_DIR}/.env.bak"
    else
      warn "Could not update the model in ${INSTALL_DIR}/.env — set RELEX_MODEL / AUTO_CLASSIFY_MODEL there manually"
    fi
    # Best-effort on purpose: a failed restart must not abort the summary.
    docker compose "${compose_args[@]}" \
      up -d gctrl-kex gctrl-fuse >/dev/null 2>&1 || true
  fi

  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  success "GCTRL is running!"
  echo ""
  echo "  Open http://localhost:3001 to activate your license and complete setup"
  echo "  Installed : ${INSTALL_DIR}"
  echo ""
  if [ "${GPU_ENABLED}" = "true" ]; then
    echo "  GPU       : ${GPU_TYPE} acceleration enabled"
  else
    echo "  GPU       : CPU mode (no compatible GPU or toolkit found)"
  fi
  echo ""
  # Pull models into the bundled Ollama whenever we deployed it (a localhost-only
  # native Ollama still answers probe_tcp, so gate on the profile, not the probe).
  if [ "$uses_bundled_ollama" = "true" ]; then
    pull_ollama_models
  else
    echo "  Ollama    : connected at ${OLLAMA_BASE}"
  fi
  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
}

# Pull the local models into the bundled Ollama container (gctrl-ollama):
# the chosen generation model first (when one was chosen), then the
# nomic-embed-text embedding model. A failed pull only prints a warning with
# the manual command; the remaining models are still attempted.
# Globals read: CHAT_MODEL
pull_ollama_models() {
  echo "  Ollama    : bundled — pulling required local models (one-time)..."

  local -a models=("nomic-embed-text")
  if [ -n "${CHAT_MODEL:-}" ]; then
    models=("${CHAT_MODEL}" "${models[@]}")
  fi

  # Declared local so the loop variable does not leak into the global scope.
  local model
  for model in "${models[@]}"; do
    echo "              pulling ${model} ..."
    docker exec gctrl-ollama ollama pull "${model}" \
      || echo "              WARN: failed to pull ${model} — pull manually: docker exec gctrl-ollama ollama pull ${model}"
  done
}

# ── Main ──────────────────────────────────────────────────────────────────────
# Run the installer steps in order. The stack is started BEFORE the model
# question so the web UI is already reachable while the user decides.
main() {
  local -a steps=(
    check_prereqs      # required tools present, Docker daemon reachable
    detect_gpu         # sets GPU_ENABLED / GPU_TYPE
    detect_services    # auto-decides Ollama (native if reachable, else bundled — never conflicts)
    generate_config    # .env written with a sensible default model placeholder
    start_stack        # bring the stack up FIRST → web UI live at :3001
    choose_model       # ask the model question only NOW, after the UI is up
    finalize           # apply the chosen model (restart kex/fuse) + pull local models
  )

  local step
  for step in "${steps[@]}"; do
    "$step"
  done
}

# Entry point: run the installer, forwarding any command-line arguments.
main "$@"