From 7db8ddda5251a1c87130df802f58b87d9a9978db Mon Sep 17 00:00:00 2001
From: "CanbiZ (MickLesk)" <47820557+MickLesk@users.noreply.github.com>
Date: Wed, 25 Feb 2026 13:56:55 +0100
Subject: [PATCH] fix(test): initialize colors and remove illegal local in test
 harness

- Call load_functions() after sourcing core.func to initialize
  color/formatting/icon variables (RD, GN, YW, CL, TAB, etc.)
- Remove 'local' keyword from top-level scope (not inside function)
- Default REPO_SOURCE to ref_api instead of main
---
 tools/test-recovery-dialog.sh | 413 ++++++++++++++++++++++++++++++++++
 1 file changed, 413 insertions(+)
 create mode 100644 tools/test-recovery-dialog.sh
diff --git a/tools/test-recovery-dialog.sh b/tools/test-recovery-dialog.sh
new file mode 100644
index 000000000..3c1517a09
--- /dev/null
+++ b/tools/test-recovery-dialog.sh
@@ -0,0 +1,413 @@
+#!/usr/bin/env bash
+# ==============================================================================
+# test-recovery-dialog.sh — Test harness for the installation recovery dialog
+#
+# This script simulates a failed LXC installation on a real Proxmox host.
+# It sources the actual func files and triggers the failure path in
+# build_container() so you can verify the recovery dialog appears correctly.
+#
+# Usage:
+#   1. Copy this file to your Proxmox host:
+#        scp tools/test-recovery-dialog.sh root@proxmox:/tmp/
+#
+#   2. Run it directly (the full container flow is not implemented yet: sources build.func, then prints usage hints):
+#        bash /tmp/test-recovery-dialog.sh
+#
+#   3. Or run with a real app to test (intended to fail during install; currently ends with the same hints):
+#        TEST_REAL_APP=zammad bash /tmp/test-recovery-dialog.sh
+#
+# What it tests:
+#   - msg_error output after failure
+#   - Log collection (pct pull, combined log, tee capture)
+#   - Telemetry reporting (post_update_to_api)
+#   - Error type detection (APT, OOM, network, etc.)
+#   - Recovery menu display and option handling
+#   - SIGTSTP trap (the [2]+ Stopped bug)
+#
+# Environment variables:
+#   TEST_REAL_APP=<appname>   Use a real install script (e.g., zammad)
+#   TEST_EXIT_CODE=<N>        Simulate a specific exit code (default: 1)
+#   TEST_ERROR_TYPE=<type>    Simulate error type: apt, oom, network, cmd (default: generic)
+#   TEST_SKIP_CONTAINER=1     Skip container creation, test dialog rendering only
+#   TEST_VERBOSE=1            Enable verbose mode
+#   DIAGNOSTICS=yes           Enable telemetry (default: no for testing)
+# ==============================================================================
+
+set -Eeuo pipefail
+
+# ── Safety check ──
+# Without the PVE certificate only the dialog-rendering mode (TEST_SKIP_CONTAINER=1) is allowed.
+if [[ "${TEST_SKIP_CONTAINER:-0}" != "1" && ! -f /etc/pve/local/pve-ssl.pem ]]; then
+  printf '%s\n' \
+    "ERROR: This script must be run on a Proxmox VE host." \
+    "       Use TEST_SKIP_CONTAINER=1 to test dialog rendering without Proxmox."
+  exit 1
+fi
+
+# ── Configuration ──
+: "${TEST_EXIT_CODE:=1}" "${TEST_ERROR_TYPE:=generic}" "${TEST_REAL_APP:=}" "${DIAGNOSTICS:=no}"
+
+printf '%s\n' \
+  "==============================================" \
+  "  Recovery Dialog Test Harness" \
+  "==============================================" \
+  "  Exit code:    ${TEST_EXIT_CODE}" \
+  "  Error type:   ${TEST_ERROR_TYPE}" \
+  "  Real app:     ${TEST_REAL_APP:-none (mock)}" \
+  "  Skip CT:      ${TEST_SKIP_CONTAINER:-0}" \
+  "  Diagnostics:  ${DIAGNOSTICS}" \
+  "==============================================" \
+  ""
+
+# ── Source the real func files ──
+# Uses the same source chain as the real scripts (set REPO_SOURCE to test another branch or fork)
+REPO_SOURCE="${REPO_SOURCE:-https://raw.githubusercontent.com/community-scripts/ProxmoxVE/ref_api}"
+
+echo "Sourcing func files from: ${REPO_SOURCE}"
+# Source in the correct order (same as build.func does). Each file is downloaded into a variable
+# first: "source <(curl ...)" returns 0 on an empty stream, so a failed download never reached the stubs.
+if ! _func_src="$(curl -fsSL "${REPO_SOURCE}/misc/api.func" 2>/dev/null)" || [[ -z "$_func_src" ]] || ! source <(printf '%s\n' "$_func_src") 2>/dev/null; then
+  echo "WARNING: Could not source api.func from ${REPO_SOURCE}"
+  echo "         Defining stub functions..."
+  post_update_to_api() { echo "[STUB] post_update_to_api $*"; }
+  explain_exit_code() { echo "Test error (code $1)"; }
+  categorize_error() { echo "test"; }
+  json_escape() { printf '%s' "${1:-}"; }
+  get_full_log() { echo ""; }
+  build_error_string() { echo "exit_code=$1 | test error"; }
+fi
+
+if ! _func_src="$(curl -fsSL "${REPO_SOURCE}/misc/core.func" 2>/dev/null)" || [[ -z "$_func_src" ]] || ! source <(printf '%s\n' "$_func_src") 2>/dev/null; then
+  echo "WARNING: Could not source core.func"
+  # Minimal stubs
+  TAB=$'\t'
+  RD=$'\033[01;31m'
+  GN=$'\033[1;92m'
+  YW=$'\033[33m'
+  BL=$'\033[36m'
+  CL=$'\033[m'
+  CM="✔"
+  CROSS="✖"
+  INFO="💡"
+  HOLD="⏳"
+  BFR="\r\033[2K"
+  DGN=$'\033[33m'
+  msg_info() { echo -e "${TAB}⏳ $1"; }
+  msg_ok() { echo -e "${TAB}✔ $1"; }
+  msg_error() { echo -e "${TAB}✖ $1" >&2; }
+  msg_warn() { echo -e "${TAB}⚠ $1"; }
+  msg_custom() { echo -e "${TAB}$1 $3"; }
+  stop_spinner() { :; }
+fi
+
+if ! _func_src="$(curl -fsSL "${REPO_SOURCE}/misc/error_handler.func" 2>/dev/null)" || [[ -z "$_func_src" ]] || ! source <(printf '%s\n' "$_func_src") 2>/dev/null; then
+  echo "WARNING: Could not source error_handler.func"
+  error_handler() { echo "[STUB] error_handler $*"; exit "${1:-1}"; }
+  catch_errors() { :; }
+fi
+
+# Initialize colors, formatting, icons (must be called after sourcing core.func)
+if declare -f load_functions >/dev/null 2>&1; then
+  load_functions
+fi
+
+# Initialize traps
+if declare -f catch_errors >/dev/null 2>&1; then
+  catch_errors
+fi
+
+echo ""
+echo "✔ Func files loaded"
+echo ""
+
+# ── Setup test environment ──
+# Synthetic identifiers; each one embeds the epoch second at which it was generated.
+export \
+  SESSION_ID="test-$(date +%s)" \
+  RANDOM_UUID="test-uuid-$(date +%s)" \
+  EXECUTION_ID="test-exec-$(date +%s)"
+# App identity: TEST_REAL_APP when provided, otherwise the placeholder "testapp".
+NSAPP="${TEST_REAL_APP:-testapp}"
+APP="${NSAPP}"
+var_install="${NSAPP}"
+export NSAPP APP var_install
+# Fixed container parameters for the test run.
+export var_os="debian" var_version="12"
+export CT_TYPE=1 DISK_SIZE=4 CORE_COUNT=1 RAM_SIZE=1024
+export METHOD="default" NET="dhcp" BRG="vmbr0" TELEMETRY_TYPE="lxc"
+# VERBOSE carries TEST_VERBOSE verbatim ("no" when unset) and is mirrored into var_verbose.
+VERBOSE="${TEST_VERBOSE:-no}"
+var_verbose="${VERBOSE}"
+export VERBOSE var_verbose
+
+# ── Mock or create container ──
+if [[ "${TEST_SKIP_CONTAINER:-0}" == "1" ]]; then
+  echo "Skipping container creation (TEST_SKIP_CONTAINER=1)"
+  echo "Testing dialog rendering only..."
+  echo ""
+  # Everything up to "exit 0" works on fake logs and CTID 99999; no pct command is invoked in this branch.
+  export CTID=99999
+  export BUILD_LOG="/tmp/test-build-${SESSION_ID}.log"
+  echo "Test build log entry" > "$BUILD_LOG"
+
+  # Create a fake combined log (header, phase 1, phase 2) whose phase-2 lines depend on TEST_ERROR_TYPE
+  combined_log="/tmp/${NSAPP}-${CTID}-${SESSION_ID}.log"
+  {
+    echo "================================================================================"
+    echo "COMBINED INSTALLATION LOG - ${APP}"
+    echo "Container ID: ${CTID}"
+    echo "Session ID: ${SESSION_ID}"
+    echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')"
+    echo "================================================================================"
+    echo ""
+    echo "================================================================================"
+    echo "PHASE 1: CONTAINER CREATION (Host)"
+    echo "================================================================================"
+    echo "Test build log entry"
+    echo ""
+    echo "================================================================================"
+    echo "PHASE 2: APPLICATION INSTALLATION (Container)"
+    echo "================================================================================"
+
+    case "$TEST_ERROR_TYPE" in
+    apt)
+      echo "Reading package lists..."
+      echo "Building dependency tree..."
+      echo "E: Unable to locate package foobar-nonexistent"
+      echo "E: Package 'foobar-nonexistent' has no installation candidate"
+      echo "dpkg: error processing package foobar (--configure):"
+      echo " dependency problems - leaving unconfigured"
+      echo "E: Sub-process /usr/bin/dpkg returned an error code (1)"
+      ;;
+    oom)
+      echo "Starting application..."
+      echo "FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory"
+      echo "Cannot allocate memory"
+      echo "Killed process 12345 (node) total-vm:4194304kB"
+      ;;
+    network)
+      echo "Fetching https://registry.npmjs.org/..."
+      echo "curl: (7) Failed to connect to registry.npmjs.org port 443"
+      echo "Could not resolve host: github.com"
+      echo "Temporary failure resolving 'deb.debian.org'"
+      ;;
+    cmd)
+      echo "Setting up application..."
+      echo "foobar-cmd: command not found"
+      echo "/usr/local/bin/missing-tool: No such file or directory"
+      ;;
+    *)
+      echo "Starting installation..."
+      echo "Setting up database..."
+      echo "Configuring application..."
+      echo "systemctl restart -q elasticsearch"
+      echo "Job for elasticsearch.service failed because the control process exited with error code."
+      echo "See \"systemctl status elasticsearch.service\" for details."
+      ;;
+    esac
+  } > "$combined_log"
+
+  export INSTALL_LOG="$combined_log"
+
+  # ── Now simulate the failure path directly ──
+  install_exit_code="${TEST_EXIT_CODE}"
+
+  # Map each error type to a representative exit code, but only while TEST_EXIT_CODE is 1 (default or explicit)
+  case "$TEST_ERROR_TYPE" in
+  apt) [[ "$TEST_EXIT_CODE" == "1" ]] && install_exit_code=100 ;;
+  oom) [[ "$TEST_EXIT_CODE" == "1" ]] && install_exit_code=137 ;;
+  network) [[ "$TEST_EXIT_CODE" == "1" ]] && install_exit_code=7 ;;
+  cmd) [[ "$TEST_EXIT_CODE" == "1" ]] && install_exit_code=127 ;;
+  esac
+
+  echo "Simulating failure with exit code: ${install_exit_code}"
+  echo ""
+
+  # ── Run the actual failure path code ──
+  # This is the same code from build_container() starting at "Installation failed?"
+
+  # Ignore SIGTSTP while the dialog runs (the fix we're testing); the default is restored near the end
+  trap '' TSTP
+
+  msg_error "Installation failed in container ${CTID} (exit code: ${install_exit_code})"
+
+  # Report failure to telemetry API (best effort: output discarded, failure ignored)
+  echo -e "${TAB}⏳ Reporting failure to telemetry..." >&2
+  post_update_to_api "failed" "$install_exit_code" 2>/dev/null || true
+  echo -e "${TAB}${CM:-✔} Failure reported" >&2
+
+  # Disable strict mode and the ERR trap (matches real code) so failing checks below do not abort the dialog
+  set +Eeuo pipefail
+  trap - ERR
+
+  # Show combined log location
+  msg_custom "📋" "${YW}" "Installation log: ${combined_log}"
+
+  # Error type detection (same as build_container): exit-code based first, log-content based for exit code 1
+  is_oom=false
+  is_network_issue=false
+  is_apt_issue=false
+  is_cmd_not_found=false
+  error_explanation=""
+  if declare -f explain_exit_code >/dev/null 2>&1; then
+    error_explanation="$(explain_exit_code "$install_exit_code")"
+  fi
+
+  if [[ $install_exit_code -eq 134 || $install_exit_code -eq 137 || $install_exit_code -eq 243 ]]; then
+    is_oom=true
+  fi
+
+  case "$install_exit_code" in
+  100 | 101 | 102) is_apt_issue=true ;;
+  255)
+    if [[ -f "$combined_log" ]] && grep -qiE 'dpkg|apt-get|broken packages|unmet dependencies' "$combined_log"; then
+      is_apt_issue=true
+    fi
+    ;;
+  esac
+
+  if [[ $install_exit_code -eq 127 ]]; then
+    is_cmd_not_found=true
+  fi
+
+  case "$install_exit_code" in
+  6 | 7 | 22 | 28 | 35 | 52 | 56 | 57 | 75 | 78) is_network_issue=true ;;
+  esac
+  # NOTE(review): exit code 1 only occurs with the generic log text, which does not seem to match any pattern below — confirm
+  if [[ $install_exit_code -eq 1 && -f "$combined_log" ]]; then
+    if grep -qiE 'E: Unable to|dpkg.*error|broken packages' "$combined_log"; then
+      is_apt_issue=true
+    fi
+    if grep -qiE 'Cannot allocate memory|Out of memory|oom-killer|JavaScript heap' "$combined_log"; then
+      is_oom=true
+    fi
+    if grep -qiE 'Could not resolve|DNS|Connection refused|Temporary failure resolving' "$combined_log"; then
+      is_network_issue=true
+    fi
+    if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
+      is_cmd_not_found=true
+    fi
+  fi
+
+  # Show error explanation (only when explain_exit_code exists and returned text)
+  if [[ -n "$error_explanation" ]]; then
+    echo -e "${TAB}${RD}Error: ${error_explanation}${CL}"
+    echo ""
+  fi
+
+  # Show hints (only the missing command name, taken from the last "command not found" line in the log)
+  if [[ "$is_cmd_not_found" == true ]]; then
+    missing_cmd=""
+    if [[ -f "$combined_log" ]]; then
+      missing_cmd=$(grep -oiE '[a-zA-Z0-9_.-]+: command not found' "$combined_log" 2>/dev/null | tail -1 | sed 's/: command not found//') || true
+    fi
+    if [[ -n "$missing_cmd" ]]; then
+      echo -e "${TAB}${INFO} Missing command: ${GN}${missing_cmd}${CL}"
+    fi
+    echo ""
+  fi
+
+  # Build recovery menu: options 1-3 are fixed, conditional options are numbered from 4 upward
+  echo -e "${YW}What would you like to do?${CL}"
+  echo ""
+  echo -e "  ${GN}1)${CL} Remove container and exit"
+  echo -e "  ${GN}2)${CL} Keep container for debugging"
+  echo -e "  ${GN}3)${CL} Retry with verbose mode (full rebuild)"
+
+  next_option=4
+  APT_OPTION="" OOM_OPTION="" DNS_OPTION=""
+  RAM_SIZE=${RAM_SIZE:-1024}
+
+  if [[ "$is_apt_issue" == true ]]; then
+    echo -e "  ${GN}${next_option})${CL} Repair APT/DPKG state and re-run install (in-place)"
+    APT_OPTION=$next_option
+    next_option=$((next_option + 1))
+  fi
+
+  if [[ "$is_oom" == true ]]; then
+    new_ram=$((RAM_SIZE * 2))
+    new_cpu=$((CORE_COUNT * 2))
+    echo -e "  ${GN}${next_option})${CL} Retry with more resources (RAM: ${RAM_SIZE}→${new_ram} MiB, CPU: ${CORE_COUNT}→${new_cpu} cores)"
+    OOM_OPTION=$next_option
+    next_option=$((next_option + 1))
+  fi
+
+  if [[ "$is_network_issue" == true ]]; then
+    echo -e "  ${GN}${next_option})${CL} Retry with DNS override in LXC (8.8.8.8 / 1.1.1.1)"
+    DNS_OPTION=$next_option
+    next_option=$((next_option + 1))
+  fi
+
+  max_option=$((next_option - 1))
+  echo ""
+  echo -en "${YW}Select option [1-${max_option}] (default: 1, auto-remove in 60s): ${CL}"
+
+  if read -t 60 -r response; then
+    echo ""
+    echo "✔ You selected: '${response:-1}'"
+    echo ""
+    case "${response:-1}" in
+    1) echo "[TEST] Would remove container ${CTID}" ;;
+    2) echo "[TEST] Would keep container ${CTID} for debugging" ;;
+    3) echo "[TEST] Would retry with verbose mode" ;;
+    *)
+      if [[ -n "${APT_OPTION}" && "${response}" == "${APT_OPTION}" ]]; then
+        echo "[TEST] Would repair APT/DPKG state and re-run"
+      elif [[ -n "${OOM_OPTION}" && "${response}" == "${OOM_OPTION}" ]]; then
+        echo "[TEST] Would retry with doubled resources"
+      elif [[ -n "${DNS_OPTION}" && "${response}" == "${DNS_OPTION}" ]]; then
+        echo "[TEST] Would retry with DNS override"
+      else
+        echo "[TEST] Invalid option: ${response}"
+      fi
+      ;;
+    esac
+  else
+    echo ""
+    echo "[TEST] Timeout - would auto-remove container ${CTID}"
+  fi
+
+  # Finalize: second telemetry call, this time with the extra "force" argument
+  echo -e "${TAB}⏳ Finalizing telemetry report..." >&2
+  post_update_to_api "failed" "$install_exit_code" "force" 2>/dev/null || true
+  echo -e "${TAB}${CM:-✔} Telemetry finalized" >&2
+
+  trap - TSTP
+
+  echo ""
+  echo "=============================================="
+  echo "  Test completed successfully!"
+  echo "  The recovery dialog appeared as expected."
+  echo "=============================================="
+
+  # Cleanup: remove the fake build log and combined log created above
+  rm -f "$BUILD_LOG" "$combined_log" 2>/dev/null
+
+  exit 0
+fi
+
+# ── Full test with real container ──
+echo "Creating test container..."
+echo ""
+# Use the real build.func flow. The download is checked explicitly because
+# "source <(curl ...)" returns 0 on an empty stream and would hide a failed fetch.
+if ! _func_src="$(curl -fsSL "${REPO_SOURCE}/misc/build.func" 2>/dev/null)" || [[ -z "$_func_src" ]] || ! source <(printf '%s\n' "$_func_src") 2>/dev/null; then
+  echo "ERROR: Could not source build.func"
+  exit 1
+fi
+
+# The rest of the test with a real container would use the standard
+# script flow. For now, suggest using TEST_SKIP_CONTAINER=1 for
+# dialog testing, or TEST_REAL_APP=<app> for full integration tests.
+echo "For full integration testing with a real container, run one of:"
+echo ""
+echo "  # Test with a known-failing app:"
+echo "  TEST_REAL_APP=zammad bash /tmp/test-recovery-dialog.sh"
+echo ""
+echo "  # Or test dialog rendering without a container:"
+echo "  TEST_SKIP_CONTAINER=1 bash /tmp/test-recovery-dialog.sh"
+echo "  TEST_SKIP_CONTAINER=1 TEST_ERROR_TYPE=apt bash /tmp/test-recovery-dialog.sh"
+echo "  TEST_SKIP_CONTAINER=1 TEST_ERROR_TYPE=oom bash /tmp/test-recovery-dialog.sh"
+echo "  TEST_SKIP_CONTAINER=1 TEST_ERROR_TYPE=network bash /tmp/test-recovery-dialog.sh"
+echo "  TEST_SKIP_CONTAINER=1 TEST_ERROR_TYPE=cmd bash /tmp/test-recovery-dialog.sh"
+echo ""