From ee74c8f15810f4e7aab0eb2af77898bc5680ca7d Mon Sep 17 00:00:00 2001 From: "CanbiZ (MickLesk)" <47820557+MickLesk@users.noreply.github.com> Date: Mon, 2 Mar 2026 14:23:59 +0100 Subject: [PATCH] fix: improve error trace propagation for telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - post_update_to_api: Attempts 2/3 now send medium_error (16KB truncated log) instead of short_error (generic description only). This is the primary fix — when attempt 1 fails (120KB payload too large/timeout), attempts 2/3 no longer discard all log data. - _send_abort_telemetry: Increased container fallback from 20 to 200 log lines (capped at 16KB). Added SILENT_LOGFILE as fallback source. Added exit code explanation header and error_category to payload. - get_error_text/get_full_log: Added SILENT_LOGFILE as last-resort fallback when INSTALL_LOG, combined log, and BUILD_LOG are all empty/missing. --- misc/api.func | 34 +++++++++++++++++++++++++++++----- misc/error_handler.func | 30 ++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/misc/api.func b/misc/api.func index c56150230..1888412c7 100644 --- a/misc/api.func +++ b/misc/api.func @@ -393,6 +393,11 @@ get_error_text() { logfile="$BUILD_LOG" fi + # Try SILENT_LOGFILE as last resort (captures $STD command output) + if [[ -z "$logfile" || ! -s "$logfile" ]] && [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then + logfile="$SILENT_LOGFILE" + fi + if [[ -n "$logfile" && -s "$logfile" ]]; then tail -n 20 "$logfile" 2>/dev/null | sed 's/\r$//' | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' fi @@ -438,6 +443,13 @@ get_full_log() { fi fi + # Fall back to SILENT_LOGFILE (captures $STD command output) + if [[ -z "$logfile" || ! 
-s "$logfile" ]]; then + if [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then + logfile="$SILENT_LOGFILE" + fi + fi + if [[ -n "$logfile" && -s "$logfile" ]]; then # Strip ANSI codes, carriage returns, and anonymize IP addresses (GDPR) sed 's/\r$//' "$logfile" 2>/dev/null | @@ -876,7 +888,7 @@ post_update_to_api() { esac # For failed/unknown status, resolve exit code and error description - local short_error="" + local short_error="" medium_error="" if [[ "$pb_status" == "failed" ]] || [[ "$pb_status" == "unknown" ]]; then if [[ "$raw_exit_code" =~ ^[0-9]+$ ]]; then exit_code="$raw_exit_code" @@ -896,6 +908,18 @@ post_update_to_api() { short_error=$(json_escape "$(explain_exit_code "$exit_code")") error_category=$(categorize_error "$exit_code") [[ -z "$error" ]] && error="Unknown error" + + # Build medium error for attempts 2 and 3: explanation + truncated log (≤16KB) + # This is the critical middle ground between full 120KB log and generic-only description + local medium_log="" + medium_log=$(get_full_log 16384) || true # 16KB max + if [[ -z "$medium_log" ]]; then + medium_log=$(get_error_text) || true + fi + local medium_full + medium_full=$(build_error_string "$exit_code" "$medium_log") + medium_error=$(json_escape "$medium_full") + [[ -z "$medium_error" ]] && medium_error="$short_error" fi # Calculate duration if timer was started @@ -954,7 +978,7 @@ EOF return 0 fi - # ── Attempt 2: Short error text (no full log) ── + # ── Attempt 2: Medium error text (truncated log ≤16KB instead of full 120KB) ── sleep 1 local RETRY_PAYLOAD RETRY_PAYLOAD=$( @@ -974,7 +998,7 @@ EOF "pve_version": "${pve_version}", "method": "${METHOD:-default}", "exit_code": ${exit_code}, - "error": "${short_error}", + "error": "${medium_error}", "error_category": "${error_category}", "install_duration": ${duration}, "cpu_vendor": "${cpu_vendor}", @@ -997,7 +1021,7 @@ EOF return 0 fi - # ── Attempt 3: Minimal payload
with medium error (bare minimum to set status) ── sleep 2 local MINIMAL_PAYLOAD MINIMAL_PAYLOAD=$( @@ -1009,7 +1033,7 @@ EOF "nsapp": "${NSAPP:-unknown}", "status": "${pb_status}", "exit_code": ${exit_code}, - "error": "${short_error}", + "error": "${medium_error}", "error_category": "${error_category}", "install_duration": ${duration} } diff --git a/misc/error_handler.func b/misc/error_handler.func index 55d024d4e..39e5e667f 100644 --- a/misc/error_handler.func +++ b/misc/error_handler.func @@ -408,10 +408,29 @@ _send_abort_telemetry() { [[ "${DIAGNOSTICS:-no}" == "no" ]] && return 0 [[ -z "${RANDOM_UUID:-}" ]] && return 0 - # Collect last 20 log lines for error diagnosis (best-effort) + # Collect last 200 log lines for error diagnosis (best-effort) + # Container context has no get_full_log(), so we gather as much as possible local error_text="" + local logfile="" if [[ -n "${INSTALL_LOG:-}" && -s "${INSTALL_LOG}" ]]; then - error_text=$(tail -n 20 "$INSTALL_LOG" 2>/dev/null | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\\/\\\\/g; s/"/\\"/g; s/\r//g' | tr '\n' '|' | sed 's/|$//' | tr -d '\000-\010\013\014\016-\037\177') || true + logfile="${INSTALL_LOG}" + elif [[ -n "${SILENT_LOGFILE:-}" && -s "${SILENT_LOGFILE}" ]]; then + logfile="${SILENT_LOGFILE}" + fi + + if [[ -n "$logfile" ]]; then + error_text=$(tail -n 200 "$logfile" 2>/dev/null | sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\\/\\\\/g; s/"/\\"/g; s/\r//g' | tr '\n' '|' | sed 's/|$//' | head -c 16384 | tr -d '\000-\010\013\014\016-\037\177') || true + fi + + # Prepend exit code explanation header (like build_error_string does on host) + local explanation="" + if declare -f explain_exit_code &>/dev/null; then + explanation=$(explain_exit_code "$exit_code" 2>/dev/null) || true + fi + if [[ -n "$explanation" && -n "$error_text" ]]; then + error_text="exit_code=${exit_code} | ${explanation}|---|${error_text}" + elif [[ -n "$explanation" && -z "$error_text" ]]; then + error_text="exit_code=${exit_code} | ${explanation}" fi # 
Calculate duration if start time is available @@ -420,10 +439,17 @@ _send_abort_telemetry() { duration=$(($(date +%s) - DIAGNOSTICS_START_TIME)) fi + # Categorize error if function is available (may not be in minimal container context) + local error_category="" + if declare -f categorize_error &>/dev/null; then + error_category=$(categorize_error "$exit_code" 2>/dev/null) || true + fi + # Build JSON payload with error context local payload payload="{\"random_id\":\"${RANDOM_UUID}\",\"execution_id\":\"${EXECUTION_ID:-${RANDOM_UUID}}\",\"type\":\"${TELEMETRY_TYPE:-lxc}\",\"nsapp\":\"${NSAPP:-${app:-unknown}}\",\"status\":\"failed\",\"exit_code\":${exit_code}" [[ -n "$error_text" ]] && payload="${payload},\"error\":\"${error_text}\"" + [[ -n "$error_category" ]] && payload="${payload},\"error_category\":\"${error_category}\"" [[ -n "$duration" ]] && payload="${payload},\"duration\":${duration}" payload="${payload}}"