Add timeouts and prioritize telemetry on exit

Prevent hangs when pulling logs from containers by wrapping pct pull calls with timeout (8s) and running ensure_log_on_host under timeout (10s). Always send telemetry (post_update_to_api) before attempting best-effort log collection so status is reported even if log retrieval blocks. Update EXIT/ERR/SIGHUP/SIGINT/SIGTERM traps and consolidate error/interrupt handlers to use the new timeouted log collection. Changes in misc/build.func and misc/error_handler.func.
This commit is contained in:
CanbiZ (MickLesk)
2026-02-18 13:14:59 +01:00
parent 845b89f975
commit 6cc8877852
2 changed files with 30 additions and 38 deletions

View File

@@ -339,24 +339,17 @@ on_exit() {
# post_to_api was called ("installing" sent) but post_update_to_api was never called
if [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then
if declare -f post_update_to_api >/dev/null 2>&1; then
if [[ $exit_code -gt 128 ]]; then
# Signal exit: send telemetry IMMEDIATELY (container may be dying, pct pull could hang)
# ALWAYS send telemetry FIRST - ensure status is reported even if
# ensure_log_on_host hangs (e.g. pct pull on dead/unresponsive container)
if [[ $exit_code -ne 0 ]]; then
post_update_to_api "failed" "$exit_code" 2>/dev/null || true
# Then try log collection (non-critical, best-effort)
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host 2>/dev/null || true
fi
else
# Normal exit: collect logs first for better error details
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host 2>/dev/null || true
fi
if [[ $exit_code -ne 0 ]]; then
post_update_to_api "failed" "$exit_code"
else
# exit_code=0 is never an error — report as success
post_update_to_api "done" "0"
fi
# exit_code=0 is never an error — report as success
post_update_to_api "done" "0" 2>/dev/null || true
fi
# Best-effort log collection with timeout (non-critical after telemetry is sent)
if declare -f ensure_log_on_host >/dev/null 2>&1; then
timeout 10 bash -c 'ensure_log_on_host' 2>/dev/null || true
fi
fi
fi
@@ -380,9 +373,9 @@ on_interrupt() {
if declare -f post_update_to_api >/dev/null 2>&1; then
post_update_to_api "failed" "130" 2>/dev/null || true
fi
# Best-effort log collection (non-critical after telemetry is sent)
# Best-effort log collection with timeout (non-critical after telemetry is sent)
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host 2>/dev/null || true
timeout 10 bash -c 'ensure_log_on_host' 2>/dev/null || true
fi
if declare -f msg_error >/dev/null 2>&1; then
msg_error "Interrupted by user (SIGINT)" 2>/dev/null || true
@@ -409,9 +402,9 @@ on_terminate() {
if declare -f post_update_to_api >/dev/null 2>&1; then
post_update_to_api "failed" "143" 2>/dev/null || true
fi
# Best-effort log collection (non-critical after telemetry is sent)
# Best-effort log collection with timeout (non-critical after telemetry is sent)
if declare -f ensure_log_on_host >/dev/null 2>&1; then
ensure_log_on_host 2>/dev/null || true
timeout 10 bash -c 'ensure_log_on_host' 2>/dev/null || true
fi
if declare -f msg_error >/dev/null 2>&1; then
msg_error "Terminated by signal (SIGTERM)" 2>/dev/null || true