feat(recovery): add ENOSPC disk-full detection with auto-retry using doubled disk size

- Detect ENOSPC errors via exit codes 228 and 23 and via log patterns (ENOSPC, no space left on device, Disk quota exceeded, errno -28)
- Add is_disk_full detection in exit-1 subclassification block for generic failures caused by disk space
- Show informational hint with current disk size when ENOSPC is detected
- Add recovery menu option to rebuild container with doubled disk size (up to 2 retries)
- Follow the same pattern as the existing OOM recovery (DISK_RECOVERY_ATTEMPT counter, DISK_OPTION menu entry)
This commit is contained in:
CanbiZ (MickLesk)
2026-03-03 14:50:02 +01:00
parent aca721e9ee
commit d6efc5fb61

View File

@@ -4222,6 +4222,7 @@ EOF'
local is_network_issue=false local is_network_issue=false
local is_apt_issue=false local is_apt_issue=false
local is_cmd_not_found=false local is_cmd_not_found=false
local is_disk_full=false
local error_explanation="" local error_explanation=""
if declare -f explain_exit_code >/dev/null 2>&1; then if declare -f explain_exit_code >/dev/null 2>&1; then
error_explanation="$(explain_exit_code "$install_exit_code")" error_explanation="$(explain_exit_code "$install_exit_code")"
@@ -4242,6 +4243,14 @@ EOF'
;; ;;
esac esac
# Disk full / ENOSPC detection: errno -28 (ENOSPC), exit 228 (custom handler), exit 23 (curl write error)
if [[ $install_exit_code -eq 228 || $install_exit_code -eq 23 ]]; then
is_disk_full=true
fi
if [[ -f "$combined_log" ]] && grep -qiE 'ENOSPC|no space left on device|No space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
is_disk_full=true
fi
# Command not found detection # Command not found detection
if [[ $install_exit_code -eq 127 ]]; then if [[ $install_exit_code -eq 127 ]]; then
is_cmd_not_found=true is_cmd_not_found=true
@@ -4278,6 +4287,9 @@ EOF'
if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
is_cmd_not_found=true is_cmd_not_found=true
fi fi
if grep -qiE 'ENOSPC|no space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
is_disk_full=true
fi
fi fi
# Show error explanation if available # Show error explanation if available
@@ -4299,6 +4311,12 @@ EOF'
echo "" echo ""
fi fi
if [[ "$is_disk_full" == true ]]; then
echo -e "${TAB}${INFO} The container ran out of disk space during installation (${GN}ENOSPC${CL})."
echo -e "${TAB}${INFO} Current disk size: ${GN}${DISK_SIZE} GB${CL}. A rebuild with doubled disk may resolve this."
echo ""
fi
if [[ "$is_cmd_not_found" == true ]]; then if [[ "$is_cmd_not_found" == true ]]; then
local missing_cmd="" local missing_cmd=""
if [[ -f "$combined_log" ]]; then if [[ -f "$combined_log" ]]; then
@@ -4318,7 +4336,7 @@ EOF'
echo -e " ${GN}3)${CL} Retry with verbose mode (full rebuild)" echo -e " ${GN}3)${CL} Retry with verbose mode (full rebuild)"
local next_option=4 local next_option=4
local APT_OPTION="" OOM_OPTION="" DNS_OPTION="" local APT_OPTION="" OOM_OPTION="" DNS_OPTION="" DISK_OPTION=""
if [[ "$is_apt_issue" == true ]]; then if [[ "$is_apt_issue" == true ]]; then
if [[ "$var_os" == "alpine" ]]; then if [[ "$var_os" == "alpine" ]]; then
@@ -4343,6 +4361,18 @@ EOF'
fi fi
fi fi
if [[ "$is_disk_full" == true ]]; then
local disk_recovery_attempt="${DISK_RECOVERY_ATTEMPT:-0}"
if [[ $disk_recovery_attempt -lt 2 ]]; then
local new_disk=$((DISK_SIZE * 2))
echo -e " ${GN}${next_option})${CL} Retry with more disk space (Disk: ${DISK_SIZE} → ${new_disk} GB)"
DISK_OPTION=$next_option
next_option=$((next_option + 1))
else
echo -e " ${DGN}-)${CL} ${DGN}Disk resize retry exhausted (already retried ${disk_recovery_attempt}x)${CL}"
fi
fi
if [[ "$is_network_issue" == true ]]; then if [[ "$is_network_issue" == true ]]; then
echo -e " ${GN}${next_option})${CL} Retry with DNS override in LXC (8.8.8.8 / 1.1.1.1)" echo -e " ${GN}${next_option})${CL} Retry with DNS override in LXC (8.8.8.8 / 1.1.1.1)"
DNS_OPTION=$next_option DNS_OPTION=$next_option
@@ -4503,6 +4533,35 @@ EOF'
return $? return $?
fi fi
if [[ -n "${DISK_OPTION}" && "${response}" == "${DISK_OPTION}" ]]; then
# Retry with doubled disk size
handled=true
echo -e "\n${TAB}${HOLD}${YW}Removing container ${CTID} for rebuild with more disk space...${CL}"
pct stop "$CTID" &>/dev/null || true
pct destroy "$CTID" &>/dev/null || true
echo -e "${BFR}${CM}${GN}Container ${CTID} removed${CL}"
echo ""
local old_ctid="$CTID"
local old_disk="$DISK_SIZE"
export CTID=$(get_valid_container_id "$CTID")
export DISK_SIZE=$((DISK_SIZE * 2))
export var_disk="$DISK_SIZE"
export VERBOSE="yes"
export var_verbose="yes"
export DISK_RECOVERY_ATTEMPT=$((${DISK_RECOVERY_ATTEMPT:-0} + 1))
echo -e "${YW}Rebuilding with increased disk space (attempt ${DISK_RECOVERY_ATTEMPT}/2):${CL}"
echo -e " Container ID: ${old_ctid} → ${CTID}"
echo -e " Disk: ${old_disk} → ${GN}${DISK_SIZE}${CL} GB (x2)"
echo -e " RAM: ${RAM_SIZE} MiB | CPU: ${CORE_COUNT} cores"
echo -e " Network: ${NET:-dhcp} | Bridge: ${BRG:-vmbr0}"
echo -e " Verbose: ${GN}enabled${CL}"
echo ""
msg_info "Restarting installation..."
build_container
return $?
fi
if [[ -n "${DNS_OPTION}" && "${response}" == "${DNS_OPTION}" ]]; then if [[ -n "${DNS_OPTION}" && "${response}" == "${DNS_OPTION}" ]]; then
# Retry with DNS override in LXC # Retry with DNS override in LXC
handled=true handled=true