Refactor GPU passthrough and detection logic
Some checks failed
Bump build.func Revision / bump-revision (push) Has been cancelled

Simplifies and improves GPU device detection for Intel, AMD, and NVIDIA, consolidates configuration functions, and enhances permission handling for both privileged and unprivileged containers. Removes redundant helper functions, adds user prompts for multiple GPU types, and improves driver installation and verification steps. Also refactors USB and additional device passthrough setup for clarity and maintainability.
This commit is contained in:
CanbiZ 2025-09-29 11:02:27 +02:00
parent 856440fcd4
commit 8538a6c107

View File

@ -2163,22 +2163,6 @@ build_container() {
"viseron" "viseron"
) )
# ------------------------------------------------------------------------------
# Helper Functions for GPU/USB Configuration
# ------------------------------------------------------------------------------
# Get device GID dynamically
# Resolve the numeric GID of a host group, with static fallbacks.
# Arguments: $1 - group name (e.g. "video", "render")
# Outputs:   GID on stdout (real GID if the group exists, otherwise a
#            conventional Debian default: video=44, render=104, else 44)
# Returns:   0 always
get_device_gid() {
  local group="$1"
  local gid   # declared local: the original leaked 'gid' into the caller's scope
  gid=$(getent group "$group" | cut -d: -f3)
  if [[ -n "$gid" ]]; then
    echo "$gid"
    return
  fi
  # Fallbacks mirror Debian's statically-assigned GIDs for these groups.
  case "$group" in
    video) echo 44 ;;
    render) echo 104 ;;
    *) echo 44 ;;
  esac
}
# Check if app needs GPU # Check if app needs GPU
is_gpu_app() { is_gpu_app() {
local app="${1,,}" local app="${1,,}"
@ -2188,33 +2172,43 @@ build_container() {
return 1 return 1
} }
# Detect available GPU devices # Detect all available GPU devices
# Check whether the given application name requires GPU passthrough.
# Arguments: $1 - application name (matched case-insensitively)
# Globals:   GPU_APPS (read) - array of known GPU-dependent app names
# Returns:   0 if the app is in GPU_APPS, 1 otherwise
is_gpu_app() {
  local candidate="${1,,}"
  local entry
  for entry in "${GPU_APPS[@]}"; do
    if [[ "$entry" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}
detect_gpu_devices() { detect_gpu_devices() {
VAAPI_DEVICES=() INTEL_DEVICES=()
AMD_DEVICES=()
NVIDIA_DEVICES=() NVIDIA_DEVICES=()
# Intel/AMD (VAAPI) # Check for Intel/AMD GPUs via DRI devices
if [[ -d /dev/dri ]]; then
# Intel GPU detection
if lspci | grep -iq "VGA.*Intel\|Display.*Intel"; then
for d in /dev/dri/renderD* /dev/dri/card*; do for d in /dev/dri/renderD* /dev/dri/card*; do
[ -e "$d" ] || continue [[ -e "$d" ]] && INTEL_DEVICES+=("$d")
VAAPI_DEVICES+=("$d")
done done
[[ ${#INTEL_DEVICES[@]} -gt 0 ]] && msg_info "Detected Intel GPU"
fi
# NVIDIA # AMD GPU detection
if lspci | grep -iq "VGA.*AMD\|Display.*AMD\|VGA.*ATI\|Display.*ATI"; then
for d in /dev/dri/renderD* /dev/dri/card*; do
[[ -e "$d" ]] && AMD_DEVICES+=("$d")
done
[[ ${#AMD_DEVICES[@]} -gt 0 ]] && msg_info "Detected AMD GPU"
fi
fi
# NVIDIA GPU detection
if lspci | grep -iq "VGA.*NVIDIA\|3D.*NVIDIA"; then
for d in /dev/nvidia*; do for d in /dev/nvidia*; do
[ -e "$d" ] || continue [[ -e "$d" ]] && NVIDIA_DEVICES+=("$d")
NVIDIA_DEVICES+=("$d")
done done
if [[ ${#NVIDIA_DEVICES[@]} -eq 0 ]]; then
msg_debug "Detected VAAPI devices: ${VAAPI_DEVICES[*]:-(none)}" msg_warn "NVIDIA GPU detected but no /dev/nvidia* devices found"
msg_debug "Detected NVIDIA devices: ${NVIDIA_DEVICES[*]:-(none)}" msg_warn "Please install NVIDIA drivers on host: apt install nvidia-driver"
else
msg_info "Detected NVIDIA GPU"
fi
fi
} }
# Configure USB passthrough for privileged containers # Configure USB passthrough for privileged containers
@ -2225,7 +2219,7 @@ build_container() {
msg_info "Configuring automatic USB passthrough (privileged container)" msg_info "Configuring automatic USB passthrough (privileged container)"
cat <<EOF >>"$LXC_CONFIG" cat <<EOF >>"$LXC_CONFIG"
# USB passthrough (privileged container) # Automatic USB passthrough (privileged container)
lxc.cgroup2.devices.allow: a lxc.cgroup2.devices.allow: a
lxc.cap.drop: lxc.cap.drop:
lxc.cgroup2.devices.allow: c 188:* rwm lxc.cgroup2.devices.allow: c 188:* rwm
@ -2238,66 +2232,116 @@ lxc.mount.entry: /dev/ttyACM1 dev/ttyACM1 none bind,optional,create=
EOF EOF
msg_ok "USB passthrough configured" msg_ok "USB passthrough configured"
} }
configure_vaapi_device() {
local dev="$1" idx="$2" # Configure GPU passthrough
configure_gpu_passthrough() {
# Skip if not a GPU app and not privileged
if [[ "$CT_TYPE" != "0" ]] && ! is_gpu_app "$APP"; then
return 0
fi
detect_gpu_devices
# Count available GPU types
local gpu_count=0
local available_gpus=()
[[ ${#INTEL_DEVICES[@]} -gt 0 ]] && { available_gpus+=("INTEL"); ((gpu_count++)); }
[[ ${#AMD_DEVICES[@]} -gt 0 ]] && { available_gpus+=("AMD"); ((gpu_count++)); }
[[ ${#NVIDIA_DEVICES[@]} -gt 0 ]] && { available_gpus+=("NVIDIA"); ((gpu_count++)); }
if [[ $gpu_count -eq 0 ]]; then
msg_info "No GPU devices found for passthrough"
return 0
fi
local selected_gpu=""
if [[ $gpu_count -eq 1 ]]; then
# Automatic selection for single GPU
selected_gpu="${available_gpus[0]}"
msg_info "Automatically configuring ${selected_gpu} GPU passthrough"
else
# Multiple GPUs - ask user
echo -e "\n${INFO} Multiple GPU types detected:"
for gpu in "${available_gpus[@]}"; do
echo " - $gpu"
done
read -rp "Which GPU type to passthrough? (${available_gpus[*]}): " selected_gpu
selected_gpu="${selected_gpu^^}"
# Validate selection
local valid=0
for gpu in "${available_gpus[@]}"; do
[[ "$selected_gpu" == "$gpu" ]] && valid=1
done
if [[ $valid -eq 0 ]]; then
msg_warn "Invalid selection. Skipping GPU passthrough."
return 0
fi
fi
# Apply passthrough configuration based on selection
local dev_idx=0
case "$selected_gpu" in
INTEL|AMD)
local devices=()
[[ "$selected_gpu" == "INTEL" ]] && devices=("${INTEL_DEVICES[@]}")
[[ "$selected_gpu" == "AMD" ]] && devices=("${AMD_DEVICES[@]}")
for dev in "${devices[@]}"; do
if [[ "$CT_TYPE" == "0" ]]; then if [[ "$CT_TYPE" == "0" ]]; then
major=$(stat -c '%t' "$dev"); minor=$(stat -c '%T' "$dev") # Privileged container
local major=$(stat -c '%t' "$dev")
local minor=$(stat -c '%T' "$dev")
echo "lxc.cgroup2.devices.allow: c $((0x$major)):$((0x$minor)) rwm" >>"$LXC_CONFIG" echo "lxc.cgroup2.devices.allow: c $((0x$major)):$((0x$minor)) rwm" >>"$LXC_CONFIG"
echo "lxc.mount.entry: $dev dev/$(basename "$dev") none bind,optional,create=file" >>"$LXC_CONFIG" echo "lxc.mount.entry: $dev dev/$(basename "$dev") none bind,optional,create=file" >>"$LXC_CONFIG"
else else
[[ "$dev" =~ renderD ]] && gid=$(get_device_gid "render") || gid=$(get_device_gid "video") # Unprivileged container - use generic GID, will be fixed after start
echo "dev$idx: $dev,gid=$gid" >>"$LXC_CONFIG" echo "dev${dev_idx}: $dev,uid=0,gid=44" >>"$LXC_CONFIG"
((dev_idx++))
fi
done
# Mount entire /dev/dri for privileged containers
if [[ "$CT_TYPE" == "0" ]]; then
echo "lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir" >>"$LXC_CONFIG"
fi
export GPU_TYPE="$selected_gpu"
msg_ok "${selected_gpu} GPU passthrough configured"
;;
NVIDIA)
if [[ ${#NVIDIA_DEVICES[@]} -eq 0 ]]; then
msg_error "NVIDIA drivers not installed on host. Please install: apt install nvidia-driver"
return 1
fi fi
}
configure_nvidia_devices() {
for dev in "${NVIDIA_DEVICES[@]}"; do for dev in "${NVIDIA_DEVICES[@]}"; do
if [[ "$CT_TYPE" == "0" ]]; then if [[ "$CT_TYPE" == "0" ]]; then
major=$(stat -c '%t' "$dev"); minor=$(stat -c '%T' "$dev") local major=$(stat -c '%t' "$dev")
local minor=$(stat -c '%T' "$dev")
echo "lxc.cgroup2.devices.allow: c $((0x$major)):$((0x$minor)) rwm" >>"$LXC_CONFIG" echo "lxc.cgroup2.devices.allow: c $((0x$major)):$((0x$minor)) rwm" >>"$LXC_CONFIG"
echo "lxc.mount.entry: $dev dev/$(basename "$dev") none bind,optional,create=file" >>"$LXC_CONFIG" echo "lxc.mount.entry: $dev dev/$(basename "$dev") none bind,optional,create=file" >>"$LXC_CONFIG"
else else
msg_warn "NVIDIA passthrough on unprivileged CT may fail" msg_warn "NVIDIA passthrough on unprivileged container may not work properly"
fi fi
done done
[[ -d /dev/dri && "$CT_TYPE" == "0" ]] && echo "lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir" >>"$LXC_CONFIG"
}
configure_gpu_passthrough() { if [[ "$CT_TYPE" == "0" && -d /dev/dri ]]; then
detect_gpu_devices echo "lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir" >>"$LXC_CONFIG"
local should=false
if [[ "$CT_TYPE" == "0" ]] || is_gpu_app "$APP"; then should=true; fi
[[ "$should" == "false" ]] && return
if [[ ${#VAAPI_DEVICES[@]} -eq 0 && ${#NVIDIA_DEVICES[@]} -eq 0 ]]; then
msg_info "No GPU devices found"; return
fi fi
local choices=() selected=() export GPU_TYPE="NVIDIA"
[[ ${#VAAPI_DEVICES[@]} -gt 0 ]] && choices+=("VAAPI" "Intel/AMD GPU" "OFF") msg_ok "NVIDIA GPU passthrough configured"
[[ ${#NVIDIA_DEVICES[@]} -gt 0 ]] && choices+=("NVIDIA" "NVIDIA GPU" "OFF") ;;
if [[ ${#choices[@]} -eq 3 ]]; then
selected=("VAAPI")
elif [[ ${#choices[@]} -eq 6 ]]; then
read -rp "Multiple GPUs found (VAAPI/NVIDIA). Which passthrough? " sel
[[ "$sel" =~ VAAPI|vaapi ]] && selected=("VAAPI")
[[ "$sel" =~ NVIDIA|nvidia ]] && selected=("NVIDIA")
fi
local idx=0
for s in "${selected[@]}"; do
case "$s" in
VAAPI) for d in "${VAAPI_DEVICES[@]}"; do configure_vaapi_device "$d" "$idx"; idx=$((idx+1)); done; export ENABLE_VAAPI=1 ;;
NVIDIA) configure_nvidia_devices; export ENABLE_NVIDIA=1 ;;
esac esac
done
[[ ${#selected[@]} -gt 0 ]] && msg_ok "GPU passthrough configured"
} }
configure_usb_passthrough # Additional device passthrough
configure_gpu_passthrough configure_additional_devices() {
# TUN device passthrough # TUN device passthrough
if [ "$ENABLE_TUN" == "yes" ]; then if [ "$ENABLE_TUN" == "yes" ]; then
cat <<EOF >>"$LXC_CONFIG" cat <<EOF >>"$LXC_CONFIG"
@ -2306,11 +2350,17 @@ lxc.mount.entry: /dev/net/tun dev/net/tun none bind,create=file
EOF EOF
fi fi
# Coral TPU passthrough (if available) # Coral TPU passthrough
if [[ -e /dev/apex_0 ]]; then if [[ -e /dev/apex_0 ]]; then
msg_info "Detected Coral TPU - configuring passthrough" msg_info "Detected Coral TPU - configuring passthrough"
echo "lxc.mount.entry: /dev/apex_0 dev/apex_0 none bind,optional,create=file" >>"$LXC_CONFIG" echo "lxc.mount.entry: /dev/apex_0 dev/apex_0 none bind,optional,create=file" >>"$LXC_CONFIG"
fi fi
}
# Execute pre-start configurations
configure_usb_passthrough
configure_gpu_passthrough
configure_additional_devices
# ============================================================================ # ============================================================================
# START CONTAINER AND INSTALL USERLAND # START CONTAINER AND INSTALL USERLAND
@ -2364,31 +2414,88 @@ EOF
msg_warn "Network reachable but gateway check failed" msg_warn "Network reachable but gateway check failed"
fi fi
fi fi
# Function to get correct GID inside container
install_gpu_userland() { get_container_gid() {
local gpu="$1" local group="$1"
if [[ "$var_os" == "alpine" ]]; then local gid=$(pct exec "$CTID" -- getent group "$group" 2>/dev/null | cut -d: -f3)
case "$gpu" in echo "${gid:-44}" # Default to 44 if not found
VAAPI) pct exec "$CTID" -- apk add mesa-dri-gallium mesa-va-gallium intel-media-driver libva-utils ;;
NVIDIA) msg_warn "NVIDIA drivers not in Alpine repos" ;;
esac
else
case "$gpu" in
VAAPI) pct exec "$CTID" -- bash -c "apt-get update && apt-get install -y intel-media-va-driver-non-free mesa-va-drivers vainfo" ;;
NVIDIA) pct exec "$CTID" -- bash -c "apt-get update && apt-get install -y nvidia-driver nvidia-utils libnvidia-encode1" ;;
esac
fi
} }
if [[ "${ENABLE_VAAPI:-0}" == "1" ]]; then # Install GPU drivers and fix permissions
install_gpu_userland "VAAPI" if [[ -n "${GPU_TYPE:-}" ]]; then
pct exec "$CTID" -- bash -c "chgrp video /dev/dri && chmod 755 /dev/dri && chmod 660 /dev/dri/*" msg_info "Installing GPU userland drivers for ${GPU_TYPE}"
pct exec "$CTID" -- vainfo >/dev/null 2>&1 && msg_ok "VAAPI verified" || msg_warn "VAAPI failed"
case "$GPU_TYPE" in
INTEL|AMD)
if [[ "$var_os" == "alpine" ]]; then
pct exec "$CTID" -- apk add mesa-dri-gallium mesa-va-gallium intel-media-driver libva-utils 2>/dev/null || true
else
pct exec "$CTID" -- bash -c "apt-get update && apt-get install -y vainfo intel-media-va-driver-non-free mesa-va-drivers" 2>/dev/null || true
fi fi
if [[ "${ENABLE_NVIDIA:-0}" == "1" ]]; then
install_gpu_userland "NVIDIA" # Fix permissions with correct GID
pct exec "$CTID" -- nvidia-smi >/dev/null 2>&1 && msg_ok "NVIDIA verified" || msg_warn "NVIDIA failed" local video_gid=$(get_container_gid "video")
local render_gid=$(get_container_gid "render")
msg_info "Setting GPU permissions (video:${video_gid}, render:${render_gid})"
# Fix device permissions inside container
if [[ "$CT_TYPE" == "0" ]]; then
pct exec "$CTID" -- bash -c "
if [ -d /dev/dri ]; then
chgrp ${video_gid} /dev/dri 2>/dev/null || true
chmod 755 /dev/dri
for dev in /dev/dri/*; do
if [[ \"\$dev\" =~ renderD ]]; then
chgrp ${render_gid} \"\$dev\" 2>/dev/null || true
else
chgrp ${video_gid} \"\$dev\" 2>/dev/null || true
fi fi
chmod 660 \"\$dev\"
done
fi
"
else
# For unprivileged containers, update the LXC config with correct GIDs
msg_info "Updating unprivileged container device GIDs"
# Stop container to update config
pct stop "$CTID"
# Update device entries with correct GIDs
sed -i "s/dev\([0-9]\+\):.*renderD.*/dev\1: \/dev\/dri\/renderD*, gid=${render_gid}/" "$LXC_CONFIG"
sed -i "s/dev\([0-9]\+\):.*card.*/dev\1: \/dev\/dri\/card*, gid=${video_gid}/" "$LXC_CONFIG"
# Restart container
pct start "$CTID"
sleep 5
fi
# Verify GPU access
if pct exec "$CTID" -- vainfo >/dev/null 2>&1; then
msg_ok "${GPU_TYPE} GPU verified working"
else
msg_warn "${GPU_TYPE} GPU verification failed - may need additional configuration"
fi
;;
NVIDIA)
if [[ "$var_os" != "alpine" ]]; then
pct exec "$CTID" -- bash -c "apt-get update && apt-get install -y nvidia-driver nvidia-utils libnvidia-encode1" 2>/dev/null || true
else
msg_warn "NVIDIA drivers not available in Alpine repos"
fi
if pct exec "$CTID" -- nvidia-smi >/dev/null 2>&1; then
msg_ok "NVIDIA GPU verified working"
else
msg_warn "NVIDIA GPU verification failed"
fi
;;
esac
fi
# Continue with standard container setup
msg_info "Customizing LXC Container" msg_info "Customizing LXC Container"
# # Install GPU userland if configured # # Install GPU userland if configured