package main
import (
"bytes"
"context"
"crypto/tls"
"fmt"
"log"
"net/smtp"
"strings"
"sync"
"time"
)
// AlertConfig holds SMTP alert configuration
type AlertConfig struct {
Enabled bool
SMTPHost string
SMTPPort int
SMTPUser string
SMTPPassword string
SMTPFrom string
SMTPTo []string
UseTLS bool
FailureThreshold float64 // Alert when failure rate exceeds this (e.g., 20.0 = 20%)
CheckInterval time.Duration // How often to check
Cooldown time.Duration // Minimum time between alerts
// Weekly Report settings
WeeklyReportEnabled bool // Enable weekly summary reports
WeeklyReportDay time.Weekday // Day to send report (0=Sunday, 1=Monday, etc.)
WeeklyReportHour int // Hour to send report (0-23)
}
// WeeklyReportData contains aggregated weekly statistics
type WeeklyReportData struct {
CalendarWeek int
Year int
StartDate time.Time
EndDate time.Time
TotalInstalls int
SuccessCount int
FailedCount int
SuccessRate float64
TopApps []AppStat
TopFailedApps []AppStat
ComparedToPrev WeekComparison
OsDistribution map[string]int
TypeDistribution map[string]int
}
// AppStat represents statistics for a single app
type AppStat struct {
Name string
Total int
Failed int
FailureRate float64
}
// WeekComparison shows changes compared to previous week
type WeekComparison struct {
InstallsChange int // Difference in total installs
InstallsPercent float64 // Percentage change
FailRateChange float64 // Change in failure rate (percentage points)
}
// Alerter handles alerting functionality
type Alerter struct {
cfg AlertConfig
lastAlertAt time.Time
lastWeeklyReport time.Time
mu sync.Mutex
pb *PBClient
lastStats alertStats
alertHistory []AlertEvent
}
type alertStats struct {
successCount int
failedCount int
checkedAt time.Time
}
// AlertEvent records an alert that was sent
type AlertEvent struct {
Timestamp time.Time `json:"timestamp"`
Type string `json:"type"`
Message string `json:"message"`
FailureRate float64 `json:"failure_rate,omitempty"`
}
// NewAlerter creates a new alerter instance
func NewAlerter(cfg AlertConfig, pb *PBClient) *Alerter {
return &Alerter{
cfg: cfg,
pb: pb,
alertHistory: make([]AlertEvent, 0),
}
}
// Start begins the alert monitoring loop
func (a *Alerter) Start() {
if !a.cfg.Enabled {
log.Println("INFO: alerting disabled")
return
}
if a.cfg.SMTPHost == "" || len(a.cfg.SMTPTo) == 0 {
log.Println("WARN: alerting enabled but SMTP not configured")
return
}
go a.monitorLoop()
log.Printf("INFO: alert monitoring started (threshold: %.1f%%, interval: %v)", a.cfg.FailureThreshold, a.cfg.CheckInterval)
// Start weekly report scheduler if enabled
if a.cfg.WeeklyReportEnabled {
go a.weeklyReportLoop()
log.Printf("INFO: weekly report scheduler started (day: %s, hour: %02d:00)", a.cfg.WeeklyReportDay, a.cfg.WeeklyReportHour)
}
}
func (a *Alerter) monitorLoop() {
ticker := time.NewTicker(a.cfg.CheckInterval)
defer ticker.Stop()
for range ticker.C {
a.checkAndAlert()
}
}
func (a *Alerter) checkAndAlert() {
ctx, cancel := newTimeoutContext(10 * time.Second)
defer cancel()
// Fetch last hour's data
data, err := a.pb.FetchDashboardData(ctx, 1, "ProxmoxVE")
if err != nil {
log.Printf("WARN: alert check failed: %v", err)
return
}
// Calculate current failure rate
total := data.SuccessCount + data.FailedCount
if total < 10 {
// Not enough data to determine rate
return
}
failureRate := float64(data.FailedCount) / float64(total) * 100
// Check if we should alert
if failureRate >= a.cfg.FailureThreshold {
a.maybeSendAlert(failureRate, data.FailedCount, total)
}
}
func (a *Alerter) maybeSendAlert(rate float64, failed, total int) {
a.mu.Lock()
defer a.mu.Unlock()
// Check cooldown
if time.Since(a.lastAlertAt) < a.cfg.Cooldown {
return
}
// Send alert
subject := fmt.Sprintf("[ProxmoxVED Alert] High Failure Rate: %.1f%%", rate)
body := fmt.Sprintf(`ProxmoxVE Helper Scripts - Telemetry Alert
⚠️ High installation failure rate detected!
Current Statistics (last 24h):
- Failure Rate: %.1f%%
- Failed Installations: %d
- Total Installations: %d
- Threshold: %.1f%%
Time: %s
Please check the dashboard for more details.
---
This is an automated alert from the telemetry service.
`, rate, failed, total, a.cfg.FailureThreshold, time.Now().Format(time.RFC1123))
if err := a.sendEmail(subject, body); err != nil {
log.Printf("ERROR: failed to send alert email: %v", err)
return
}
a.lastAlertAt = time.Now()
a.alertHistory = append(a.alertHistory, AlertEvent{
Timestamp: time.Now(),
Type: "high_failure_rate",
Message: fmt.Sprintf("Failure rate %.1f%% exceeded threshold %.1f%%", rate, a.cfg.FailureThreshold),
FailureRate: rate,
})
// Keep only last 100 alerts
if len(a.alertHistory) > 100 {
a.alertHistory = a.alertHistory[len(a.alertHistory)-100:]
}
log.Printf("ALERT: sent high failure rate alert (%.1f%%)", rate)
}
func (a *Alerter) sendEmail(subject, body string) error {
return a.sendEmailWithType(subject, body, "text/plain")
}
func (a *Alerter) sendHTMLEmail(subject, body string) error {
return a.sendEmailWithType(subject, body, "text/html")
}
func (a *Alerter) sendEmailWithType(subject, body, contentType string) error {
// Build message
var msg bytes.Buffer
msg.WriteString(fmt.Sprintf("From: %s\r\n", a.cfg.SMTPFrom))
msg.WriteString(fmt.Sprintf("To: %s\r\n", strings.Join(a.cfg.SMTPTo, ", ")))
msg.WriteString(fmt.Sprintf("Subject: %s\r\n", subject))
msg.WriteString("MIME-Version: 1.0\r\n")
msg.WriteString(fmt.Sprintf("Content-Type: %s; charset=UTF-8\r\n", contentType))
msg.WriteString("\r\n")
msg.WriteString(body)
addr := fmt.Sprintf("%s:%d", a.cfg.SMTPHost, a.cfg.SMTPPort)
var auth smtp.Auth
if a.cfg.SMTPUser != "" && a.cfg.SMTPPassword != "" {
auth = smtp.PlainAuth("", a.cfg.SMTPUser, a.cfg.SMTPPassword, a.cfg.SMTPHost)
}
if a.cfg.UseTLS {
// TLS connection
tlsConfig := &tls.Config{
ServerName: a.cfg.SMTPHost,
}
conn, err := tls.Dial("tcp", addr, tlsConfig)
if err != nil {
return fmt.Errorf("TLS dial failed: %w", err)
}
defer conn.Close()
client, err := smtp.NewClient(conn, a.cfg.SMTPHost)
if err != nil {
return fmt.Errorf("SMTP client failed: %w", err)
}
defer client.Close()
if auth != nil {
if err := client.Auth(auth); err != nil {
return fmt.Errorf("SMTP auth failed: %w", err)
}
}
if err := client.Mail(a.cfg.SMTPFrom); err != nil {
return fmt.Errorf("SMTP MAIL failed: %w", err)
}
for _, to := range a.cfg.SMTPTo {
if err := client.Rcpt(to); err != nil {
return fmt.Errorf("SMTP RCPT failed: %w", err)
}
}
w, err := client.Data()
if err != nil {
return fmt.Errorf("SMTP DATA failed: %w", err)
}
_, err = w.Write(msg.Bytes())
if err != nil {
return fmt.Errorf("SMTP write failed: %w", err)
}
return w.Close()
}
// Non-TLS (STARTTLS)
return smtp.SendMail(addr, auth, a.cfg.SMTPFrom, a.cfg.SMTPTo, msg.Bytes())
}
// GetAlertHistory returns recent alert events
func (a *Alerter) GetAlertHistory() []AlertEvent {
a.mu.Lock()
defer a.mu.Unlock()
result := make([]AlertEvent, len(a.alertHistory))
copy(result, a.alertHistory)
return result
}
// TestAlert sends a test alert email
func (a *Alerter) TestAlert() error {
if !a.cfg.Enabled || a.cfg.SMTPHost == "" {
return fmt.Errorf("alerting not configured")
}
subject := "[ProxmoxVED] Test Alert"
body := fmt.Sprintf(`This is a test alert from ProxmoxVE Helper Scripts telemetry service.
If you received this email, your alert configuration is working correctly.
Time: %s
SMTP Host: %s
Recipients: %s
---
This is an automated test message.
`, time.Now().Format(time.RFC1123), a.cfg.SMTPHost, strings.Join(a.cfg.SMTPTo, ", "))
return a.sendEmail(subject, body)
}
// Helper for timeout context
func newTimeoutContext(d time.Duration) (context.Context, context.CancelFunc) {
return context.WithTimeout(context.Background(), d)
}
// weeklyReportLoop checks periodically if it's time to send the weekly report
func (a *Alerter) weeklyReportLoop() {
// Check every hour
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for range ticker.C {
a.checkAndSendWeeklyReport()
}
}
// checkAndSendWeeklyReport sends the weekly report if it's the right time
func (a *Alerter) checkAndSendWeeklyReport() {
now := time.Now()
// Check if it's the right day and hour
if now.Weekday() != a.cfg.WeeklyReportDay || now.Hour() != a.cfg.WeeklyReportHour {
return
}
a.mu.Lock()
// Check if we already sent a report this week
_, lastWeek := a.lastWeeklyReport.ISOWeek()
_, currentWeek := now.ISOWeek()
if a.lastWeeklyReport.Year() == now.Year() && lastWeek == currentWeek {
a.mu.Unlock()
return
}
a.mu.Unlock()
// Send the weekly report
if err := a.SendWeeklyReport(); err != nil {
log.Printf("ERROR: failed to send weekly report: %v", err)
}
}
// SendWeeklyReport generates and sends the weekly summary email
func (a *Alerter) SendWeeklyReport() error {
if !a.cfg.Enabled || a.cfg.SMTPHost == "" {
return fmt.Errorf("alerting not configured")
}
ctx, cancel := newTimeoutContext(30 * time.Second)
defer cancel()
// Get data for the past week
reportData, err := a.fetchWeeklyReportData(ctx)
if err != nil {
return fmt.Errorf("failed to fetch weekly data: %w", err)
}
// Generate email content
subject := fmt.Sprintf("[ProxmoxVED] Weekly Report - Week %d, %d", reportData.CalendarWeek, reportData.Year)
body := a.generateWeeklyReportHTML(reportData)
if err := a.sendHTMLEmail(subject, body); err != nil {
return fmt.Errorf("failed to send email: %w", err)
}
a.mu.Lock()
a.lastWeeklyReport = time.Now()
a.alertHistory = append(a.alertHistory, AlertEvent{
Timestamp: time.Now(),
Type: "weekly_report",
Message: fmt.Sprintf("Weekly report KW %d/%d sent", reportData.CalendarWeek, reportData.Year),
})
a.mu.Unlock()
log.Printf("INFO: weekly report KW %d/%d sent successfully", reportData.CalendarWeek, reportData.Year)
return nil
}
// fetchWeeklyReportData collects data for the weekly report
func (a *Alerter) fetchWeeklyReportData(ctx context.Context) (*WeeklyReportData, error) {
// Calculate the previous week's date range (Mon-Sun)
now := time.Now()
// Find last Monday
daysToLastMonday := int(now.Weekday() - time.Monday)
if daysToLastMonday < 0 {
daysToLastMonday += 7
}
// Go back to the Monday of LAST week
lastMonday := now.AddDate(0, 0, -daysToLastMonday-7)
lastMonday = time.Date(lastMonday.Year(), lastMonday.Month(), lastMonday.Day(), 0, 0, 0, 0, lastMonday.Location())
lastSunday := lastMonday.AddDate(0, 0, 6)
lastSunday = time.Date(lastSunday.Year(), lastSunday.Month(), lastSunday.Day(), 23, 59, 59, 0, lastSunday.Location())
// Get calendar week
year, week := lastMonday.ISOWeek()
// Fetch current week's data (7 days)
currentData, err := a.pb.FetchDashboardData(ctx, 7, "ProxmoxVE")
if err != nil {
return nil, fmt.Errorf("failed to fetch current week data: %w", err)
}
// Fetch previous week's data for comparison (14 days, we'll compare)
prevData, err := a.pb.FetchDashboardData(ctx, 14, "ProxmoxVE")
if err != nil {
// Non-fatal, just log
log.Printf("WARN: could not fetch previous week data: %v", err)
prevData = nil
}
// Build report data
report := &WeeklyReportData{
CalendarWeek: week,
Year: year,
StartDate: lastMonday,
EndDate: lastSunday,
TotalInstalls: currentData.TotalInstalls,
SuccessCount: currentData.SuccessCount,
FailedCount: currentData.FailedCount,
OsDistribution: make(map[string]int),
TypeDistribution: make(map[string]int),
}
// Calculate success rate
if report.TotalInstalls > 0 {
report.SuccessRate = float64(report.SuccessCount) / float64(report.TotalInstalls) * 100
}
// Top 5 installed apps
for i, app := range currentData.TopApps {
if i >= 5 {
break
}
report.TopApps = append(report.TopApps, AppStat{
Name: app.App,
Total: app.Count,
})
}
// Top 5 failed apps
for i, app := range currentData.FailedApps {
if i >= 5 {
break
}
report.TopFailedApps = append(report.TopFailedApps, AppStat{
Name: app.App,
Total: app.TotalCount,
Failed: app.FailedCount,
FailureRate: app.FailureRate,
})
}
// OS distribution
for _, os := range currentData.OsDistribution {
report.OsDistribution[os.Os] = os.Count
}
// Type distribution (LXC vs VM)
for _, t := range currentData.TypeStats {
report.TypeDistribution[t.Type] = t.Count
}
// Calculate comparison to previous week
if prevData != nil {
// Previous week stats (subtract current from 14-day total)
prevInstalls := prevData.TotalInstalls - currentData.TotalInstalls
prevFailed := prevData.FailedCount - currentData.FailedCount
prevSuccess := prevData.SuccessCount - currentData.SuccessCount
if prevInstalls > 0 {
prevFailRate := float64(prevFailed) / float64(prevInstalls) * 100
currentFailRate := 100 - report.SuccessRate
report.ComparedToPrev.InstallsChange = report.TotalInstalls - prevInstalls
if prevInstalls > 0 {
report.ComparedToPrev.InstallsPercent = float64(report.TotalInstalls-prevInstalls) / float64(prevInstalls) * 100
}
report.ComparedToPrev.FailRateChange = currentFailRate - prevFailRate
_ = prevSuccess // suppress unused warning
}
}
return report, nil
}
// generateWeeklyReportHTML creates the HTML email body for the weekly report
func (a *Alerter) generateWeeklyReportHTML(data *WeeklyReportData) string {
var b strings.Builder
// HTML Email Template
b.WriteString(`
📊 Weekly Telemetry Report
ProxmoxVE Helper Scripts
|
Calendar Week
Week `)
b.WriteString(fmt.Sprintf("%d, %d", data.CalendarWeek, data.Year))
b.WriteString(`
|
Period
`)
b.WriteString(fmt.Sprintf("%s – %s", data.StartDate.Format("Jan 02"), data.EndDate.Format("Jan 02, 2006")))
b.WriteString(`
|
|
`)
b.WriteString(fmt.Sprintf("%d", data.TotalInstalls))
b.WriteString(`
Total
|
`)
b.WriteString(fmt.Sprintf("%d", data.SuccessCount))
b.WriteString(`
Successful
|
`)
b.WriteString(fmt.Sprintf("%d", data.FailedCount))
b.WriteString(`
Failed
|
`)
b.WriteString(fmt.Sprintf("%.1f%%", data.SuccessRate))
b.WriteString(`
Success Rate
|
|
`)
// Week comparison
if data.ComparedToPrev.InstallsChange != 0 || data.ComparedToPrev.FailRateChange != 0 {
installIcon := "📈"
installColor := "#16a34a"
if data.ComparedToPrev.InstallsChange < 0 {
installIcon = "📉"
installColor = "#dc2626"
}
failIcon := "✅"
failColor := "#16a34a"
if data.ComparedToPrev.FailRateChange > 0 {
failIcon = "⚠️"
failColor = "#dc2626"
}
b.WriteString(`
vs. Previous Week
`)
b.WriteString(installIcon)
b.WriteString(fmt.Sprintf(" %+d installations (%.1f%%)", data.ComparedToPrev.InstallsChange, data.ComparedToPrev.InstallsPercent))
b.WriteString(`
|
Failure Rate Change
`)
b.WriteString(failIcon)
b.WriteString(fmt.Sprintf(" %+.1f percentage points", data.ComparedToPrev.FailRateChange))
b.WriteString(`
|
|
`)
}
// Top 5 Installed Scripts
b.WriteString(`
🏆 Top 5 Installed Scripts
`)
if len(data.TopApps) > 0 {
for i, app := range data.TopApps {
bgColor := "#ffffff"
if i%2 == 0 {
bgColor = "#f8fafc"
}
b.WriteString(fmt.Sprintf(`
|
%d
%s
|
%d installs |
`, bgColor, i+1, app.Name, app.Total))
}
} else {
b.WriteString(`| No data available | `)
}
b.WriteString(`
|
`)
// Top 5 Failed Scripts
b.WriteString(`
⚠️ Top 5 Scripts with Highest Failure Rates
`)
if len(data.TopFailedApps) > 0 {
for i, app := range data.TopFailedApps {
bgColor := "#ffffff"
if i%2 == 0 {
bgColor = "#fef2f2"
}
rateColor := "#dc2626"
if app.FailureRate < 20 {
rateColor = "#ea580c"
}
if app.FailureRate < 10 {
rateColor = "#ca8a04"
}
b.WriteString(fmt.Sprintf(`
|
%s
|
%d / %d failed |
%.1f%%
|
`, bgColor, app.Name, app.Failed, app.Total, rateColor, app.FailureRate))
}
} else {
b.WriteString(`| 🎉 No failures this week! | `)
}
b.WriteString(`
|
`)
// Type Distribution
if len(data.TypeDistribution) > 0 {
b.WriteString(`
📦 Distribution by Type
`)
for t, count := range data.TypeDistribution {
percent := float64(count) / float64(data.TotalInstalls) * 100
b.WriteString(fmt.Sprintf(`|
| `, count, strings.ToUpper(t), percent))
}
b.WriteString(`
|
`)
}
// OS Distribution
if len(data.OsDistribution) > 0 {
b.WriteString(`
🐧 Top Operating Systems
`)
// Sort OS by count
type osEntry struct {
name string
count int
}
var osList []osEntry
for name, count := range data.OsDistribution {
osList = append(osList, osEntry{name, count})
}
for i := 0; i < len(osList); i++ {
for j := i + 1; j < len(osList); j++ {
if osList[j].count > osList[i].count {
osList[i], osList[j] = osList[j], osList[i]
}
}
}
for i, os := range osList {
if i >= 5 {
break
}
percent := float64(os.count) / float64(data.TotalInstalls) * 100
barWidth := int(percent * 2) // Scale for visual
if barWidth > 100 {
barWidth = 100
}
b.WriteString(fmt.Sprintf(`
| %s |
|
%d (%.1f%%) |
`, os.name, barWidth, os.count, percent))
}
b.WriteString(`
|
`)
}
// Footer
b.WriteString(`
|
Generated `)
b.WriteString(time.Now().Format("Jan 02, 2006 at 15:04 MST"))
b.WriteString(`
ProxmoxVE Helper Scripts —
This is an automated report from the telemetry service.
|
|
`)
return b.String()
}
// generateWeeklyReportEmail creates the plain text email body (kept for compatibility)
func (a *Alerter) generateWeeklyReportEmail(data *WeeklyReportData) string {
var b strings.Builder
b.WriteString("ProxmoxVE Helper Scripts - Weekly Telemetry Report\n")
b.WriteString("==================================================\n\n")
b.WriteString(fmt.Sprintf("Calendar Week: %d, %d\n", data.CalendarWeek, data.Year))
b.WriteString(fmt.Sprintf("Period: %s - %s\n\n",
data.StartDate.Format("Jan 02, 2006"),
data.EndDate.Format("Jan 02, 2006")))
b.WriteString("OVERVIEW\n")
b.WriteString("--------\n")
b.WriteString(fmt.Sprintf("Total Installations: %d\n", data.TotalInstalls))
b.WriteString(fmt.Sprintf("Successful: %d\n", data.SuccessCount))
b.WriteString(fmt.Sprintf("Failed: %d\n", data.FailedCount))
b.WriteString(fmt.Sprintf("Success Rate: %.1f%%\n\n", data.SuccessRate))
if data.ComparedToPrev.InstallsChange != 0 || data.ComparedToPrev.FailRateChange != 0 {
b.WriteString("vs. Previous Week:\n")
b.WriteString(fmt.Sprintf(" Installations: %+d (%.1f%%)\n", data.ComparedToPrev.InstallsChange, data.ComparedToPrev.InstallsPercent))
b.WriteString(fmt.Sprintf(" Failure Rate: %+.1f pp\n\n", data.ComparedToPrev.FailRateChange))
}
b.WriteString("TOP 5 INSTALLED SCRIPTS\n")
b.WriteString("-----------------------\n")
for i, app := range data.TopApps {
if i >= 5 {
break
}
b.WriteString(fmt.Sprintf("%d. %-25s %5d installs\n", i+1, app.Name, app.Total))
}
b.WriteString("\n")
b.WriteString("TOP 5 FAILED SCRIPTS\n")
b.WriteString("--------------------\n")
if len(data.TopFailedApps) > 0 {
for i, app := range data.TopFailedApps {
if i >= 5 {
break
}
b.WriteString(fmt.Sprintf("%d. %-20s %3d/%3d failed (%.1f%%)\n",
i+1, app.Name, app.Failed, app.Total, app.FailureRate))
}
} else {
b.WriteString("No failures this week!\n")
}
b.WriteString("\n")
b.WriteString("---\n")
b.WriteString(fmt.Sprintf("Generated: %s\n", time.Now().Format("Jan 02, 2006 15:04 MST")))
b.WriteString("This is an automated report from the telemetry service.\n")
return b.String()
}
// TestWeeklyReport sends a test weekly report email
func (a *Alerter) TestWeeklyReport() error {
return a.SendWeeklyReport()
}