diff --git a/nostr/commit-signatures.jsonl b/nostr/commit-signatures.jsonl index ddef64f..45f1eea 100644 --- a/nostr/commit-signatures.jsonl +++ b/nostr/commit-signatures.jsonl @@ -87,3 +87,4 @@ {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772008707,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix bugs\nsign sync commits"]],"content":"Signed commit: fix bugs\nsign sync commits","id":"3b05eb0074772bd7d3322e0a32ef8932dbafa2334ff51a75ed5159fcdfdb3558","sig":"b7bdc3272a6daddf409dc519de6e06a4ee85407a14790996be7c5039a00358106285a8fc00084d9016587df529d7026f28108384976198789aa1fd60140a5738"} {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772009058,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix doc page redicrects"]],"content":"Signed commit: fix doc page redicrects","id":"be18739cf8e9062e7163dca11c6768086cbf834d52f9758c884a420e4d9dceb7","sig":"2f068184caa9d921f38d6b132992614f39bab6ec6ea8040ac0d337db4c16de4e66de44c4d78162787f1cf8bf13978d01927b8152405f0b48adf608ca6bf34295"} {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772009909,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix cli sync and refine commit workflow"]],"content":"Signed commit: fix cli sync and refine commit workflow","id":"ddf0b49bb68139efbdacd6308b95b4a5329a37f479b319d609d712bee83e2d45","sig":"aacc22f02a3129d18cd2bdcfc4e2dda66e9358e552eac507cd4c4808bb47cd582298aed7d28f21b677418e1a91f3f1553c08f02671df8f1f43681cf7b19a744e"} +{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772010107,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix build"]],"content":"Signed commit: fix 
build","id":"968af17f95f1ba0cf6a4d1f04ce108a6e4eb4ec3a4f72ca6a9d2529dacb92811","sig":"1891b6131effda70ec76577efadd9ea7374ebcbd4d738d0b0650e7dce46c3e7253eccb4b8455690297b63b7c30f61a0c7dcc1af0147b2f5a631bbd91c517c32b"} diff --git a/server-maintenance-commands.md b/server-maintenance-commands.md new file mode 100644 index 0000000..4ae42f5 --- /dev/null +++ b/server-maintenance-commands.md @@ -0,0 +1,204 @@ +# Server Maintenance Commands + +## 1. Investigate Zombie Processes (CRITICAL - 3300 zombies) + +```bash +# List zombie processes (PID and command) +ps aux | awk '$8 ~ /^Z/ { print $2, $11 }' | head -20 + +# Count zombies per parent PID to find the processes that are creating them +ps -eo ppid,stat | awk '$2 ~ /^Z/ { print $1 }' | sort | uniq -c | sort -rn | head -10 + +# Check for specific problematic processes +ps auxf | grep -E 'Z|defunct' + +# Check systemd services that might be spawning zombies +systemctl status | grep -i failed +systemctl list-units --type=service --state=failed +``` + +## 2. Identify the Root Cause (Git Processes Detected) + +Based on initial investigation, zombies are `[git]` processes. 
Run these commands: + +```bash +# Check all git processes (including zombies) +ps aux | grep -E 'git|\[git\]' | head -30 + +# Find what's spawning git processes +ps auxf | grep -B 5 -A 5 git | head -50 + +# Check for web server processes that might spawn git +ps aux | grep -E 'node|nginx|apache|php-fpm|plesk' | head -20 + +# Check system logs for git-related errors +journalctl -p err -n 100 | grep -i git +journalctl -u nginx -n 50 +journalctl -u apache2 -n 50 + +# Check the top CPU and memory consumers (a busy parent may be the zombie creator) +ps aux --sort=-%cpu | head -20 +ps aux --sort=-%mem | head -20 + +# Monitor the zombie count in real time (watch for ~30 seconds, then press Ctrl+C) +watch -n 1 'ps aux | awk '\''$8 ~ /^Z/ { count++ } END { print "Zombies:", count+0 }'\''' + +# Check if it's a GitRepublic application issue +ps aux | grep -E 'node.*gitrepublic|gitrepublic.*node' +systemctl status | grep -i gitrepublic +``` + +## 3. Apply Security Updates + +```bash +# Update package lists +apt update + +# See what security updates are available +apt list --upgradable | grep -i security + +# Apply all pending upgrades non-interactively (this includes the security updates) +apt upgrade -y + +# Or run interactively to review the changes first (after investigating zombies) +apt upgrade +``` + +## 4. System Health Check + +```bash +# Check disk space +df -h + +# Check memory usage +free -h + +# Check system load +uptime +top -bn1 | head -20 + +# Check for failed services +systemctl list-units --type=service --state=failed + +# Check system logs +journalctl -p err -n 50 +``` + +## 5. Plan System Restart + +```bash +# Check what requires restart +cat /var/run/reboot-required.pkgs 2>/dev/null || echo "No reboot required file found" + +# Schedule maintenance window and restart +# (Only after fixing zombie issue) +# reboot +``` + +## 6. 
Plesk-Specific Checks + +```bash +# Check Plesk services +plesk repair all -y + +# Check Plesk logs +tail -100 /var/log/plesk/panel.log + +# Check for Plesk-related zombie processes +ps aux | grep -i plesk | grep -i defunct +``` + +## Root Cause Identified ✅ + +**Problem**: Node.js GitRepublic process (PID 330225, `node build`) is spawning git processes that aren't being properly reaped, creating zombies. + +**Evidence**: +- All zombie processes are `[git] <defunct>` children of the Node.js process +- Active git process: `git remote set-head remote-0 -a` (from `git-remote-sync.ts`) +- Git spawns subprocesses like `git-remote-https` that can become zombies if not properly waited for + +**Code Fix**: Updated `src/lib/services/git/git-remote-sync.ts` to: +- Add timeout handling (30 minutes) +- Properly clean up processes on exit +- Handle signals correctly +- Prevent zombie processes + +## Immediate Server Fix + +**Option 1: Restart the GitRepublic service (RECOMMENDED)** +```bash +# Find the service/container +docker ps | grep gitrepublic +# or +systemctl list-units | grep -i gitrepublic + +# Restart it (this will clean up zombies temporarily) +docker restart <container-name> +# or +systemctl restart <service-name> +``` + +**Option 2: Kill and let it restart (if managed by systemd/docker)** +```bash +# Find the process +ps aux | grep "node build" | grep -v grep + +# Kill it (systemd/docker will restart it); replace 330225 with the PID found above +kill -TERM 330225 + +# Wait a moment, then check if it restarted +ps aux | grep "node build" | grep -v grep +``` + +**Option 3: Clean up zombies manually (temporary fix)** +```bash +# This won't fix the root cause but will clean up existing zombies +# The zombies will come back until the code is fixed +# Note: You can't kill zombies directly, but killing the parent will clean them up +``` + +## Recommended Action Plan + +1. **IMMEDIATE**: Restart GitRepublic service to clean up existing zombies +2. **URGENT**: Deploy the code fix (updated `git-remote-sync.ts`) +3. 
**HIGH PRIORITY**: Apply security updates (section 3) +4. **MONITOR**: Watch the zombie process count after restart +5. **MAINTENANCE WINDOW**: Schedule system restart after deploying fix + +## Common Causes of Zombie Processes + +- Processes spawning children without reaping them (missing `wait()` / `SIGCHLD` handling) +- Systemd service not properly configured +- Application bugs (especially Node.js, Python, or long-running processes) +- Resource exhaustion causing process management issues +- Plesk or web server processes not reaping children + +## Git-Specific Zombie Issues + +Since zombies are `[git]` processes, likely causes: +- **Git operations not being properly waited for** - the parent keeps running but never reaps the finished git process +- **Git HTTP backend issues** - web server spawning git processes that aren't reaped +- **GitRepublic application** - Node.js app spawning git commands without proper signal handling +- **Plesk Git integration** - Plesk's git features not properly managing child processes +- **Git hooks** - hooks spawning processes that become zombies + +### Quick Fixes to Try + +```bash +# Restart web server (if using nginx/apache) +systemctl restart nginx +# or +systemctl restart apache2 + +# Restart GitRepublic application (if running as service) +systemctl restart gitrepublic-web +# or find and restart the Node.js process +ps aux | grep node | grep gitrepublic +# Then restart it + +# Check git-http-backend processes +ps aux | grep git-http-backend + +# Kill any stuck git processes (CAREFUL - only if safe) +# pkill -9 git # Only if you're sure no important operations are running +``` diff --git a/src/lib/services/git/git-remote-sync.ts b/src/lib/services/git/git-remote-sync.ts index 140bea5..244707e 100644 --- a/src/lib/services/git/git-remote-sync.ts +++ b/src/lib/services/git/git-remote-sync.ts @@ -26,11 +26,35 @@ function execGitWithEnv( // Security: Only use whitelisted env vars, don't spread process.env // The env parameter should already contain only safe, whitelisted variables env: 
env, - stdio: ['ignore', 'pipe', 'pipe'] + stdio: ['ignore', 'pipe', 'pipe'], + // Ensure detached process group to prevent zombie processes + detached: false }); let stdout = ''; let stderr = ''; + let resolved = false; + + // Set a timeout to prevent hanging processes (30 minutes for long operations) + const timeoutMs = 30 * 60 * 1000; + const timeoutId = setTimeout(() => { + if (!resolved && !gitProcess.killed) { + resolved = true; + // Kill the process tree to prevent zombies + try { + gitProcess.kill('SIGTERM'); + // Force kill after grace period + setTimeout(() => { + if (!gitProcess.killed) { + gitProcess.kill('SIGKILL'); + } + }, 5000); + } catch (err) { + // Process might already be dead + } + reject(new Error(`Git command timeout after ${timeoutMs}ms: ${args.join(' ')}`)); + } + }, timeoutMs); gitProcess.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); @@ -40,17 +64,50 @@ function execGitWithEnv( stderr += chunk.toString(); }); - gitProcess.on('close', (code) => { + gitProcess.on('close', (code, signal) => { + clearTimeout(timeoutId); + if (resolved) return; + resolved = true; + + // Ensure process is fully cleaned up + if (gitProcess.pid && !gitProcess.killed) { + try { + // Wait for any remaining child processes + process.kill(gitProcess.pid, 0); // Check if process exists + } catch { + // Process already dead, that's fine + } + } + if (code === 0) { resolve({ stdout, stderr }); } else { - reject(new Error(`Git command failed with code ${code}: ${stderr || stdout}`)); + const errorMsg = signal + ? 
`Git command terminated by signal ${signal}: ${stderr || stdout}` + : `Git command failed with code ${code}: ${stderr || stdout}`; + reject(new Error(errorMsg)); } }); gitProcess.on('error', (err) => { + clearTimeout(timeoutId); + if (resolved) return; + resolved = true; reject(err); }); + + // Handle process exit (backup to 'close' event) + gitProcess.on('exit', (code, signal) => { + // This is handled by 'close' event, but ensures we catch all cases + if (!resolved && code !== null && code !== 0) { + clearTimeout(timeoutId); + resolved = true; + const errorMsg = signal + ? `Git command terminated by signal ${signal}: ${stderr || stdout}` + : `Git command failed with code ${code}: ${stderr || stdout}`; + reject(new Error(errorMsg)); + } + }); }); }