From a8c9850b19f1408ee42273f62261d2d267663025 Mon Sep 17 00:00:00 2001 From: Silberengel Date: Sat, 28 Feb 2026 08:41:30 +0100 Subject: [PATCH] polling update Nostr-Signature: 42c1a2a63a4568c65d82d78701451b3b4363bdf9c8c57e804535b5f3f0d7b6fc 573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc 8e5f32ecb79da876ac41eba04c3b1541b21d039ae50d1b9fefa630d35f31c97dd29af64e4b695742fa7d4eaec17db8f4a066b4db99ce628aed596971975d4a87 --- nostr/commit-signatures.jsonl | 1 + server-maintenance-commands.md | 219 ------------------------- src/hooks.server.ts | 4 + src/lib/services/nostr/repo-polling.ts | 39 ++++- src/lib/services/service-registry.ts | 19 ++- src/lib/utils/repo-poll-trigger.ts | 30 ++++ src/routes/api/repos/poll/+server.ts | 33 ++++ src/routes/api/user/level/+server.ts | 7 + src/routes/repos/+page.svelte | 29 +++- 9 files changed, 156 insertions(+), 225 deletions(-) delete mode 100644 server-maintenance-commands.md create mode 100644 src/lib/utils/repo-poll-trigger.ts create mode 100644 src/routes/api/repos/poll/+server.ts diff --git a/nostr/commit-signatures.jsonl b/nostr/commit-signatures.jsonl index cf4b32b..3699f16 100644 --- a/nostr/commit-signatures.jsonl +++ b/nostr/commit-signatures.jsonl @@ -114,3 +114,4 @@ {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772223624,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fixes"]],"content":"Signed commit: bug-fixes","id":"99cb543f1e821f1b7df4bbde2b3da3ab3a09cda7a1e9a537fe1b8df79b19e8e8","sig":"762a7ea92457ce81cc5aae9bc644fb9d80f90c7500035fbb506f2f76a5942333b828cc8a59f7656b0e714b15a59158be0a671f51476be2e8eabe9731ced74bcb"} {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772226191,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fixes"]],"content":"Signed commit: bug-fixes","id":"20be97351d2b05fa7ad9e161b2619e9babaaffc6a8090057c1a3ac50a0f08d6a","sig":"a174c7dd39f613dd88260ef5c111b943df381b0acae20d048596e11ef1a6b0e3c1bfb9a8858af3df0f8858c4c79d1e2d03ad248a0608ac5d5cded6a81e99af77"} {"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772227102,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fix"]],"content":"Signed commit: bug-fix","id":"0f366a0cc7c003f74e375f40e7c322781746d12829943df1287bf67f36e1330a","sig":"167177ccfeb053cd645e50e7d00450b847ecd65c305165777bcfbe39fd3f48ccc86b57fdd183d2a4b138d94d27d11e4f1c121d702b295d94b9aee0a8dc81a744"} +{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772261455,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix zombie spawning on polling\nmake announcement commits non-blocking on repo provision"]],"content":"Signed commit: fix zombie spawning on polling\nmake announcement commits non-blocking on repo provision","id":"b0da119e7477b46f5d82be831693a92e117f25379476488f19351e2bac8f88b8","sig":"b8ca18e8215a9f5b3fc877ce113936c582353d44f8d03cdccd9f9ee70fb3e6fdd64db7cc6a3ca15339fb21b9ca87ea8471a38b587721a594a189d97cc2964ad9"} diff --git a/server-maintenance-commands.md b/server-maintenance-commands.md deleted file mode 100644 index 8206b25..0000000 --- a/server-maintenance-commands.md +++ /dev/null @@ -1,219 +0,0 @@ -# Server Maintenance Commands - -## 1. Investigate Zombie Processes (CRITICAL - 3300 zombies) - -```bash -# Find processes with zombie children -ps aux | awk '$8 ~ /^Z/ { print $2, $11 }' | head -20 - -# Find parent processes that are creating zombies -ps aux | awk '$8 ~ /^Z/ { print $3 }' | sort | uniq -c | sort -rn | head -10 - -# Check for specific problematic processes -ps auxf | grep -E 'Z|defunct' - -# Check systemd services that might be spawning zombies -systemctl status | grep -i failed -systemctl list-units --type=service --state=failed -``` - -## 2. Identify the Root Cause (Git Processes Detected) - -Based on initial investigation, zombies are `[git]` processes. Run these commands: - -```bash -# Check all git processes (including zombies) -ps aux | grep -E 'git|\[git\]' | head -30 - -# Find what's spawning git processes -ps auxf | grep -B 5 -A 5 git | head -50 - -# Check for web server processes that might spawn git -ps aux | grep -E 'node|nginx|apache|php-fpm|plesk' | head -20 - -# Check system logs for git-related errors -journalctl -p err -n 100 | grep -i git -journalctl -u nginx -n 50 -journalctl -u apache2 -n 50 - -# Check for processes with many children (potential zombie creators) -ps aux --sort=-%cpu | head -20 -ps aux --sort=-%mem | head -20 - -# Monitor zombie creation in real-time (run for 30 seconds) -watch -n 1 'ps aux | awk '\''$8 ~ /^Z/ { count++ } END { print "Zombies:", count+0 }'\''' - -# Check if it's a GitRepublic application issue -ps aux | grep -E 'node.*gitrepublic|gitrepublic.*node' -systemctl status | grep -i gitrepublic -``` - -## 3. Apply Security Updates - -```bash -# Update package lists -apt update - -# See what security updates are available -apt list --upgradable | grep -i security - -# Apply security updates -apt upgrade -y - -# Or apply all updates (after investigating zombies) -apt upgrade -``` - -## 4. System Health Check - -```bash -# Check disk space -df -h - -# Check memory usage -free -h - -# Check system load -uptime -top -bn1 | head -20 - -# Check for failed services -systemctl list-units --type=service --state=failed - -# Check system logs -journalctl -p err -n 50 -``` - -## 5. Plan System Restart - -```bash -# Check what requires restart -cat /var/run/reboot-required.pkgs 2>/dev/null || echo "No reboot required file found" - -# Schedule maintenance window and restart -# (Only after fixing zombie issue) -# reboot -``` - -## 6. Plesk-Specific Checks - -```bash -# Check Plesk services -plesk repair all -y - -# Check Plesk logs -tail -100 /var/log/plesk/panel.log - -# Check for Plesk-related zombie processes -ps aux | grep -i plesk | grep -i defunct -``` - -## Root Cause Identified ✅ - -**Problem**: Node.js GitRepublic process (PID 330225, `node build`) is spawning git processes that aren't being properly reaped, creating zombies. - -**Evidence**: -- All zombie processes are `[git] ` children of the Node.js process -- Active git process: `git remote set-head remote-0 -a` (from `git-remote-sync.ts`) -- Git spawns subprocesses like `git-remote-https` that can become zombies if not properly waited for - -**Code Fix**: Updated `src/lib/services/git/git-remote-sync.ts` to: -- Add timeout handling (30 minutes) -- Properly clean up processes on exit -- Handle signals correctly -- Prevent zombie processes - -## ⚠️ URGENT: Restart Service IMMEDIATELY ⚠️ - -**Zombie count is increasing rapidly (3300 → 5940). Restart NOW to stop the bleeding.** - -**Option 1: Restart the GitRepublic service (RECOMMENDED)** -```bash -# Find the service/container -docker ps | grep gitrepublic -# or -systemctl list-units | grep -i gitrepublic -# or find the process -ps aux | grep "node build" | grep -v grep - -# RESTART IT NOW (this will clean up zombies temporarily) -docker restart -# or -systemctl restart -# or if running directly -kill -TERM # Let systemd/docker restart it -``` - -**After restart, monitor zombie count:** -```bash -watch -n 2 'ps aux | awk '\''$8 ~ /^Z/ { count++ } END { print "Zombies:", count+0 }'\''' -``` - -**If zombies continue to increase after restart:** -- The code fix needs to be deployed -- Check if there are other services spawning git processes - -**Option 2: Kill and let it restart (if managed by systemd/docker)** -```bash -# Find the process -ps aux | grep "node build" | grep -v grep - -# Kill it (systemd/docker will restart it) -kill -TERM 330225 - -# Wait a moment, then check if it restarted -ps aux | grep "node build" | grep -v grep -``` - -**Option 3: Clean up zombies manually (temporary fix)** -```bash -# This won't fix the root cause but will clean up existing zombies -# The zombies will come back until the code is fixed -# Note: You can't kill zombies directly, but killing the parent will clean them up -``` - -## Recommended Action Plan - -1. **IMMEDIATE**: Restart GitRepublic service to clean up existing zombies -2. **URGENT**: Deploy the code fix (updated `git-remote-sync.ts`) -3. **HIGH PRIORITY**: Apply security updates (section 3) -4. **MONITOR**: Watch for zombie process count after restart -5. **MAINTENANCE WINDOW**: Schedule system restart after deploying fix - -## Common Causes of Zombie Processes - -- Process spawning children without proper signal handling -- Systemd service not properly configured -- Application bugs (especially Node.js, Python, or long-running processes) -- Resource exhaustion causing process management issues -- Plesk or web server processes not reaping children - -## Git-Specific Zombie Issues - -Since zombies are `[git]` processes, likely causes: -- **Git operations not being properly waited for** - parent process exits before git finishes -- **Git HTTP backend issues** - web server spawning git processes that aren't reaped -- **GitRepublic application** - Node.js app spawning git commands without proper signal handling -- **Plesk Git integration** - Plesk's git features not properly managing child processes -- **Git hooks** - hooks spawning processes that become zombies - -### Quick Fixes to Try - -```bash -# Restart web server (if using nginx/apache) -systemctl restart nginx -# or -systemctl restart apache2 - -# Restart GitRepublic application (if running as service) -systemctl restart gitrepublic-web -# or find and restart the Node.js process -ps aux | grep node | grep gitrepublic -# Then restart it - -# Check git-http-backend processes -ps aux | grep git-http-backend - -# Kill any stuck git processes (CAREFUL - only if safe) -# pkill -9 git # Only if you're sure no important operations are running -``` diff --git a/src/hooks.server.ts b/src/hooks.server.ts index f444cd9..7c7383f 100644 --- a/src/hooks.server.ts +++ b/src/hooks.server.ts @@ -7,6 +7,7 @@ import type { Handle } from '@sveltejs/kit'; import { error } from '@sveltejs/kit'; import { RepoPollingService } from './lib/services/nostr/repo-polling.js'; import { GIT_DOMAIN, DEFAULT_NOSTR_RELAYS } from './lib/config.js'; +import { setRepoPollingService } from './lib/services/service-registry.js'; import { rateLimiter } from './lib/services/security/rate-limiter.js'; import { auditLogger } from './lib/services/security/audit-logger.js'; import logger from './lib/services/logger.js'; @@ -30,6 +31,9 @@ if (typeof process !== 'undefined') { pollingService = new RepoPollingService(DEFAULT_NOSTR_RELAYS, repoRoot, domain); + // Register with service registry so it can be accessed from API endpoints + setRepoPollingService(pollingService); + // Start polling - the initial poll will complete asynchronously // The local repos endpoint will skip cache for the first 10 seconds after startup pollingService.start().then(() => { diff --git a/src/lib/services/nostr/repo-polling.ts b/src/lib/services/nostr/repo-polling.ts index 2398b41..85b96cd 100644 --- a/src/lib/services/nostr/repo-polling.ts +++ b/src/lib/services/nostr/repo-polling.ts @@ -84,6 +84,14 @@ export class RepoPollingService { } } + /** + * Trigger a manual poll (useful after user verification) + */ + async triggerPoll(): Promise { + logger.info('Manual poll triggered'); + return this.poll(); + } + /** * Poll for new repo announcements and provision repos */ @@ -106,9 +114,23 @@ export class RepoPollingService { } const cloneUrls = this.extractCloneUrls(event); - return cloneUrls.some(url => url.includes(this.domain)); + const listsDomain = cloneUrls.some(url => url.includes(this.domain)); + if (listsDomain) { + logger.debug({ + eventId: event.id, + pubkey: event.pubkey.slice(0, 16) + '...', + cloneUrls: cloneUrls.slice(0, 3) // Log first 3 URLs + }, 'Found repo announcement that lists this domain'); + } + return listsDomain; }); + logger.info({ + totalEvents: events.length, + relevantEvents: relevantEvents.length, + domain: this.domain + }, 'Filtered repo announcements'); + // Provision each repo for (const event of relevantEvents) { try { @@ -201,11 +223,22 @@ export class RepoPollingService { if (!isExistingRepo) { const userLevel = getCachedUserLevel(event.pubkey); const { hasUnlimitedAccess } = await import('../../utils/user-access.js'); - if (!hasUnlimitedAccess(userLevel?.level)) { + const hasAccess = hasUnlimitedAccess(userLevel?.level); + + logger.debug({ + eventId: event.id, + pubkey: event.pubkey.slice(0, 16) + '...', + cachedLevel: userLevel?.level || 'none', + hasAccess, + isExistingRepo + }, 'Checking user access for repo provisioning'); + + if (!hasAccess) { logger.warn({ eventId: event.id, pubkey: event.pubkey.slice(0, 16) + '...', - level: userLevel?.level || 'none' + level: userLevel?.level || 'none', + cacheExists: !!userLevel }, 'Skipping repo provisioning: user does not have unlimited access'); continue; } diff --git a/src/lib/services/service-registry.ts b/src/lib/services/service-registry.ts index e5813f1..2e3432f 100644 --- a/src/lib/services/service-registry.ts +++ b/src/lib/services/service-registry.ts @@ -15,7 +15,8 @@ import { ForkCountService } from './nostr/fork-count-service.js'; import { PRsService } from './nostr/prs-service.js'; import { HighlightsService } from './nostr/highlights-service.js'; import { ReleasesService } from './nostr/releases-service.js'; -import { DEFAULT_NOSTR_RELAYS, DEFAULT_NOSTR_SEARCH_RELAYS } from '../config.js'; +import { RepoPollingService } from './nostr/repo-polling.js'; +import { DEFAULT_NOSTR_RELAYS, DEFAULT_NOSTR_SEARCH_RELAYS, GIT_DOMAIN } from '../config.js'; // Get repo root from environment or use default const repoRoot = typeof process !== 'undefined' && process.env?.GIT_REPO_ROOT @@ -35,6 +36,7 @@ let _forkCountService: ForkCountService | null = null; let _prsService: PRsService | null = null; let _highlightsService: HighlightsService | null = null; let _releasesService: ReleasesService | null = null; +let _repoPollingService: RepoPollingService | null = null; /** * Get singleton FileManager instance @@ -156,6 +158,21 @@ export function getReleasesService(): ReleasesService { return _releasesService; } +/** + * Get singleton RepoPollingService instance + * Note: This should be initialized in hooks.server.ts on startup + */ +export function getRepoPollingService(): RepoPollingService | null { + return _repoPollingService; +} + +/** + * Set the RepoPollingService instance (called from hooks.server.ts) + */ +export function setRepoPollingService(service: RepoPollingService): void { + _repoPollingService = service; +} + // Convenience exports for direct access (common pattern) export const fileManager = getFileManager(); export const repoManager = getRepoManager(); diff --git a/src/lib/utils/repo-poll-trigger.ts b/src/lib/utils/repo-poll-trigger.ts new file mode 100644 index 0000000..5dbb9e3 --- /dev/null +++ b/src/lib/utils/repo-poll-trigger.ts @@ -0,0 +1,30 @@ +/** + * Shared utility for triggering repo polls + * This provides a consistent interface for triggering polls from anywhere in the codebase + */ + +import { getRepoPollingService } from '../services/service-registry.js'; +import logger from '../services/logger.js'; + +/** + * Trigger a repo poll + * This is the single source of truth for triggering polls + * @param context Optional context string for logging (e.g., 'user-verification', 'manual-refresh') + * @returns Promise that resolves when poll is triggered (not when it completes) + */ +export async function triggerRepoPoll(context?: string): Promise { + const pollingService = getRepoPollingService(); + + if (!pollingService) { + logger.warn({ context }, 'Poll request received but polling service not initialized'); + throw new Error('Polling service not available'); + } + + // Trigger poll asynchronously (non-blocking) + // The poll will complete in the background + pollingService.triggerPoll().catch((err) => { + logger.error({ error: err, context }, 'Failed to trigger poll'); + }); + + logger.info({ context }, 'Repo poll triggered'); +} diff --git a/src/routes/api/repos/poll/+server.ts b/src/routes/api/repos/poll/+server.ts new file mode 100644 index 0000000..87e4ff9 --- /dev/null +++ b/src/routes/api/repos/poll/+server.ts @@ -0,0 +1,33 @@ +/** + * API endpoint for manually triggering a repo poll + * This allows users to refresh the repo list and trigger provisioning of new repos + * + * This is the public API interface for triggering polls. + * All poll triggers should go through this endpoint or the shared triggerRepoPoll utility. + */ + +import { json } from '@sveltejs/kit'; +import type { RequestHandler } from './$types'; +import { triggerRepoPoll } from '$lib/utils/repo-poll-trigger.js'; +import { extractRequestContext } from '$lib/utils/api-context.js'; + +export const POST: RequestHandler = async (event) => { + const requestContext = extractRequestContext(event); + const clientIp = requestContext.clientIp || 'unknown'; + + try { + await triggerRepoPoll('api-endpoint'); + + return json({ + success: true, + message: 'Poll triggered successfully' + }); + } catch (err) { + const errorMessage = err instanceof Error ? err.message : String(err); + + return json({ + success: false, + error: errorMessage + }, { status: err instanceof Error && errorMessage.includes('not available') ? 503 : 500 }); + } +}; diff --git a/src/routes/api/user/level/+server.ts b/src/routes/api/user/level/+server.ts index 8a58ac3..3341aad 100644 --- a/src/routes/api/user/level/+server.ts +++ b/src/routes/api/user/level/+server.ts @@ -17,6 +17,7 @@ import { extractRequestContext } from '$lib/utils/api-context.js'; import { sanitizeError } from '$lib/utils/security.js'; import { verifyEvent } from 'nostr-tools'; import logger from '$lib/services/logger.js'; +import { triggerRepoPoll } from '$lib/utils/repo-poll-trigger.js'; export const POST: RequestHandler = async (event) => { const requestContext = extractRequestContext(event); @@ -143,6 +144,12 @@ export const POST: RequestHandler = async (event) => { // Cache the successful verification cacheUserLevel(userPubkeyHex, 'unlimited'); + // Trigger a repo poll to provision repos now that user is verified + // This is non-blocking - we don't wait for it to complete + triggerRepoPoll('user-verification').catch((err) => { + logger.warn({ error: err, userPubkeyHex }, 'Failed to trigger poll after user verification (non-blocking)'); + }); + auditLogger.logAuth( userPubkeyHex, clientIp, diff --git a/src/routes/repos/+page.svelte b/src/routes/repos/+page.svelte index 786d4cc..81d858f 100644 --- a/src/routes/repos/+page.svelte +++ b/src/routes/repos/+page.svelte @@ -308,7 +308,7 @@ } } - async function loadRepos() { + async function loadRepos(triggerPoll = false) { loading = true; error = null; @@ -357,6 +357,31 @@ loadForkCounts(registeredRepos.map(r => r.event)).catch(err => { console.warn('[RepoList] Failed to load some fork counts:', err); }); + + // If triggerPoll is true, trigger a poll and then refresh the list + if (triggerPoll) { + try { + // Trigger poll (non-blocking) + const pollResponse = await fetch('/api/repos/poll', { + method: 'POST', + headers: userPubkeyHex ? { + 'X-User-Pubkey': userPubkeyHex + } : {} + }); + + if (pollResponse.ok) { + // Wait a bit for the poll to process (lazy - don't wait for full completion) + // Give it 2-3 seconds to provision repos + await new Promise(resolve => setTimeout(resolve, 2500)); + + // Refresh the list after poll + await loadRepos(false); + } + } catch (pollErr) { + // Don't fail the whole operation if poll fails + console.warn('[RepoList] Failed to trigger poll:', pollErr); + } + } } catch (e) { error = String(e); console.error('[RepoList] Failed to load repos:', e); @@ -777,7 +802,7 @@

Repositories on {$page.data.gitDomain || 'localhost:6543'}

-