Browse Source

polling update

Nostr-Signature: 42c1a2a63a4568c65d82d78701451b3b4363bdf9c8c57e804535b5f3f0d7b6fc 573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc 8e5f32ecb79da876ac41eba04c3b1541b21d039ae50d1b9fefa630d35f31c97dd29af64e4b695742fa7d4eaec17db8f4a066b4db99ce628aed596971975d4a87
main
Silberengel 2 months ago
parent
commit
a8c9850b19
  1. 1
      nostr/commit-signatures.jsonl
  2. 219
      server-maintenance-commands.md
  3. 4
      src/hooks.server.ts
  4. 39
      src/lib/services/nostr/repo-polling.ts
  5. 19
      src/lib/services/service-registry.ts
  6. 30
      src/lib/utils/repo-poll-trigger.ts
  7. 33
      src/routes/api/repos/poll/+server.ts
  8. 7
      src/routes/api/user/level/+server.ts
  9. 29
      src/routes/repos/+page.svelte

1
nostr/commit-signatures.jsonl

@ -114,3 +114,4 @@ @@ -114,3 +114,4 @@
{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772223624,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fixes"]],"content":"Signed commit: bug-fixes","id":"99cb543f1e821f1b7df4bbde2b3da3ab3a09cda7a1e9a537fe1b8df79b19e8e8","sig":"762a7ea92457ce81cc5aae9bc644fb9d80f90c7500035fbb506f2f76a5942333b828cc8a59f7656b0e714b15a59158be0a671f51476be2e8eabe9731ced74bcb"}
{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772226191,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fixes"]],"content":"Signed commit: bug-fixes","id":"20be97351d2b05fa7ad9e161b2619e9babaaffc6a8090057c1a3ac50a0f08d6a","sig":"a174c7dd39f613dd88260ef5c111b943df381b0acae20d048596e11ef1a6b0e3c1bfb9a8858af3df0f8858c4c79d1e2d03ad248a0608ac5d5cded6a81e99af77"}
{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772227102,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","bug-fix"]],"content":"Signed commit: bug-fix","id":"0f366a0cc7c003f74e375f40e7c322781746d12829943df1287bf67f36e1330a","sig":"167177ccfeb053cd645e50e7d00450b847ecd65c305165777bcfbe39fd3f48ccc86b57fdd183d2a4b138d94d27d11e4f1c121d702b295d94b9aee0a8dc81a744"}
{"kind":1640,"pubkey":"573634b648634cbad10f2451776089ea21090d9407f715e83c577b4611ae6edc","created_at":1772261455,"tags":[["author","Silberengel","silberengel7@protonmail.com"],["message","fix zombie spawning on polling\nmake announcement commits non-blocking on repo provision"]],"content":"Signed commit: fix zombie spawning on polling\nmake announcement commits non-blocking on repo provision","id":"b0da119e7477b46f5d82be831693a92e117f25379476488f19351e2bac8f88b8","sig":"b8ca18e8215a9f5b3fc877ce113936c582353d44f8d03cdccd9f9ee70fb3e6fdd64db7cc6a3ca15339fb21b9ca87ea8471a38b587721a594a189d97cc2964ad9"}

219
server-maintenance-commands.md

@ -1,219 +0,0 @@ @@ -1,219 +0,0 @@
# Server Maintenance Commands
## 1. Investigate Zombie Processes (CRITICAL - 3300 zombies)
```bash
# Find processes with zombie children
ps aux | awk '$8 ~ /^Z/ { print $2, $11 }' | head -20
# Find parent processes that are creating zombies (counts zombies per parent PID)
ps -eo ppid=,stat= | awk '$2 ~ /^Z/ { print $1 }' | sort | uniq -c | sort -rn | head -10
# Check for specific problematic processes
ps auxf | grep -E 'Z|defunct'
# Check systemd services that might be spawning zombies
systemctl status | grep -i failed
systemctl list-units --type=service --state=failed
```
## 2. Identify the Root Cause (Git Processes Detected)
Based on initial investigation, zombies are `[git]` processes. Run these commands:
```bash
# Check all git processes (including zombies)
ps aux | grep -E 'git|\[git\]' | head -30
# Find what's spawning git processes
ps auxf | grep -B 5 -A 5 git | head -50
# Check for web server processes that might spawn git
ps aux | grep -E 'node|nginx|apache|php-fpm|plesk' | head -20
# Check system logs for git-related errors
journalctl -p err -n 100 | grep -i git
journalctl -u nginx -n 50
journalctl -u apache2 -n 50
# Check for processes with many children (potential zombie creators)
ps aux --sort=-%cpu | head -20
ps aux --sort=-%mem | head -20
# Monitor zombie creation in real-time (watch runs until interrupted; observe for ~30 seconds, then press Ctrl+C)
watch -n 1 'ps aux | awk '\''$8 ~ /^Z/ { count++ } END { print "Zombies:", count+0 }'\'''
# Check if it's a GitRepublic application issue
ps aux | grep -E 'node.*gitrepublic|gitrepublic.*node'
systemctl status | grep -i gitrepublic
```
## 3. Apply Security Updates
```bash
# Update package lists
apt update
# See what security updates are available
apt list --upgradable | grep -i security
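# Apply all pending updates (this includes the security updates) without prompting
apt upgrade -y
# Or run the same upgrade interactively to review the changes first (after investigating zombies)
apt upgrade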
```
## 4. System Health Check
```bash
# Check disk space
df -h
# Check memory usage
free -h
# Check system load
uptime
top -bn1 | head -20
# Check for failed services
systemctl list-units --type=service --state=failed
# Check system logs
journalctl -p err -n 50
```
## 5. Plan System Restart
```bash
# Check what requires restart
cat /var/run/reboot-required.pkgs 2>/dev/null || echo "No reboot required file found"
# Schedule maintenance window and restart
# (Only after fixing zombie issue)
# reboot
```
## 6. Plesk-Specific Checks
```bash
# Check Plesk services and automatically repair any problems found (-y answers "yes" to every prompt)
plesk repair all -y
# Check Plesk logs
tail -100 /var/log/plesk/panel.log
# Check for Plesk-related zombie processes
ps aux | grep -i plesk | grep -i defunct
```
## Root Cause Identified ✅
**Problem**: Node.js GitRepublic process (PID 330225, `node build`) is spawning git processes that aren't being properly reaped, creating zombies.
**Evidence**:
- All zombie processes are `[git] <defunct>` children of the Node.js process
- Active git process: `git remote set-head remote-0 -a` (from `git-remote-sync.ts`)
- Git spawns subprocesses like `git-remote-https` that can become zombies if not properly waited for
**Code Fix**: Updated `src/lib/services/git/git-remote-sync.ts` to:
- Add timeout handling (30 minutes)
- Properly clean up processes on exit
- Handle signals correctly
- Prevent zombie processes
## ⚠ URGENT: Restart Service IMMEDIATELY ⚠
**Zombie count is increasing rapidly (3300 → 5940). Restart NOW to stop the bleeding.**
**Option 1: Restart the GitRepublic service (RECOMMENDED)**
```bash
# Find the service/container
docker ps | grep gitrepublic
# or
systemctl list-units | grep -i gitrepublic
# or find the process
ps aux | grep "node build" | grep -v grep
# RESTART IT NOW (this will clean up zombies temporarily)
docker restart <container-id>
# or
systemctl restart <service-name>
# or if running directly
kill -TERM <pid> # Let systemd/docker restart it
```
**After restart, monitor zombie count:**
```bash
watch -n 2 'ps aux | awk '\''$8 ~ /^Z/ { count++ } END { print "Zombies:", count+0 }'\'''
```
**If zombies continue to increase after restart:**
- The code fix needs to be deployed
- Check if there are other services spawning git processes
**Option 2: Kill and let it restart (if managed by systemd/docker)**
```bash
# Find the process
ps aux | grep "node build" | grep -v grep
# Kill it (systemd/docker will restart it)
kill -TERM 330225
# Wait a moment, then check if it restarted
ps aux | grep "node build" | grep -v grep
```
**Option 3: Clean up zombies manually (temporary fix)**
```bash
# This won't fix the root cause but will clean up existing zombies
# The zombies will come back until the code is fixed
# Note: You can't kill zombies directly, but killing the parent will clean them up
```
## Recommended Action Plan
1. **IMMEDIATE**: Restart GitRepublic service to clean up existing zombies
2. **URGENT**: Deploy the code fix (updated `git-remote-sync.ts`)
3. **HIGH PRIORITY**: Apply security updates (section 3)
4. **MONITOR**: Watch for zombie process count after restart
5. **MAINTENANCE WINDOW**: Schedule system restart after deploying fix
## Common Causes of Zombie Processes
- Process spawning children without proper signal handling
- Systemd service not properly configured
- Application bugs (especially Node.js, Python, or long-running processes)
- Resource exhaustion causing process management issues
- Plesk or web server processes not reaping children
## Git-Specific Zombie Issues
Since zombies are `[git]` processes, likely causes:
- **Git operations not being properly waited for** - parent process exits before git finishes
- **Git HTTP backend issues** - web server spawning git processes that aren't reaped
- **GitRepublic application** - Node.js app spawning git commands without proper signal handling
- **Plesk Git integration** - Plesk's git features not properly managing child processes
- **Git hooks** - hooks spawning processes that become zombies
### Quick Fixes to Try
```bash
# Restart web server (if using nginx/apache)
systemctl restart nginx
# or
systemctl restart apache2
# Restart GitRepublic application (if running as service)
systemctl restart gitrepublic-web
# or find and restart the Node.js process
ps aux | grep node | grep gitrepublic
# Then restart it
# Check git-http-backend processes
ps aux | grep git-http-backend
# Kill any stuck git processes (CAREFUL - only if safe)
# pkill -9 git # Only if you're sure no important operations are running
```

4
src/hooks.server.ts

@ -7,6 +7,7 @@ import type { Handle } from '@sveltejs/kit'; @@ -7,6 +7,7 @@ import type { Handle } from '@sveltejs/kit';
import { error } from '@sveltejs/kit';
import { RepoPollingService } from './lib/services/nostr/repo-polling.js';
import { GIT_DOMAIN, DEFAULT_NOSTR_RELAYS } from './lib/config.js';
import { setRepoPollingService } from './lib/services/service-registry.js';
import { rateLimiter } from './lib/services/security/rate-limiter.js';
import { auditLogger } from './lib/services/security/audit-logger.js';
import logger from './lib/services/logger.js';
@ -30,6 +31,9 @@ if (typeof process !== 'undefined') { @@ -30,6 +31,9 @@ if (typeof process !== 'undefined') {
pollingService = new RepoPollingService(DEFAULT_NOSTR_RELAYS, repoRoot, domain);
// Register with service registry so it can be accessed from API endpoints
setRepoPollingService(pollingService);
// Start polling - the initial poll will complete asynchronously
// The local repos endpoint will skip cache for the first 10 seconds after startup
pollingService.start().then(() => {

39
src/lib/services/nostr/repo-polling.ts

@ -84,6 +84,14 @@ export class RepoPollingService { @@ -84,6 +84,14 @@ export class RepoPollingService {
}
}
/**
 * Run a poll on demand (for example right after a user has been verified).
 *
 * Logs that a manual poll was requested and then delegates to poll().
 *
 * @returns Promise that settles when the delegated poll() call settles;
 *          a rejection from poll() is passed through to the caller.
 */
async triggerPoll(): Promise<void> {
  logger.info('Manual poll triggered');
  await this.poll();
}
/**
* Poll for new repo announcements and provision repos
*/
@ -106,9 +114,23 @@ export class RepoPollingService { @@ -106,9 +114,23 @@ export class RepoPollingService {
}
const cloneUrls = this.extractCloneUrls(event);
return cloneUrls.some(url => url.includes(this.domain));
const listsDomain = cloneUrls.some(url => url.includes(this.domain));
if (listsDomain) {
logger.debug({
eventId: event.id,
pubkey: event.pubkey.slice(0, 16) + '...',
cloneUrls: cloneUrls.slice(0, 3) // Log first 3 URLs
}, 'Found repo announcement that lists this domain');
}
return listsDomain;
});
logger.info({
totalEvents: events.length,
relevantEvents: relevantEvents.length,
domain: this.domain
}, 'Filtered repo announcements');
// Provision each repo
for (const event of relevantEvents) {
try {
@ -201,11 +223,22 @@ export class RepoPollingService { @@ -201,11 +223,22 @@ export class RepoPollingService {
if (!isExistingRepo) {
const userLevel = getCachedUserLevel(event.pubkey);
const { hasUnlimitedAccess } = await import('../../utils/user-access.js');
if (!hasUnlimitedAccess(userLevel?.level)) {
const hasAccess = hasUnlimitedAccess(userLevel?.level);
logger.debug({
eventId: event.id,
pubkey: event.pubkey.slice(0, 16) + '...',
cachedLevel: userLevel?.level || 'none',
hasAccess,
isExistingRepo
}, 'Checking user access for repo provisioning');
if (!hasAccess) {
logger.warn({
eventId: event.id,
pubkey: event.pubkey.slice(0, 16) + '...',
level: userLevel?.level || 'none'
level: userLevel?.level || 'none',
cacheExists: !!userLevel
}, 'Skipping repo provisioning: user does not have unlimited access');
continue;
}

19
src/lib/services/service-registry.ts

@ -15,7 +15,8 @@ import { ForkCountService } from './nostr/fork-count-service.js'; @@ -15,7 +15,8 @@ import { ForkCountService } from './nostr/fork-count-service.js';
import { PRsService } from './nostr/prs-service.js';
import { HighlightsService } from './nostr/highlights-service.js';
import { ReleasesService } from './nostr/releases-service.js';
import { DEFAULT_NOSTR_RELAYS, DEFAULT_NOSTR_SEARCH_RELAYS } from '../config.js';
import { RepoPollingService } from './nostr/repo-polling.js';
import { DEFAULT_NOSTR_RELAYS, DEFAULT_NOSTR_SEARCH_RELAYS, GIT_DOMAIN } from '../config.js';
// Get repo root from environment or use default
const repoRoot = typeof process !== 'undefined' && process.env?.GIT_REPO_ROOT
@ -35,6 +36,7 @@ let _forkCountService: ForkCountService | null = null; @@ -35,6 +36,7 @@ let _forkCountService: ForkCountService | null = null;
let _prsService: PRsService | null = null;
let _highlightsService: HighlightsService | null = null;
let _releasesService: ReleasesService | null = null;
let _repoPollingService: RepoPollingService | null = null;
/**
* Get singleton FileManager instance
@ -156,6 +158,21 @@ export function getReleasesService(): ReleasesService { @@ -156,6 +158,21 @@ export function getReleasesService(): ReleasesService {
return _releasesService;
}
/**
 * Get the shared RepoPollingService instance.
 *
 * Unlike the other getters in this registry, this one does not create the
 * service itself: it only hands back whatever was stored through
 * setRepoPollingService (expected to happen in hooks.server.ts on startup).
 *
 * @returns The registered RepoPollingService, or null if none has been
 *          registered yet. Callers must handle the null case.
 */
export function getRepoPollingService(): RepoPollingService | null {
return _repoPollingService;
}
/**
 * Register the RepoPollingService instance (called from hooks.server.ts).
 *
 * Stores the given service in the module-level slot read by
 * getRepoPollingService. Calling this again replaces the previously stored
 * instance.
 *
 * @param service The polling service instance to expose through the registry
 */
export function setRepoPollingService(service: RepoPollingService): void {
_repoPollingService = service;
}
// Convenience exports for direct access (common pattern)
export const fileManager = getFileManager();
export const repoManager = getRepoManager();

30
src/lib/utils/repo-poll-trigger.ts

@ -0,0 +1,30 @@ @@ -0,0 +1,30 @@
/**
* Shared utility for triggering repo polls
* This provides a consistent interface for triggering polls from anywhere in the codebase
*/
import { getRepoPollingService } from '../services/service-registry.js';
import logger from '../services/logger.js';
/**
 * Kick off a repo poll without waiting for it to finish.
 *
 * Looks up the polling service in the service registry and starts a manual
 * poll in the background. If that background poll later rejects, the error is
 * logged here and is NOT propagated to the caller.
 *
 * @param context Optional label attached to the log entries (e.g., 'user-verification', 'manual-refresh')
 * @returns Promise that resolves once the poll has been started (not when it completes);
 *          it rejects with Error('Polling service not available') when no polling
 *          service has been registered.
 */
export async function triggerRepoPoll(context?: string): Promise<void> {
  const service = getRepoPollingService();

  if (service) {
    // Invoked only if the background poll rejects after this function has returned.
    const reportFailure = (err: unknown): void => {
      logger.error({ error: err, context }, 'Failed to trigger poll');
    };

    // Fire-and-forget: `void` marks the promise as intentionally not awaited.
    void service.triggerPoll().catch(reportFailure);

    logger.info({ context }, 'Repo poll triggered');
    return;
  }

  // No service registered: report it and let the caller decide how to react.
  logger.warn({ context }, 'Poll request received but polling service not initialized');
  throw new Error('Polling service not available');
}

33
src/routes/api/repos/poll/+server.ts

@ -0,0 +1,33 @@ @@ -0,0 +1,33 @@
/**
* API endpoint for manually triggering a repo poll
* This allows users to refresh the repo list and trigger provisioning of new repos
*
* This is the public API interface for triggering polls.
* All poll triggers should go through this endpoint or the shared triggerRepoPoll utility.
*/
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { triggerRepoPoll } from '$lib/utils/repo-poll-trigger.js';
import { extractRequestContext } from '$lib/utils/api-context.js';
/**
 * POST handler: start a repo poll and report whether it could be started.
 *
 * Responds with `{ success: true, message }` once the poll has been triggered.
 * On failure responds with `{ success: false, error }` and status 503 when the
 * error is an Error whose message contains 'not available', otherwise 500.
 */
export const POST: RequestHandler = async (event) => {
  const requestContext = extractRequestContext(event);
  // NOTE(review): clientIp is computed but not used anywhere in this handler —
  // confirm whether it was meant for logging or rate limiting.
  const clientIp = requestContext.clientIp || 'unknown';

  try {
    await triggerRepoPoll('api-endpoint');

    const body = {
      success: true,
      message: 'Poll triggered successfully'
    };
    return json(body);
  } catch (err) {
    const errorMessage = err instanceof Error ? err.message : String(err);

    // Only a real Error mentioning 'not available' counts as "service unavailable".
    const serviceUnavailable = err instanceof Error && errorMessage.includes('not available');
    const status = serviceUnavailable ? 503 : 500;

    const body = {
      success: false,
      error: errorMessage
    };
    return json(body, { status });
  }
};

7
src/routes/api/user/level/+server.ts

@ -17,6 +17,7 @@ import { extractRequestContext } from '$lib/utils/api-context.js'; @@ -17,6 +17,7 @@ import { extractRequestContext } from '$lib/utils/api-context.js';
import { sanitizeError } from '$lib/utils/security.js';
import { verifyEvent } from 'nostr-tools';
import logger from '$lib/services/logger.js';
import { triggerRepoPoll } from '$lib/utils/repo-poll-trigger.js';
export const POST: RequestHandler = async (event) => {
const requestContext = extractRequestContext(event);
@ -143,6 +144,12 @@ export const POST: RequestHandler = async (event) => { @@ -143,6 +144,12 @@ export const POST: RequestHandler = async (event) => {
// Cache the successful verification
cacheUserLevel(userPubkeyHex, 'unlimited');
// Trigger a repo poll to provision repos now that user is verified
// This is non-blocking - we don't wait for it to complete
triggerRepoPoll('user-verification').catch((err) => {
logger.warn({ error: err, userPubkeyHex }, 'Failed to trigger poll after user verification (non-blocking)');
});
auditLogger.logAuth(
userPubkeyHex,
clientIp,

29
src/routes/repos/+page.svelte

@ -308,7 +308,7 @@ @@ -308,7 +308,7 @@
}
}
async function loadRepos() {
async function loadRepos(triggerPoll = false) {
loading = true;
error = null;
@ -357,6 +357,31 @@ @@ -357,6 +357,31 @@
loadForkCounts(registeredRepos.map(r => r.event)).catch(err => {
console.warn('[RepoList] Failed to load some fork counts:', err);
});
// If triggerPoll is true, trigger a poll and then refresh the list
if (triggerPoll) {
try {
// Trigger poll (non-blocking)
const pollResponse = await fetch('/api/repos/poll', {
method: 'POST',
headers: userPubkeyHex ? {
'X-User-Pubkey': userPubkeyHex
} : {}
});
if (pollResponse.ok) {
// Wait a bit for the poll to process (lazy - don't wait for full completion)
// Give it 2-3 seconds to provision repos
await new Promise(resolve => setTimeout(resolve, 2500));
// Refresh the list after poll
await loadRepos(false);
}
} catch (pollErr) {
// Don't fail the whole operation if poll fails
console.warn('[RepoList] Failed to trigger poll:', pollErr);
}
}
} catch (e) {
error = String(e);
console.error('[RepoList] Failed to load repos:', e);
@ -777,7 +802,7 @@ @@ -777,7 +802,7 @@
<div class="repos-header">
<h2>Repositories on {$page.data.gitDomain || 'localhost:6543'}</h2>
<button onclick={loadRepos} disabled={loading}>
<button onclick={() => loadRepos(true)} disabled={loading}>
{loading ? 'Loading...' : 'Refresh'}
</button>
</div>

Loading…
Cancel
Save