From 7bf5a7215369cc285dc0c7bfb9e9cb260470142a Mon Sep 17 00:00:00 2001
From: limina1
Date: Sat, 19 Jul 2025 19:54:46 -0400
Subject: [PATCH] remove deduplication, kept in utils library

---
Note: the content of eventDeduplication.ts below was revised in review, so the
blob id on its "index" line predates that revision.

 src/lib/utils/eventDeduplication.ts | 226 ++++++++++++++++++++++++++++
 src/routes/visualize/+page.svelte   | 130 +++-------------
 2 files changed, 245 insertions(+), 111 deletions(-)
 create mode 100644 src/lib/utils/eventDeduplication.ts

diff --git a/src/lib/utils/eventDeduplication.ts b/src/lib/utils/eventDeduplication.ts
new file mode 100644
index 0000000..8c52e64
--- /dev/null
+++ b/src/lib/utils/eventDeduplication.ts
@@ -0,0 +1,226 @@
+import type { NDKEvent } from '@nostr-dev-kit/ndk';
+
+/** Maximum number of distinct coordinates described in the duplicate log. */
+const MAX_DUPLICATE_DETAILS = 5;
+
+/** Debug record listing the events seen for one duplicated coordinate. */
+interface DuplicateDetail {
+  coordinate: string;
+  count: number;
+  events: string[];
+}
+
+/** Formats an event as `id (created_at: timestamp)` for the duplicate log. */
+function describeEvent(event: NDKEvent): string {
+  return `${event.id} (created_at: ${event.created_at})`;
+}
+
+/**
+ * Records that `event` collided with `existing` on `coordinate`.
+ * A coordinate that is already tracked always has its count updated; only the
+ * creation of new entries is capped at MAX_DUPLICATE_DETAILS.
+ */
+function recordDuplicate(
+  details: DuplicateDetail[],
+  coordinate: string,
+  existing: NDKEvent,
+  event: NDKEvent
+): void {
+  const tracked = details.find(d => d.coordinate === coordinate);
+  if (tracked) {
+    tracked.count++;
+    tracked.events.push(describeEvent(event));
+  } else if (details.length < MAX_DUPLICATE_DETAILS) {
+    details.push({
+      coordinate,
+      count: 2, // existing + current
+      events: [describeEvent(existing), describeEvent(event)]
+    });
+  }
+}
+
+/**
+ * Decides whether `candidate` should replace `current` for a coordinate.
+ * The higher created_at wins (a missing created_at counts as 0). On a tie the
+ * lexically lowest id is kept, as NIP-01 specifies, so the outcome does not
+ * depend on the order in which the sources were merged.
+ */
+function shouldReplace(current: NDKEvent, candidate: NDKEvent): boolean {
+  const currentTime = current.created_at ?? 0;
+  const candidateTime = candidate.created_at ?? 0;
+  if (candidateTime !== currentTime) {
+    return candidateTime > currentTime;
+  }
+  return candidate.id < current.id;
+}
+
+/**
+ * Deduplicate content events by keeping only the most recent version of each
+ * `kind:pubkey:d-tag` coordinate.
+ *
+ * Events lacking a d-tag, pubkey or kind have no coordinate and are left out
+ * of the returned map.
+ *
+ * @param contentEventSets Array of event sets from different sources
+ * @returns Map of coordinate to most recent event
+ */
+export function deduplicateContentEvents(
+  contentEventSets: Set<NDKEvent>[]
+): Map<string, NDKEvent> {
+  const eventsByCoordinate = new Map<string, NDKEvent>();
+
+  // Track statistics for debugging
+  let totalEvents = 0;
+  let duplicateCoordinates = 0;
+  const duplicateDetails: DuplicateDetail[] = [];
+
+  for (const eventSet of contentEventSets) {
+    for (const event of eventSet) {
+      totalEvents++;
+      const dTag = event.tagValue("d");
+      const author = event.pubkey;
+      const kind = event.kind;
+      if (!dTag || !author || !kind) continue;
+
+      const coordinate = `${kind}:${author}:${dTag}`;
+      const existing = eventsByCoordinate.get(coordinate);
+      if (!existing) {
+        eventsByCoordinate.set(coordinate, event);
+        continue;
+      }
+
+      // Same coordinate seen again: log it and keep the winning version
+      duplicateCoordinates++;
+      recordDuplicate(duplicateDetails, coordinate, existing, event);
+      if (shouldReplace(existing, event)) {
+        eventsByCoordinate.set(coordinate, event);
+      }
+    }
+  }
+
+  // Log deduplication results if any duplicates were found
+  if (duplicateCoordinates > 0) {
+    console.log(`[eventDeduplication] Found ${duplicateCoordinates} duplicate events out of ${totalEvents} total events`);
+    console.log(`[eventDeduplication] Reduced to ${eventsByCoordinate.size} unique coordinates`);
+    console.log(`[eventDeduplication] Duplicate details:`, duplicateDetails);
+  } else if (totalEvents > 0) {
+    console.log(`[eventDeduplication] No duplicates found in ${totalEvents} events`);
+  }
+
+  return eventsByCoordinate;
+}
+
+/**
+ * Deduplicate and combine all events, keeping only the most recent version of
+ * each event in the 30000-39999 kind range per `kind:pubkey:d-tag` coordinate.
+ *
+ * Events without an id are dropped. Events that have no coordinate (kind
+ * outside the range, or no d-tag or pubkey) are deduplicated by id only.
+ *
+ * @param nonPublicationEvents Array of non-publication events
+ * @param validIndexEvents Set of valid index events
+ * @param contentEvents Set of content events
+ * @returns Array of deduplicated events
+ */
+export function deduplicateAndCombineEvents(
+  nonPublicationEvents: NDKEvent[],
+  validIndexEvents: Set<NDKEvent>,
+  contentEvents: Set<NDKEvent>
+): NDKEvent[] {
+  // Track statistics for debugging
+  const initialCount = nonPublicationEvents.length + validIndexEvents.size + contentEvents.size;
+  let replaceableEventsProcessed = 0;
+  let duplicateCoordinatesFound = 0;
+  const duplicateDetails: DuplicateDetail[] = [];
+
+  const allEventsToProcess = [
+    ...nonPublicationEvents, // Non-publication events fetched earlier
+    ...Array.from(validIndexEvents),
+    ...Array.from(contentEvents)
+  ];
+
+  // First pass: identify the most recent version of each coordinate
+  const coordinateMap = new Map<string, NDKEvent>();
+  for (const event of allEventsToProcess) {
+    if (!event.id) continue;
+    if (isReplaceableEvent(event)) replaceableEventsProcessed++;
+
+    const coordinate = getEventCoordinate(event);
+    if (coordinate === null) continue;
+
+    const existing = coordinateMap.get(coordinate);
+    if (!existing) {
+      coordinateMap.set(coordinate, event);
+      continue;
+    }
+
+    duplicateCoordinatesFound++;
+    recordDuplicate(duplicateDetails, coordinate, existing, event);
+    if (shouldReplace(existing, event)) {
+      coordinateMap.set(coordinate, event);
+    }
+  }
+
+  // Second pass: build the final event map (insertion order is kept)
+  const finalEventMap = new Map<string, NDKEvent>();
+  for (const event of allEventsToProcess) {
+    if (!event.id) continue;
+
+    const coordinate = getEventCoordinate(event);
+    if (coordinate === null) {
+      // Events without a coordinate are added directly, keyed by id
+      finalEventMap.set(event.id, event);
+      continue;
+    }
+
+    // Only the chosen version of a coordinate is added, and only once
+    const chosenEvent = coordinateMap.get(coordinate);
+    if (chosenEvent && chosenEvent.id === event.id && !finalEventMap.has(event.id)) {
+      finalEventMap.set(event.id, event);
+    }
+  }
+
+  const finalCount = finalEventMap.size;
+  const reduction = initialCount - finalCount;
+
+  // Log deduplication results if any duplicates were found
+  if (duplicateCoordinatesFound > 0) {
+    console.log(`[eventDeduplication] deduplicateAndCombineEvents: Found ${duplicateCoordinatesFound} duplicate coordinates out of ${replaceableEventsProcessed} replaceable events`);
+    console.log(`[eventDeduplication] deduplicateAndCombineEvents: Reduced from ${initialCount} to ${finalCount} events (${reduction} removed)`);
+    console.log(`[eventDeduplication] deduplicateAndCombineEvents: Duplicate details:`, duplicateDetails);
+  } else if (replaceableEventsProcessed > 0) {
+    console.log(`[eventDeduplication] deduplicateAndCombineEvents: No duplicates found in ${replaceableEventsProcessed} replaceable events`);
+  }
+
+  return Array.from(finalEventMap.values());
+}
+
+/**
+ * Check if an event's kind is in the 30000-39999 range. NIP-01 calls these
+ * kinds "addressable"; this module refers to them as replaceable.
+ * @param event The event to check
+ * @returns True if the event is replaceable
+ */
+export function isReplaceableEvent(event: NDKEvent): boolean {
+  return event.kind !== undefined && event.kind >= 30000 && event.kind < 40000;
+}
+
+/**
+ * Get the coordinate for a replaceable event
+ * @param event The event to get the coordinate for
+ * @returns The coordinate string (kind:pubkey:d-tag) or null if not a valid replaceable event
+ */
+export function getEventCoordinate(event: NDKEvent): string | null {
+  if (!isReplaceableEvent(event)) {
+    return null;
+  }
+
+  const dTag = event.tagValue("d");
+  const author = event.pubkey;
+
+  if (!dTag || !author) {
+    return null;
+  }
+
+  return `${event.kind}:${author}:${dTag}`;
+}
diff --git a/src/routes/visualize/+page.svelte b/src/routes/visualize/+page.svelte
index 31c1b68..62421b3 100644
--- a/src/routes/visualize/+page.svelte +++ b/src/routes/visualize/+page.svelte @@ -257,7 +257,18 @@ const contentEvents = await fetchContentEvents(validIndexEvents, publicationConfigs); // Step 5: Deduplicate and combine all events - const combinedEvents = deduplicateAndCombineEvents(nonPublicationEvents, validIndexEvents, contentEvents); + // Combine all events (relays handle deduplication) + const combinedEvents = [ + ...nonPublicationEvents, + ...Array.from(validIndexEvents), + ...Array.from(contentEvents) + ]; + debug("Combined events:", { combinedEvents: combinedEvents.length }); + + // Update state + allEvents = combinedEvents; + followListEvents = []; + baseEvents = [...allEvents]; // Store base events for tag expansion // Step 6: Fetch profiles for discovered pubkeys const eventsWithProfiles = await fetchProfilesForEvents(combinedEvents, kind0Config); @@ -423,10 +434,12 @@ const contentEventSets = await Promise.all(contentEventPromises); - // Deduplicate by keeping only the most recent version of each d-tag per author - const eventsByCoordinate = deduplicateContentEvents(contentEventSets); - const contentEvents = new Set(eventsByCoordinate.values()); - debug("Fetched content events after deduplication:", contentEvents.size); + // Combine all content events (relays handle deduplication) + const contentEvents = new Set(); + contentEventSets.forEach(eventSet => { + eventSet.forEach(event => contentEvents.add(event)); + }); + debug("Fetched content events:", contentEvents.size); return contentEvents; } @@ -485,112 +498,7 @@ return referencesByAuthor; } - /** - * Deduplicate content events by keeping only the most recent version - */ - function deduplicateContentEvents(contentEventSets: Set[]): Map { - const eventsByCoordinate = new Map(); - - contentEventSets.forEach((eventSet) => { - eventSet.forEach(event => { - const dTag = event.tagValue("d"); - const author = event.pubkey; - const kind = event.kind; - - if (dTag && author && kind) { - const coordinate = 
`${kind}:${author}:${dTag}`; - const existing = eventsByCoordinate.get(coordinate); - - // Keep the most recent event (highest created_at) - if (!existing || (event.created_at && existing.created_at && event.created_at > existing.created_at)) { - eventsByCoordinate.set(coordinate, event); - debug(`Keeping newer version of ${coordinate}, created_at: ${event.created_at}`); - } else if (existing) { - debug(`Skipping older version of ${coordinate}, created_at: ${event.created_at} vs ${existing.created_at}`); - } - } - }); - }); - - return eventsByCoordinate; - } - - /** - * Step 5: Deduplicate and combine all events - */ - function deduplicateAndCombineEvents( - nonPublicationEvents: NDKEvent[], - validIndexEvents: Set, - contentEvents: Set - ): NDKEvent[] { - // First, build coordinate map for replaceable events - const coordinateMap = new Map(); - const allEventsToProcess = [ - ...nonPublicationEvents, // Non-publication events fetched earlier - ...Array.from(validIndexEvents), - ...Array.from(contentEvents) - ]; - - // First pass: identify the most recent version of each replaceable event - allEventsToProcess.forEach(event => { - if (!event.id) return; - - // For replaceable events (30000-39999), track by coordinate - if (event.kind && event.kind >= 30000 && event.kind < 40000) { - const dTag = event.tagValue("d"); - const author = event.pubkey; - - if (dTag && author) { - const coordinate = `${event.kind}:${author}:${dTag}`; - const existing = coordinateMap.get(coordinate); - - // Keep the most recent version - if (!existing || (event.created_at && existing.created_at && event.created_at > existing.created_at)) { - coordinateMap.set(coordinate, event); - } - } - } - }); - - // Second pass: build final event map - const finalEventMap = new Map(); - const seenCoordinates = new Set(); - - allEventsToProcess.forEach(event => { - if (!event.id) return; - - // For replaceable events, only add if it's the chosen version - if (event.kind && event.kind >= 30000 && 
event.kind < 40000) { - const dTag = event.tagValue("d"); - const author = event.pubkey; - - if (dTag && author) { - const coordinate = `${event.kind}:${author}:${dTag}`; - const chosenEvent = coordinateMap.get(coordinate); - - // Only add this event if it's the chosen one for this coordinate - if (chosenEvent && chosenEvent.id === event.id) { - if (!seenCoordinates.has(coordinate)) { - finalEventMap.set(event.id, event); - seenCoordinates.add(coordinate); - } - } - return; - } - } - - // Non-replaceable events are added directly - finalEventMap.set(event.id, event); - }); - - // Replace mode (always replace, no append mode) - allEvents = Array.from(finalEventMap.values()); - followListEvents = []; - - baseEvents = [...allEvents]; // Store base events for tag expansion - - return allEvents; - } + // Removed deduplication import - relays handle this properly /** * Step 6: Fetch profiles for discovered pubkeys