You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
322 lines
8.6 KiB
322 lines
8.6 KiB
import { HASHTAG_REGEX } from '@/constants' |
|
import { NostrEvent } from 'nostr-tools' |
|
|
|
/**
 * Normalize a hashtag/topic string into a canonical topic id.
 *
 * Steps, in order:
 * 1. Lowercase the text; optionally turn each whitespace run into a hyphen.
 * 2. Drop every character outside `a-z`, `0-9`, `_` and `-` (this also removes
 *    non-ASCII letters).
 * 3. Collapse each run of hyphens/underscores into a single hyphen, so an
 *    underscore never survives in the result.
 * 4. Trim leading/trailing hyphens.
 * 5. Reject empty and digit-only results.
 * 6. Strip a simple English plural suffix (`-ies`, `-ses`, `-xes`, `-ches`, `-s`),
 *    unless doing so would produce a digit-only tag or a trailing hyphen.
 *
 * @param text The text to normalize
 * @param replaceSpaces When true (default), whitespace runs become hyphens; when
 *   false, whitespace is simply removed by step 2
 * @returns The normalized string, or `''` when nothing valid remains
 */
export function normalizeHashtag(text: string, replaceSpaces: boolean = true): string {
  let normalized = text.toLowerCase()
  if (replaceSpaces) {
    normalized = normalized.replace(/\s+/g, '-')
  }

  normalized = normalized
    // Only letters, digits, hyphens and underscores are kept
    .replace(/[^a-z0-9_-]/g, '')
    // Every separator run (including a single underscore) becomes one hyphen
    .replace(/[-_]+/g, '-')
    // No separators at either end
    .replace(/^[-_]+|[-_]+$/g, '')

  // Reject empty strings and hashtags that are only numbers
  if (normalized === '' || /^[0-9]+$/.test(normalized)) {
    return ''
  }

  // Convert plural to singular (simple English plurals)
  let singular = normalized
  if (normalized.endsWith('ies') && normalized.length > 4) {
    // cities -> city, berries -> berry
    singular = normalized.slice(0, -3) + 'y'
  } else if (normalized.endsWith('ses') && normalized.length > 4) {
    // classes -> class, bosses -> boss
    singular = normalized.slice(0, -2)
  } else if (normalized.endsWith('xes') && normalized.length > 4) {
    // boxes -> box
    singular = normalized.slice(0, -2)
  } else if (normalized.endsWith('ches') && normalized.length > 5) {
    // churches -> church
    singular = normalized.slice(0, -2)
  } else if (normalized.endsWith('s') && normalized.length > 2) {
    // cats -> cat, bitcoins -> bitcoin; words ending in "ss" are left alone
    if (normalized[normalized.length - 2] !== 's') {
      singular = normalized.slice(0, -1)
    }
  }

  // Stripping the suffix must not undo the validation above:
  // "1980s" would become the digit-only "1980", "foo-s" would become "foo-".
  // In those cases keep the already-valid pre-singular form.
  if (/^[0-9]+$/.test(singular) || singular.endsWith('-')) {
    return normalized
  }

  return singular
}
|
|
|
/**
 * Normalize a topic for use as a t-tag value.
 *
 * Thin wrapper that always calls `normalizeHashtag` with space replacement
 * enabled.
 *
 * @param topic Raw topic text
 * @returns The result of `normalizeHashtag(topic, true)`
 */
export function normalizeTopic(topic: string): string {
  const replaceSpaces = true
  return normalizeHashtag(topic, replaceSpaces)
}
|
|
|
/**
 * Extract normalized hashtags from free-text content.
 *
 * Every match of `HASHTAG_REGEX` has its first character removed and is passed
 * through `normalizeTopic`. Matches that normalize to an empty string are
 * skipped, so the result never contains `''`. Duplicates are kept and the
 * order follows the order of the matches.
 *
 * NOTE(review): assumes `HASHTAG_REGEX` carries the global flag (`matchAll`
 * throws a TypeError otherwise) and that each match starts with '#' — confirm
 * against its definition in '@/constants'.
 *
 * @param content Text to scan
 * @returns Normalized, non-empty hashtags in match order
 */
export function extractHashtagsFromContent(content: string): string[] {
  const hashtags: string[] = []

  for (const match of content.matchAll(HASHTAG_REGEX)) {
    // Drop the leading character of the match before normalizing
    const normalized = normalizeTopic(match[0].substring(1))

    // normalizeTopic returns '' for tags it rejects; those are not topics
    if (normalized) {
      hashtags.push(normalized)
    }
  }

  return hashtags
}
|
|
|
/**
 * Extract normalized topics from an event's t-tags.
 *
 * A tag counts when its first element is `'t'` and its second element is
 * truthy. The value is passed through `normalizeTopic`; values that normalize
 * to an empty string are skipped, so the result never contains `''`.
 * Duplicates are kept and tag order is preserved.
 *
 * @param event Event whose `tags` are scanned
 * @returns Normalized, non-empty t-tag values in tag order
 */
export function extractTTagsFromEvent(event: NostrEvent): string[] {
  const topics: string[] = []

  for (const tag of event.tags) {
    if (tag[0] !== 't' || !tag[1]) continue

    const normalized = normalizeTopic(tag[1])

    // normalizeTopic returns '' for values it rejects; those are not topics
    if (normalized) {
      topics.push(normalized)
    }
  }

  return topics
}
|
|
|
/**
 * Collect every topic attached to an event.
 *
 * Content hashtags come first, followed by t-tag topics; a topic that appears
 * more than once is kept only at its first position.
 *
 * @param event Event to inspect (`content` and `tags` are read)
 * @returns De-duplicated topics
 */
export function extractAllTopics(event: NostrEvent): string[] {
  // A Set preserves first-insertion order, which gives the de-duplication
  const unique = new Set<string>(extractHashtagsFromContent(event.content))

  for (const tTag of extractTTagsFromEvent(event)) {
    unique.add(tTag)
  }

  return Array.from(unique)
}
|
|
|
/**
 * Per-topic bucket produced by `analyzeThreadTopics`: the threads assigned to
 * one primary topic plus statistics about the other topics those threads use.
 */
export interface TopicAnalysis {
  /** Key of this bucket: a matched available topic id, or 'general' when none matched */
  primaryTopic: string
  /** Subtopic -> set of `pubkey` values of the threads that used that subtopic */
  subtopics: Map<string, Set<string>>
  /** Threads assigned to this primary topic, in the order they were processed */
  threads: NostrEvent[]
}
|
|
|
/**
 * Group threads by their primary topic and collect subtopic statistics.
 *
 * For each thread, the primary topic is the first of its topics (in
 * `extractAllTopics` order) that appears in `availableTopicIds`, or 'general'
 * when none does. Every other topic of the thread — except 'all',
 * 'all-topics' and empty strings — is recorded as a subtopic of that primary
 * topic together with the thread's `pubkey`.
 *
 * A 'readings' topic on a 'literature' thread needs no special handling: it
 * differs from the primary topic, so it always passes the subtopic filter.
 *
 * NOTE(review): `availableTopicIds` are compared as-is against normalized
 * topics; presumably callers pass ids that are already in normalized form —
 * confirm.
 *
 * @param threads Threads to analyze
 * @param availableTopicIds Topic ids that may serve as a primary topic
 * @returns Map from primary topic to its analysis bucket
 */
export function analyzeThreadTopics(
  threads: NostrEvent[],
  availableTopicIds: string[]
): Map<string, TopicAnalysis> {
  const topicMap = new Map<string, TopicAnalysis>()

  // Built once so the per-topic membership test is not a linear array scan
  const availableTopics = new Set(availableTopicIds)

  for (const thread of threads) {
    const allTopics = extractAllTopics(thread)

    // Find the primary topic (first match from available topics)
    const primaryTopic = allTopics.find(topic => availableTopics.has(topic)) ?? 'general'

    // Get or create topic analysis
    let analysis = topicMap.get(primaryTopic)
    if (!analysis) {
      analysis = { primaryTopic, subtopics: new Map(), threads: [] }
      topicMap.set(primaryTopic, analysis)
    }
    analysis.threads.push(thread)

    // Track subtopics: everything except the primary topic, the catch-all ids
    // and empty strings (which are not real topics)
    for (const topic of allTopics) {
      if (topic === '' || topic === primaryTopic || topic === 'all' || topic === 'all-topics') {
        continue
      }

      let authors = analysis.subtopics.get(topic)
      if (!authors) {
        authors = new Set()
        analysis.subtopics.set(topic, authors)
      }
      authors.add(thread.pubkey)
    }
  }

  return topicMap
}
|
|
|
/**
 * List the subtopics of a topic analysis that are used widely enough.
 *
 * A subtopic qualifies when its set holds at least `minNpubs` entries.
 *
 * @param analysis Analysis bucket to read; a falsy value yields an empty list
 * @param minNpubs Minimum set size (inclusive) for a subtopic to be returned
 * @returns Qualifying subtopic names, sorted with the default string sort
 */
export function getDynamicSubtopics(
  analysis: TopicAnalysis | undefined,
  minNpubs: number = 3
): string[] {
  if (!analysis) return []

  return Array.from(analysis.subtopics)
    .filter(([, authors]) => authors.size >= minNpubs)
    .map(([name]) => name)
    .sort()
}
|
|
|
/**
 * Tell whether a thread carries a given subtopic.
 *
 * The subtopic is compared for exact equality against the topics returned by
 * `extractAllTopics`; it is not normalized here.
 *
 * @param thread Thread to inspect
 * @param subtopic Subtopic to look for
 * @returns True when one of the thread's topics equals `subtopic`
 */
export function threadMatchesSubtopic(
  thread: NostrEvent,
  subtopic: string
): boolean {
  return extractAllTopics(thread).some(topic => topic === subtopic)
}
|
|
|
/**
 * Pick the category a thread belongs to.
 *
 * @param thread Thread to categorize
 * @param availableTopicIds Topic ids that may be used as a category
 * @returns The first of the thread's topics (in `extractAllTopics` order) that
 *   is listed in `availableTopicIds`, or 'general' when none is
 */
export function getCategorizedTopic(
  thread: NostrEvent,
  availableTopicIds: string[]
): string {
  const firstKnownTopic = extractAllTopics(thread).find(topic =>
    availableTopicIds.includes(topic)
  )

  // `find` yields undefined when nothing matched
  return firstKnownTopic ?? 'general'
}
|
|
|
/**
 * Read the group id carried by an event's h-tag.
 *
 * @param event Event whose `tags` are scanned
 * @returns The second element of the first tag whose first element is `'h'`
 *   and whose second element is truthy; `null` when there is no such tag
 */
export function extractHTagFromEvent(event: NostrEvent): string | null {
  for (const [tagName, tagValue] of event.tags) {
    if (tagName === 'h' && tagValue) {
      return tagValue
    }
  }

  return null
}
|
|
|
/**
 * Parse a group identifier from an h-tag value and the relays it was seen on.
 *
 * Supports both the `relay'group-id` format and bare group ids:
 * - When `hTag` contains an apostrophe, the text before the FIRST apostrophe is
 *   the relay and everything after it is the group id (later apostrophes stay
 *   part of the group id).
 * - Otherwise `hTag` is a bare group id and the first entry of `relaySources`,
 *   if any, is used as its relay.
 *
 * @param hTag Value of the event's h-tag
 * @param relaySources Relays used as a fallback for bare group ids
 * @returns `groupId`, `groupRelay` (`null` when a bare id has no relay source)
 *   and `fullIdentifier` (`relay'group-id` when a relay is known, else the id)
 */
export function parseGroupIdentifier(
  hTag: string,
  relaySources: string[]
): { groupId: string; groupRelay: string | null; fullIdentifier: string } {
  // Split on the first apostrophe only. `split("'", 2)` would silently drop
  // everything after a second apostrophe instead of keeping it in the id.
  const separatorIndex = hTag.indexOf("'")
  if (separatorIndex !== -1) {
    return {
      groupId: hTag.slice(separatorIndex + 1),
      groupRelay: hTag.slice(0, separatorIndex),
      fullIdentifier: hTag
    }
  }

  // For bare group IDs, use the first relay source
  const groupRelay = relaySources[0] ?? null
  const fullIdentifier = groupRelay ? `${groupRelay}'${hTag}` : hTag

  return {
    groupId: hTag,
    groupRelay,
    fullIdentifier
  }
}
|
|
|
/**
 * Tell whether an event is a group discussion.
 *
 * @param event Event to inspect
 * @returns True when `extractHTagFromEvent` finds an h-tag value on the event
 */
export function isGroupDiscussion(event: NostrEvent): boolean {
  const groupTag = extractHTagFromEvent(event)
  return groupTag !== null
}
|
|
|
/**
 * Build the label shown for a group.
 *
 * @param groupId Group id
 * @param groupRelay Relay the group lives on, if known
 * @returns `groupId` alone when `groupRelay` is falsy; otherwise
 *   `<relay>'<groupId>`, where `<relay>` is the hostname of `groupRelay` when
 *   it parses as a URL and the raw `groupRelay` string when it does not
 */
export function buildGroupDisplayName(
  groupId: string,
  groupRelay: string | null
): string {
  if (!groupRelay) {
    return groupId
  }

  // Prefer the bare hostname for a cleaner label; keep the raw value when the
  // relay string is not a parseable URL
  let relayLabel = groupRelay
  try {
    relayLabel = new URL(groupRelay).hostname
  } catch {
    // relayLabel already holds the fallback
  }

  return `${relayLabel}'${groupId}`
}
|
|
|
/**
 * Extract group information from an event.
 *
 * Reads the event's h-tag via `extractHTagFromEvent`. Without one, every field
 * is `null` and `isGroupDiscussion` is false. Otherwise the h-tag is parsed
 * with `parseGroupIdentifier` and a display name is built with
 * `buildGroupDisplayName`.
 *
 * @param event Event to inspect
 * @param relaySources Relays passed on to `parseGroupIdentifier`
 * @returns Group id, relay, display name and whether the event is a group
 *   discussion
 */
export function extractGroupInfo(
  event: NostrEvent,
  relaySources: string[]
): { groupId: string | null; groupRelay: string | null; groupDisplayName: string | null; isGroupDiscussion: boolean } {
  const hTag = extractHTagFromEvent(event)

  if (!hTag) {
    return {
      groupId: null,
      groupRelay: null,
      groupDisplayName: null,
      isGroupDiscussion: false
    }
  }

  // Only the id and relay are needed here; the full identifier is not used
  const { groupId, groupRelay } = parseGroupIdentifier(hTag, relaySources)

  return {
    groupId,
    groupRelay,
    groupDisplayName: buildGroupDisplayName(groupId, groupRelay),
    isGroupDiscussion: true
  }
}
|
|
|
|