/**
 * Converts Wikipedia (MediaWiki) markup to AsciiDoc format.
 *
 * Handled constructs:
 * - Headings: `== Heading ==` / `==Heading==` become `== Heading`. The number
 *   of `=` signs is preserved and the trailing marker is dropped.
 * - Inline formatting: `'''''text'''''` becomes `*_text_*`, `'''text'''`
 *   becomes `*text*` and `''text''` becomes `_text_`. A lone apostrophe is not
 *   markup and is left untouched, so ordinary words such as "don't" survive.
 * - External links: `[URL text]` becomes `link:URL[text]` and `[URL]` becomes
 *   `link:URL[URL]`.
 * - Horizontal rules: a line of four or more dashes becomes `'''`.
 *
 * Internal links (`[[Page]]`, `[[Page|Display]]`) and list markers are not
 * rewritten by this function.
 *
 * @param content - Wikipedia markup; literal `\n` escape sequences (a backslash
 *   followed by `n`) are expanded to real newlines before conversion.
 * @returns The converted AsciiDoc text.
 */
function convertWikipediaToAsciidoc(content: string): string {
  // Expand escaped newlines first so the line-anchored patterns below see real lines.
  let asciidoc = content.replace(/\\n/g, '\n');

  // Headings. The backreference forces the closing marker to match the opening
  // one. Only spaces/tabs are allowed as padding so a match can never span
  // several lines, and the first heading character must be neither "=" nor
  // whitespace so marker-only lines are not treated as headings.
  asciidoc = asciidoc.replace(
    /^(=+)[ \t]*([^=\s].*?)[ \t]*\1[ \t]*$/gm,
    (_match: string, equals: string, heading: string): string =>
      `${equals} ${heading.trim()}`
  );

  // Inline formatting. Longest apostrophe runs are handled first so that
  // '''bold''' is not consumed by the two-apostrophe italic pattern.
  // "." does not match newlines, which keeps each span on a single line.
  asciidoc = asciidoc
    .replace(/'{5}(.+?)'{5}/g, '*_$1_*')
    .replace(/'{3}(.+?)'{3}/g, '*$1*')
    .replace(/'{2}(.+?)'{2}/g, '_$1_');

  // External links, converted in a single pass so that a label which is itself
  // a URL is not picked up and converted a second time.
  asciidoc = asciidoc.replace(
    /\[(https?:\/\/[^\s\]]+)(?:\s+([^\]]+))?\]/g,
    (_match: string, url: string, text: string | undefined): string =>
      `link:${url}[${text === undefined ? url : text}]`
  );

  // Horizontal rules. Done last so the emitted ''' is never mistaken for the
  // bold markup handled above.
  asciidoc = asciidoc.replace(/^----+$/gm, "'''");

  return asciidoc;
}
wikipediaIndicators = [ + /^==+\s+.+?\s+==+$/m, // Wikipedia headings: == Heading == + /\[\[[^\]]+\]\]/, // Wikipedia links: [[Page]] + /''[^']+''/, // Wikipedia bold: ''text'' + /'[^']+'/, // Wikipedia italic: 'text' + ]; + + let wikipediaScore = 0; + for (const indicator of wikipediaIndicators) { + if (indicator.test(content)) { + wikipediaScore++; + } + } + // Check for Markdown indicators (more specific patterns to avoid false positives) const markdownIndicators = [ /^#{1,6}\s+/m, // Heading at start of line @@ -45,7 +60,10 @@ export function detectFormat(content: string): ContentFormat { } // Determine format based on scores - if (asciidocScore > markdownScore && asciidocScore >= 2) { + // Wikipedia format takes precedence if detected (it's more specific) + if (wikipediaScore > 0 && wikipediaScore >= 2) { + return ContentFormat.Wikipedia; + } else if (asciidocScore > markdownScore && asciidocScore >= 2) { return ContentFormat.AsciiDoc; } else if (markdownScore > 0) { return ContentFormat.Markdown; diff --git a/src/types.ts b/src/types.ts index 8fcf337..da1b350 100644 --- a/src/types.ts +++ b/src/types.ts @@ -68,5 +68,6 @@ export enum ContentFormat { Unknown = 'unknown', AsciiDoc = 'asciidoc', Markdown = 'markdown', + Wikipedia = 'wikipedia', Plain = 'plain' }