Browse Source

Handle wiki markup links

master
Silberengel 2 weeks ago
parent
commit
ff8f3c47a9
  1. 48
      src/converters/to-asciidoc.ts
  2. 20
      src/detector.ts
  3. 1
      src/types.ts

48
src/converters/to-asciidoc.ts

@ -27,6 +27,10 @@ export function convertToAsciidoc( @@ -27,6 +27,10 @@ export function convertToAsciidoc(
});
break;
case ContentFormat.Wikipedia:
asciidoc = convertWikipediaToAsciidoc(content);
break;
case ContentFormat.Markdown:
asciidoc = convertMarkdownToAsciidoc(content);
break;
@ -52,6 +56,50 @@ export function convertToAsciidoc( @@ -52,6 +56,50 @@ export function convertToAsciidoc(
return asciidoc;
}
/**
 * Converts Wikipedia (MediaWiki) markup to AsciiDoc.
 *
 * Handled constructs:
 * - Escaped newlines (a literal backslash followed by "n") become real newlines.
 * - Headings: `== Heading ==` or `==Heading==` become `== Heading`. The number
 *   of `=` characters is preserved and the closing marker is dropped.
 * - Inline emphasis: `'''bold'''` becomes `*bold*`, `''italic''` becomes
 *   `_italic_` and `'''''both'''''` becomes `*_both_*`. Lone apostrophes
 *   (as in "don't") are left untouched.
 * - External links: `[URL text]` becomes `link:URL[text]`, and `[URL]` becomes
 *   `link:URL[URL]`.
 * - Horizontal rules: a line of four or more dashes becomes `'''`.
 *
 * Internal links (`[[Page]]`, `[[Page|Display]]`) and list markers are passed
 * through unchanged.
 *
 * @param content - Raw Wikipedia-style markup.
 * @returns The content rewritten as AsciiDoc.
 */
function convertWikipediaToAsciidoc(content: string): string {
  // Turn escaped newlines (backslash + "n") into real line breaks first so the
  // line-anchored patterns below see one construct per line.
  let asciidoc = content.replace(/\\n/g, '\n');

  // Headings. The opening run of "=" must be mirrored at the end of the line
  // (back-reference \1). Surrounding blanks are optional, and the title has to
  // start and end with a character that is neither "=" nor whitespace, so a
  // bare "====" line is never mistaken for a heading. Only spaces/tabs are
  // accepted as padding so a match cannot straddle a line break.
  asciidoc = asciidoc.replace(
    /^(=+)[ \t]*([^=\s](?:.*?[^=\s])?)[ \t]*\1[ \t]*$/gm,
    (_match: string, equals: string, heading: string) => `${equals} ${heading}`
  );

  // Inline emphasis, longest delimiter first so that the five-apostrophe form
  // is not consumed piecemeal by the shorter patterns. ".+?" does not cross
  // line breaks, so emphasis stays within a single line.
  asciidoc = asciidoc.replace(/'{5}(.+?)'{5}/g, '*_$1_*');
  asciidoc = asciidoc.replace(/'{3}(.+?)'{3}/g, '*$1*');
  asciidoc = asciidoc.replace(/'{2}(.+?)'{2}/g, '_$1_');

  // External links, handled in a single pass so that a label which is itself
  // a URL is not rewritten a second time.
  asciidoc = asciidoc.replace(
    /\[(https?:\/\/[^\s\]]+)(?:\s+([^\]]+))?\]/g,
    (_match: string, url: string, text: string | undefined) => {
      const label = text === undefined ? '' : text.trim();
      return `link:${url}[${label === '' ? url : label}]`;
    }
  );

  // Horizontal rules. This runs last so the emitted ''' is not picked up by
  // the emphasis patterns above.
  asciidoc = asciidoc.replace(/^----+$/gm, "'''");

  return asciidoc;
}
/**
* Converts Markdown to AsciiDoc format
* Based on jumble's conversion patterns

20
src/detector.ts

@ -27,6 +27,21 @@ export function detectFormat(content: string): ContentFormat { @@ -27,6 +27,21 @@ export function detectFormat(content: string): ContentFormat {
}
}
// Check for Wikipedia markup indicators (== Heading == format)
const wikipediaIndicators = [
/^==+\s+.+?\s+==+$/m, // Wikipedia headings: == Heading ==
/\[\[[^\]]+\]\]/, // Wikipedia links: [[Page]]
/''[^']+''/, // Wikipedia bold: ''text''
/'[^']+'/, // Wikipedia italic: 'text'
];
let wikipediaScore = 0;
for (const indicator of wikipediaIndicators) {
if (indicator.test(content)) {
wikipediaScore++;
}
}
// Check for Markdown indicators (more specific patterns to avoid false positives)
const markdownIndicators = [
/^#{1,6}\s+/m, // Heading at start of line
@ -45,7 +60,10 @@ export function detectFormat(content: string): ContentFormat { @@ -45,7 +60,10 @@ export function detectFormat(content: string): ContentFormat {
}
// Determine format based on scores
if (asciidocScore > markdownScore && asciidocScore >= 2) {
// Wikipedia format takes precedence if detected (it's more specific)
if (wikipediaScore > 0 && wikipediaScore >= 2) {
return ContentFormat.Wikipedia;
} else if (asciidocScore > markdownScore && asciidocScore >= 2) {
return ContentFormat.AsciiDoc;
} else if (markdownScore > 0) {
return ContentFormat.Markdown;

1
src/types.ts

@ -68,5 +68,6 @@ export enum ContentFormat { @@ -68,5 +68,6 @@ export enum ContentFormat {
Unknown = 'unknown',
AsciiDoc = 'asciidoc',
Markdown = 'markdown',
Wikipedia = 'wikipedia',
Plain = 'plain'
}

Loading…
Cancel
Save