Compare commits
No commits in common. '3498f764d450f78d1c4a1b67e084eb10ff29d7eb' and 'ff8f3c47a9e90e60850f52b1a2c8fca8d322556e' have entirely different histories.
3498f764d4
...
ff8f3c47a9
23 changed files with 1443 additions and 2556 deletions
@ -1,388 +0,0 @@ |
|||||||
= AsciiDoc Test Document |
|
||||||
Kismet Lee |
|
||||||
2.9, October 31, 2021: Fall incarnation |
|
||||||
:description: Test description |
|
||||||
:author: Kismet Lee |
|
||||||
:date: 2021-10-31 |
|
||||||
:version: 2.9 |
|
||||||
:status: Draft |
|
||||||
:keywords: AsciiDoc, Test, Document |
|
||||||
:category: Test |
|
||||||
:language: English |
|
||||||
|
|
||||||
== Bullet list |
|
||||||
|
|
||||||
This is a test unordered list with mixed bullets: |
|
||||||
|
|
||||||
* First item with a number 2. in it |
|
||||||
* Second item |
|
||||||
* Third item |
|
||||||
** Indented item |
|
||||||
** Indented item |
|
||||||
* Fourth item |
|
||||||
|
|
||||||
Another unordered list: |
|
||||||
|
|
||||||
* 1st item |
|
||||||
* 2nd item |
|
||||||
* third item containing _italic_ text |
|
||||||
** indented item |
|
||||||
** second indented item |
|
||||||
* fourth item |
|
||||||
|
|
||||||
This is a test ordered list with indented items: |
|
||||||
|
|
||||||
. First item |
|
||||||
. Second item |
|
||||||
. Third item |
|
||||||
.. Indented item |
|
||||||
.. Indented item |
|
||||||
. Fourth item |
|
||||||
|
|
||||||
Ordered list where everything has no number: |
|
||||||
|
|
||||||
. First item |
|
||||||
. Second item |
|
||||||
. Third item |
|
||||||
. Fourth item |
|
||||||
|
|
||||||
This is a mixed list with indented items: |
|
||||||
|
|
||||||
. First item |
|
||||||
. Second item |
|
||||||
. Third item |
|
||||||
* Indented item |
|
||||||
* Indented item |
|
||||||
. Fourth item |
|
||||||
|
|
||||||
This is another mixed list with indented items: |
|
||||||
|
|
||||||
* First item |
|
||||||
* Second item |
|
||||||
* Third item |
|
||||||
. Indented item |
|
||||||
. Indented item |
|
||||||
* Fourth item |
|
||||||
|
|
||||||
== Headers |
|
||||||
|
|
||||||
=== Third-level header |
|
||||||
|
|
||||||
==== Fourth-level header |
|
||||||
|
|
||||||
===== Fifth-level header |
|
||||||
|
|
||||||
====== Sixth-level header |
|
||||||
|
|
||||||
== Media and Links |
|
||||||
|
|
||||||
=== Nostr address |
|
||||||
|
|
||||||
This should be ignored and rendered as plaintext: naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
|
||||||
|
|
||||||
This is also plaintext: |
|
||||||
|
|
||||||
npub1gv069u6q7zkl393ad47xutpqmyfj0rrfrlnqnlfc2ld38k8nnl4st9wa6q |
|
||||||
|
|
||||||
These should be turned into links: |
|
||||||
|
|
||||||
nostr:naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
|
||||||
|
|
||||||
nostr:npub1l5sga6xg72phsz5422ykujprejwud075ggrr3z2hwyrfgr7eylqstegx9z |
|
||||||
|
|
||||||
nostr:nevent1qvzqqqqqqypzp382htsmu08k277ps40wqhnfm60st89h5pvjyutghq9cjasuh38qqythwumn8ghj7un9d3shjtnswf5k6ctv9ehx2ap0qqsysletg3lqnl4uy59xsj4rp9rgw67wg23l827f4uvn5ckn20fuxcq45d8pj |
|
||||||
|
|
||||||
nostr:nprofile1qqsxhedgkuneycxpcdjlg6tgtxdy8gurdz64nq2h0flc288a0jag98qguy3nh |
|
||||||
|
|
||||||
nostr:note1txyefcha2xt3pgungx4k6j077dsteyef6hzpyuuku00s4h0eymzq4k33yg |
|
||||||
|
|
||||||
=== Hashtag |
|
||||||
|
|
||||||
#testhashtag at the start of the line and #inlinehashtag in the middle |
|
||||||
|
|
||||||
=== Wikilinks |
|
||||||
|
|
||||||
[[NKBIP-01|Specification]] and [[mirepoix]] |
|
||||||
|
|
||||||
=== URL |
|
||||||
|
|
||||||
https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html |
|
||||||
|
|
||||||
link:https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html[Welt Online link] |
|
||||||
|
|
||||||
this should render as plaintext: `http://www.example.com` |
|
||||||
|
|
||||||
this should be a hyperlink to the http URL with the same address link:https://theforest.nostr1.com[wss://theforest.nostr1.com] |
|
||||||
|
|
||||||
=== Images |
|
||||||
|
|
||||||
https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png |
|
||||||
|
|
||||||
image::https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png[width=400] |
|
||||||
|
|
||||||
=== Media |
|
||||||
|
|
||||||
==== YouTube |
|
||||||
|
|
||||||
Normal |
|
||||||
|
|
||||||
https://www.youtube.com/watch?v=KGIAS0cslSU |
|
||||||
|
|
||||||
https://youtu.be/KGIAS0cslSU |
|
||||||
|
|
||||||
video::KGIAS0cslSU[youtube] |
|
||||||
|
|
||||||
Shorts |
|
||||||
|
|
||||||
https://www.youtube.com/shorts/s-BQhXdCs8Y |
|
||||||
|
|
||||||
video::s-BQhXdCs8Y[youtube] |
|
||||||
|
|
||||||
==== Spotify |
|
||||||
|
|
||||||
https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx |
|
||||||
|
|
||||||
link:https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx[] |
|
||||||
|
|
||||||
==== Audio |
|
||||||
|
|
||||||
https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3 |
|
||||||
|
|
||||||
audio::https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3[] |
|
||||||
|
|
||||||
==== Video |
|
||||||
|
|
||||||
https://v.nostr.build/MTjaYib4upQuf8zn.mp4 |
|
||||||
|
|
||||||
video::https://v.nostr.build/MTjaYib4upQuf8zn.mp4[] |
|
||||||
|
|
||||||
== Tables |
|
||||||
|
|
||||||
=== Orderly |
|
||||||
|
|
||||||
[cols="1,2"] |
|
||||||
|=== |
|
||||||
|Syntax|Description |
|
||||||
|
|
||||||
|Header |
|
||||||
|Title |
|
||||||
|
|
||||||
|Paragraph |
|
||||||
|Text |
|
||||||
|=== |
|
||||||
|
|
||||||
=== Unorderly |
|
||||||
|
|
||||||
[cols="1,2"] |
|
||||||
|=== |
|
||||||
|Syntax|Description |
|
||||||
|
|
||||||
|Header |
|
||||||
|Title |
|
||||||
|
|
||||||
|Paragraph |
|
||||||
|Text |
|
||||||
|=== |
|
||||||
|
|
||||||
=== With alignment |
|
||||||
|
|
||||||
[cols="<,^,>"] |
|
||||||
|=== |
|
||||||
|Syntax|Description|Test Text |
|
||||||
|
|
||||||
|Header |
|
||||||
|Title |
|
||||||
|Here's this |
|
||||||
|
|
||||||
|Paragraph |
|
||||||
|Text |
|
||||||
|And more |
|
||||||
|=== |
|
||||||
|
|
||||||
== Code blocks |
|
||||||
|
|
||||||
=== json |
|
||||||
|
|
||||||
[source,json] |
|
||||||
---- |
|
||||||
{ |
|
||||||
"id": "<event_id>", |
|
||||||
"pubkey": "<event_originator_pubkey>", |
|
||||||
"created_at": 1725087283, |
|
||||||
"kind": 30040, |
|
||||||
"tags": [ |
|
||||||
["d", "aesop's-fables-by-aesop"], |
|
||||||
["title", "Aesop's Fables"], |
|
||||||
["author", "Aesop"], |
|
||||||
], |
|
||||||
"sig": "<event_signature>" |
|
||||||
} |
|
||||||
---- |
|
||||||
|
|
||||||
=== typescript |
|
||||||
|
|
||||||
[source,typescript] |
|
||||||
---- |
|
||||||
/** |
|
||||||
* Get Nostr identifier type |
|
||||||
*/ |
|
||||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
|
||||||
if (id.startsWith('npub')) return 'npub'; |
|
||||||
if (id.startsWith('nprofile')) return 'nprofile'; |
|
||||||
if (id.startsWith('nevent')) return 'nevent'; |
|
||||||
if (id.startsWith('naddr')) return 'naddr'; |
|
||||||
if (id.startsWith('note')) return 'note'; |
|
||||||
return null; |
|
||||||
} |
|
||||||
---- |
|
||||||
|
|
||||||
=== shell |
|
||||||
|
|
||||||
[source,shell] |
|
||||||
---- |
|
||||||
|
|
||||||
mkdir new_directory |
|
||||||
cp source.txt destination.txt |
|
||||||
|
|
||||||
---- |
|
||||||
|
|
||||||
=== LaTeX |
|
||||||
|
|
||||||
[source,latex] |
|
||||||
---- |
|
||||||
$$ |
|
||||||
M = |
|
||||||
\begin{bmatrix} |
|
||||||
\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em] |
|
||||||
\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em] |
|
||||||
0 & \frac{5}{6} & \frac{1}{6} |
|
||||||
\end{bmatrix} |
|
||||||
$$ |
|
||||||
---- |
|
||||||
|
|
||||||
[source,latex] |
|
||||||
---- |
|
||||||
$$ |
|
||||||
f(x)= |
|
||||||
\begin{cases} |
|
||||||
1/d_{ij} & \quad \text{when $d_{ij} \leq 160$}\\ |
|
||||||
0 & \quad \text{otherwise} |
|
||||||
\end{cases} |
|
||||||
$$ |
|
||||||
---- |
|
||||||
|
|
||||||
=== ABC Notation |
|
||||||
|
|
||||||
[abc] |
|
||||||
---- |
|
||||||
X:1 |
|
||||||
T:Ohne Titel |
|
||||||
C:Aufgezeichnet 1784 |
|
||||||
A:Seibis nahe Lichtenberg in Oberfranken |
|
||||||
S:Handschrift, bezeichnet und datiert: "Heinrich Nicol Philipp zu Seibis den 30 Junius 1784" |
|
||||||
M:4/4 |
|
||||||
L:1/4 |
|
||||||
K:D |
|
||||||
dd d2 | ee e2 | fg ad | cB cA |\ |
|
||||||
dd d2 | ee e2 | fg ad | ed/c/ d2 :| |
|
||||||
|:\ |
|
||||||
fg ad | cB cA | fg ad | cB cA |\ |
|
||||||
dd d2 | ee e2 | fg ad | ed/c/ d2 :| |
|
||||||
---- |
|
||||||
|
|
||||||
=== PlantUML |
|
||||||
|
|
||||||
[plantuml] |
|
||||||
---- |
|
||||||
@startuml |
|
||||||
Alice -> Bob: Authentication Request |
|
||||||
Bob --> Alice: Authentication Response |
|
||||||
@enduml |
|
||||||
---- |
|
||||||
|
|
||||||
=== BPMN |
|
||||||
|
|
||||||
[plantuml] |
|
||||||
---- |
|
||||||
@startbpmn |
|
||||||
start |
|
||||||
:Task 1; |
|
||||||
:Task 2; |
|
||||||
stop |
|
||||||
@endbpmn |
|
||||||
---- |
|
||||||
|
|
||||||
== LaTeX |
|
||||||
|
|
||||||
=== LaTeX in inline-code |
|
||||||
|
|
||||||
`$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$` |
|
||||||
|
|
||||||
== Footnotes |
|
||||||
|
|
||||||
Here's a simple footnote,footnote:[This is the first footnote.] and here's a longer one.footnote:[Here's one with multiple paragraphs and code.] |
|
||||||
|
|
||||||
== Anchor links |
|
||||||
|
|
||||||
<<_bullet_list,Link to bullet list section>> |
|
||||||
|
|
||||||
== Formatting |
|
||||||
|
|
||||||
=== Strikethrough |
|
||||||
|
|
||||||
[line-through]#The world is flat.# We now know that the world is round. This should not be ~struck~ through. |
|
||||||
|
|
||||||
=== Bold |
|
||||||
|
|
||||||
This is *bold* text. So is this *bold* text. |
|
||||||
|
|
||||||
=== Italic |
|
||||||
|
|
||||||
This is _italic_ text. So is this _italic_ text. |
|
||||||
|
|
||||||
=== Task List |
|
||||||
|
|
||||||
* [x] Write the press release |
|
||||||
* [ ] Update the website |
|
||||||
* [ ] Contact the media |
|
||||||
|
|
||||||
=== Emoji shortcodes |
|
||||||
|
|
||||||
Gone camping! :tent: Be back soon. |
|
||||||
|
|
||||||
That is so funny! :joy: |
|
||||||
|
|
||||||
=== Marking and highlighting text |
|
||||||
|
|
||||||
I need to highlight these [highlight]#very important words#. |
|
||||||
|
|
||||||
=== Subscript and Superscript |
|
||||||
|
|
||||||
H~2~O |
|
||||||
|
|
||||||
X^2^ |
|
||||||
|
|
||||||
=== Delimiter |
|
||||||
|
|
||||||
based upon a single quote |
|
||||||
|
|
||||||
''' |
|
||||||
|
|
||||||
based upon a dashes |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
=== Quotes |
|
||||||
|
|
||||||
[quote] |
|
||||||
____ |
|
||||||
This is a single line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
____ |
|
||||||
|
|
||||||
[quote] |
|
||||||
____ |
|
||||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
____ |
|
||||||
@ -0,0 +1,55 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
/** |
||||||
|
* Example usage of gc-parser |
||||||
|
* This can be called from Go or used directly in Node.js |
||||||
|
*/ |
||||||
|
|
||||||
|
const { Parser, defaultOptions } = require('./dist/index.js'); |
||||||
|
|
||||||
|
async function main() { |
||||||
|
// Create parser with default options
|
||||||
|
const opts = defaultOptions(); |
||||||
|
opts.linkBaseURL = process.env.LINK_BASE_URL || 'https://example.com'; |
||||||
|
|
||||||
|
const parser = new Parser(opts); |
||||||
|
|
||||||
|
// Get content from command line argument or stdin
|
||||||
|
let content = ''; |
||||||
|
if (process.argv[2]) { |
||||||
|
content = process.argv[2]; |
||||||
|
} else { |
||||||
|
// Read from stdin
|
||||||
|
const readline = require('readline'); |
||||||
|
const rl = readline.createInterface({ |
||||||
|
input: process.stdin, |
||||||
|
output: process.stdout, |
||||||
|
terminal: false |
||||||
|
}); |
||||||
|
|
||||||
|
for await (const line of rl) { |
||||||
|
content += line + '\n'; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (!content) { |
||||||
|
console.error('No content provided'); |
||||||
|
process.exit(1); |
||||||
|
} |
||||||
|
|
||||||
|
try { |
||||||
|
const result = await parser.process(content); |
||||||
|
|
||||||
|
// Output as JSON for easy parsing
|
||||||
|
console.log(JSON.stringify(result, null, 2)); |
||||||
|
} catch (error) { |
||||||
|
console.error('Error processing content:', error); |
||||||
|
process.exit(1); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (require.main === module) { |
||||||
|
main(); |
||||||
|
} |
||||||
|
|
||||||
|
module.exports = { main }; |
||||||
@ -1,23 +0,0 @@ |
|||||||
module.exports = { |
|
||||||
preset: 'ts-jest', |
|
||||||
testEnvironment: 'node', |
|
||||||
roots: ['<rootDir>/src'], |
|
||||||
testMatch: ['**/__tests__/**/*.test.ts', '**/?(*.)+(spec|test).ts'], |
|
||||||
testPathIgnorePatterns: ['/node_modules/', '/dist/', 'asciidoc.test.ts'], |
|
||||||
transform: { |
|
||||||
'^.+\\.ts$': ['ts-jest', { |
|
||||||
tsconfig: { |
|
||||||
esModuleInterop: true, |
|
||||||
}, |
|
||||||
}], |
|
||||||
'^.+\\.js$': 'babel-jest', |
|
||||||
}, |
|
||||||
moduleFileExtensions: ['ts', 'js', 'json'], |
|
||||||
moduleNameMapper: { |
|
||||||
'^marked$': '<rootDir>/node_modules/marked/lib/marked.umd.js', |
|
||||||
}, |
|
||||||
collectCoverageFrom: [ |
|
||||||
'src/**/*.ts', |
|
||||||
'!src/**/*.d.ts', |
|
||||||
], |
|
||||||
}; |
|
||||||
@ -1,277 +0,0 @@ |
|||||||
--- |
|
||||||
# this is YAML front matter |
|
||||||
author: James Smith |
|
||||||
summary: This is a summary |
|
||||||
topics: list, of, topics |
|
||||||
variable: one |
|
||||||
array: |
|
||||||
- one thing |
|
||||||
- two things |
|
||||||
- several things |
|
||||||
# all of this data is available to our layout |
|
||||||
--- |
|
||||||
|
|
||||||
# Markdown Test Document |
|
||||||
|
|
||||||
## Bullet list |
|
||||||
|
|
||||||
This is a test unordered list with mixed bullets: |
|
||||||
|
|
||||||
* First item with a number 2. in it |
|
||||||
* Second item |
|
||||||
* Third item |
|
||||||
- Indented item |
|
||||||
- Indented item |
|
||||||
* Fourth item |
|
||||||
|
|
||||||
Another unordered list: |
|
||||||
|
|
||||||
- 1st item |
|
||||||
- 2nd item |
|
||||||
- third item containing _italic_ text |
|
||||||
- indented item |
|
||||||
- second indented item |
|
||||||
- fourth item |
|
||||||
|
|
||||||
This is a test ordered list with indented items: |
|
||||||
|
|
||||||
1. First item |
|
||||||
2. Second item |
|
||||||
3. Third item |
|
||||||
1. Indented item |
|
||||||
2. Indented item |
|
||||||
4. Fourth item |
|
||||||
|
|
||||||
Ordered list that is wrongly numbered: |
|
||||||
|
|
||||||
1. First item |
|
||||||
8. Second item |
|
||||||
3. Third item |
|
||||||
5. Fourth item |
|
||||||
|
|
||||||
This is a mixed list with indented items: |
|
||||||
|
|
||||||
1. First item |
|
||||||
2. Second item |
|
||||||
3. Third item |
|
||||||
* Indented item |
|
||||||
* Indented item |
|
||||||
4. Fourth item |
|
||||||
|
|
||||||
This is another mixed list with indented items: |
|
||||||
|
|
||||||
- First item |
|
||||||
- Second item |
|
||||||
- Third item |
|
||||||
1. Indented item |
|
||||||
2. Indented item |
|
||||||
- Fourth item |
|
||||||
|
|
||||||
## Headers |
|
||||||
|
|
||||||
### Third-level header |
|
||||||
|
|
||||||
#### Fourth-level header |
|
||||||
|
|
||||||
##### Fifth-level header |
|
||||||
|
|
||||||
###### Sixth-level header |
|
||||||
|
|
||||||
## Media and Links |
|
||||||
|
|
||||||
### Nostr address |
|
||||||
|
|
||||||
This should be ignored and rendered as plaintext: naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
|
||||||
|
|
||||||
This is also plaintext: |
|
||||||
|
|
||||||
npub1gv069u6q7zkl393ad47xutpqmyfj0rrfrlnqnlfc2ld38k8nnl4st9wa6q |
|
||||||
|
|
||||||
These should be turned into links: |
|
||||||
|
|
||||||
nostr:naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
|
||||||
|
|
||||||
nostr:npub1l5sga6xg72phsz5422ykujprejwud075ggrr3z2hwyrfgr7eylqstegx9z |
|
||||||
|
|
||||||
nostr:nevent1qvzqqqqqqypzp382htsmu08k277ps40wqhnfm60st89h5pvjyutghq9cjasuh38qqythwumn8ghj7un9d3shjtnswf5k6ctv9ehx2ap0qqsysletg3lqnl4uy59xsj4rp9rgw67wg23l827f4uvn5ckn20fuxcq45d8pj |
|
||||||
|
|
||||||
nostr:nprofile1qqsxhedgkuneycxpcdjlg6tgtxdy8gurdz64nq2h0flc288a0jag98qguy3nh |
|
||||||
|
|
||||||
nostr:note1txyefcha2xt3pgungx4k6j077dsteyef6hzpyuuku00s4h0eymzq4k33yg |
|
||||||
|
|
||||||
### Hashtag |
|
||||||
|
|
||||||
#testhashtag at the start of the line and #inlinehashtag in the middle |
|
||||||
|
|
||||||
### Wikilinks |
|
||||||
|
|
||||||
[[NKBIP-01|Specification]] and [[mirepoix]] |
|
||||||
|
|
||||||
### URL |
|
||||||
|
|
||||||
https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html |
|
||||||
|
|
||||||
[Welt Online link](https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html) |
|
||||||
|
|
||||||
this should render as plaintext: `http://www.example.com` |
|
||||||
|
|
||||||
this shouild be a hyperlink to the http URL with the same address [wss://theforest.nostr1.com](https://theforest.nostr1.com) |
|
||||||
|
|
||||||
### Images |
|
||||||
|
|
||||||
https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png |
|
||||||
|
|
||||||
 |
|
||||||
|
|
||||||
### Media |
|
||||||
|
|
||||||
#### YouTube |
|
||||||
|
|
||||||
https://youtube.com/shorts/ZWfvChb-i0w |
|
||||||
|
|
||||||
#### Spotify |
|
||||||
|
|
||||||
https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ |
|
||||||
|
|
||||||
#### Audio |
|
||||||
|
|
||||||
https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3 |
|
||||||
|
|
||||||
#### Video |
|
||||||
|
|
||||||
https://v.nostr.build/MTjaYib4upQuf8zn.mp4 |
|
||||||
|
|
||||||
## Tables |
|
||||||
|
|
||||||
### Orderly |
|
||||||
|
|
||||||
| Syntax | Description | |
|
||||||
| ----------- | ----------- | |
|
||||||
| Header | Title | |
|
||||||
| Paragraph | Text | |
|
||||||
|
|
||||||
### Unorderly |
|
||||||
|
|
||||||
| Syntax | Description | |
|
||||||
| --- | ----------- | |
|
||||||
| Header | Title | |
|
||||||
| Paragraph | Text | |
|
||||||
|
|
||||||
## Code blocks |
|
||||||
|
|
||||||
### json |
|
||||||
|
|
||||||
```json |
|
||||||
{ |
|
||||||
"id": "<event_id>", |
|
||||||
"pubkey": "<event_originator_pubkey>", |
|
||||||
"created_at": 1725087283, |
|
||||||
"kind": 30040, |
|
||||||
"tags": [ |
|
||||||
["d", "aesop's-fables-by-aesop"], |
|
||||||
["title", "Aesop's Fables"], |
|
||||||
["author", "Aesop"], |
|
||||||
], |
|
||||||
"sig": "<event_signature>" |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
### typescript |
|
||||||
|
|
||||||
```typescript |
|
||||||
/** |
|
||||||
* Get Nostr identifier type |
|
||||||
*/ |
|
||||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
|
||||||
if (id.startsWith('npub')) return 'npub'; |
|
||||||
if (id.startsWith('nprofile')) return 'nprofile'; |
|
||||||
if (id.startsWith('nevent')) return 'nevent'; |
|
||||||
if (id.startsWith('naddr')) return 'naddr'; |
|
||||||
if (id.startsWith('note')) return 'note'; |
|
||||||
return null; |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
### shell |
|
||||||
|
|
||||||
```shell |
|
||||||
|
|
||||||
mkdir new_directory |
|
||||||
cp source.txt destination.txt |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
### LaTeX |
|
||||||
|
|
||||||
```latex |
|
||||||
$$ |
|
||||||
M = |
|
||||||
\begin{bmatrix} |
|
||||||
\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em] |
|
||||||
\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em] |
|
||||||
0 & \frac{5}{6} & \frac{1}{6} |
|
||||||
\end{bmatrix} |
|
||||||
$$ |
|
||||||
``` |
|
||||||
|
|
||||||
## LateX |
|
||||||
|
|
||||||
### LaTex in inline-code |
|
||||||
|
|
||||||
`$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$` |
|
||||||
|
|
||||||
## Footnotes |
|
||||||
|
|
||||||
Here's a simple footnote,[^1] and here's a longer one.[^bignote] |
|
||||||
|
|
||||||
[^1]: This is the first footnote. |
|
||||||
|
|
||||||
[^bignote]: Here's one with multiple paragraphs and code. |
|
||||||
|
|
||||||
## Anchor links |
|
||||||
|
|
||||||
[Link to bullet list section](#bullet-list) |
|
||||||
|
|
||||||
## Formatting |
|
||||||
|
|
||||||
### Strikethrough |
|
||||||
|
|
||||||
~~The world is flat.~~ We now know that the world is round. |
|
||||||
|
|
||||||
### Bold |
|
||||||
|
|
||||||
This is *italic* text. So is this **bold** text. |
|
||||||
|
|
||||||
### Task List |
|
||||||
|
|
||||||
- [x] Write the press release |
|
||||||
- [ ] Update the website |
|
||||||
- [ ] Contact the media |
|
||||||
|
|
||||||
### Emoji shortcodes |
|
||||||
|
|
||||||
Gone camping! :tent: Be back soon. |
|
||||||
|
|
||||||
That is so funny! :joy: |
|
||||||
|
|
||||||
### Subscript and Superscript |
|
||||||
|
|
||||||
X^2^ |
|
||||||
|
|
||||||
### Delimiter |
|
||||||
|
|
||||||
based upon a - |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
based upon a * |
|
||||||
|
|
||||||
*** |
|
||||||
|
|
||||||
### Quotes |
|
||||||
|
|
||||||
> This is a single line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
|
|
||||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
|
||||||
@ -1,353 +0,0 @@ |
|||||||
import { Parser } from '../parser'; |
|
||||||
import { readFileSync, writeFileSync, mkdirSync } from 'fs'; |
|
||||||
import { join } from 'path'; |
|
||||||
|
|
||||||
/** |
|
||||||
* Simple test runner for AsciiDoc tests (separate from Jest due to Opal compatibility issues) |
|
||||||
*/ |
|
||||||
async function runAsciiDocTests() { |
|
||||||
console.log('Running AsciiDoc tests...\n'); |
|
||||||
|
|
||||||
const asciidocContent = readFileSync(join(__dirname, '../../asciidoc_testdoc.adoc'), 'utf-8'); |
|
||||||
const parser = new Parser({ |
|
||||||
linkBaseURL: 'https://example.com', |
|
||||||
enableNostrAddresses: true, |
|
||||||
wikilinkUrl: '/events?d={dtag}', |
|
||||||
hashtagUrl: '/hashtag/{topic}' |
|
||||||
}); |
|
||||||
|
|
||||||
let passed = 0; |
|
||||||
let failed = 0; |
|
||||||
const failures: string[] = []; |
|
||||||
|
|
||||||
const testPromises: Promise<void>[] = []; |
|
||||||
|
|
||||||
function test(name: string, fn: () => void | Promise<void>) { |
|
||||||
const testPromise = (async () => { |
|
||||||
try { |
|
||||||
const result = fn(); |
|
||||||
if (result instanceof Promise) { |
|
||||||
await result; |
|
||||||
} |
|
||||||
passed++; |
|
||||||
console.log(`✓ ${name}`); |
|
||||||
} catch (error: any) { |
|
||||||
failed++; |
|
||||||
failures.push(`${name}: ${error.message}`); |
|
||||||
console.error(`✗ ${name}: ${error.message}`); |
|
||||||
} |
|
||||||
})(); |
|
||||||
testPromises.push(testPromise); |
|
||||||
} |
|
||||||
|
|
||||||
function expect(actual: any) { |
|
||||||
return { |
|
||||||
toBeDefined: () => { |
|
||||||
if (actual === undefined || actual === null) { |
|
||||||
throw new Error(`Expected value to be defined, but got ${actual}`); |
|
||||||
} |
|
||||||
}, |
|
||||||
toBe: (expected: any) => { |
|
||||||
if (actual !== expected) { |
|
||||||
throw new Error(`Expected ${expected}, but got ${actual}`); |
|
||||||
} |
|
||||||
}, |
|
||||||
toContain: (substring: string) => { |
|
||||||
if (typeof actual === 'string' && !actual.includes(substring)) { |
|
||||||
throw new Error(`Expected string to contain "${substring}"`); |
|
||||||
} |
|
||||||
}, |
|
||||||
toMatch: (regex: RegExp) => { |
|
||||||
if (typeof actual === 'string' && !regex.test(actual)) { |
|
||||||
throw new Error(`Expected string to match ${regex}`); |
|
||||||
} |
|
||||||
}, |
|
||||||
toHaveProperty: (prop: string) => { |
|
||||||
if (!(prop in actual)) { |
|
||||||
throw new Error(`Expected object to have property "${prop}"`); |
|
||||||
} |
|
||||||
}, |
|
||||||
toBeGreaterThan: (value: number) => { |
|
||||||
if (typeof actual !== 'number' || actual <= value) { |
|
||||||
throw new Error(`Expected ${actual} to be greater than ${value}`); |
|
||||||
} |
|
||||||
}, |
|
||||||
length: { |
|
||||||
toBeGreaterThan: (value: number) => { |
|
||||||
if (!Array.isArray(actual) || actual.length <= value) { |
|
||||||
throw new Error(`Expected array length to be greater than ${value}, but got ${actual.length}`); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
} |
|
||||||
|
|
||||||
// Run tests
|
|
||||||
const result = await parser.process(asciidocContent); |
|
||||||
|
|
||||||
// Write HTML output to file for inspection
|
|
||||||
const outputDir = join(__dirname, '../../test-output'); |
|
||||||
try { |
|
||||||
mkdirSync(outputDir, { recursive: true }); |
|
||||||
} catch (e) { |
|
||||||
// Directory might already exist
|
|
||||||
} |
|
||||||
|
|
||||||
const htmlOutput = `<!DOCTYPE html>
|
|
||||||
<html lang="en"> |
|
||||||
<head> |
|
||||||
<meta charset="UTF-8"> |
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
||||||
<meta name="referrer" content="strict-origin-when-cross-origin"> |
|
||||||
<meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'unsafe-inline' 'unsafe-eval' https://www.youtube.com https://s.ytimg.com https://www.gstatic.com https://*.googlevideo.com; frame-src https://www.youtube.com https://youtube.com https://open.spotify.com https://*.googlevideo.com; style-src 'unsafe-inline'; img-src 'self' data: https:; media-src 'self' https:; connect-src https:; child-src https://www.youtube.com https://youtube.com;"> |
|
||||||
<title>AsciiDoc Test Output</title> |
|
||||||
<style> |
|
||||||
body { font-family: sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; line-height: 1.6; } |
|
||||||
.hashtag { color: #1da1f2; font-weight: 500; } |
|
||||||
.wikilink { color: #0066cc; text-decoration: underline; } |
|
||||||
.nostr-link { color: #8b5cf6; text-decoration: underline; } |
|
||||||
pre { background: #f5f5f5; padding: 10px; border-radius: 4px; overflow-x: auto; } |
|
||||||
code { background: #f5f5f5; padding: 2px 4px; border-radius: 3px; font-family: 'Courier New', monospace; } |
|
||||||
.line-through { text-decoration: line-through; } |
|
||||||
.highlight { background-color: #ffeb3b; padding: 2px 4px; border-radius: 3px; } |
|
||||||
.bare-image { max-width: 100%; width: auto; height: auto; margin: 10px 0; display: block; } |
|
||||||
.bare-video, .bare-audio { width: 100%; max-width: 800px; margin: 10px 0; display: block; } |
|
||||||
.youtube-embed, .spotify-embed { max-width: 100%; margin: 10px 0; border-radius: 8px; display: block; } |
|
||||||
.youtube-embed { width: 100%; max-width: 640px; height: auto; aspect-ratio: 16/9; border: 0; display: block; } |
|
||||||
.spotify-embed { width: 100%; max-width: 800px; } |
|
||||||
/* Table styles */ |
|
||||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
|
||||||
table thead { background-color: #f2f2f2; } |
|
||||||
table th { font-weight: bold; padding: 8px; border: 1px solid #ddd; background-color: #f2f2f2; } |
|
||||||
table td { padding: 8px; border: 1px solid #ddd; } |
|
||||||
/* Alignment classes - AsciiDoc uses halign-* and valign-* classes */ |
|
||||||
.halign-left { text-align: left !important; } |
|
||||||
.halign-center { text-align: center !important; } |
|
||||||
.halign-right { text-align: right !important; } |
|
||||||
.valign-top { vertical-align: top !important; } |
|
||||||
.valign-middle { vertical-align: middle !important; } |
|
||||||
.valign-bottom { vertical-align: bottom !important; } |
|
||||||
/* Also handle tableblock classes */ |
|
||||||
.tableblock.halign-left { text-align: left !important; } |
|
||||||
.tableblock.halign-center { text-align: center !important; } |
|
||||||
.tableblock.halign-right { text-align: right !important; } |
|
||||||
.tableblock.valign-top { vertical-align: top !important; } |
|
||||||
.tableblock.valign-middle { vertical-align: middle !important; } |
|
||||||
.tableblock.valign-bottom { vertical-align: bottom !important; } |
|
||||||
/* Task list styles */ |
|
||||||
.checklist { list-style: none; padding-left: 0; } |
|
||||||
.checklist li { padding-left: 1.5em; position: relative; margin: 0.5em 0; } |
|
||||||
.checklist li i.fa-check-square-o::before { content: "☑ "; font-style: normal; font-family: sans-serif; } |
|
||||||
.checklist li i.fa-square-o::before { content: "☐ "; font-style: normal; font-family: sans-serif; } |
|
||||||
.checklist li i { position: absolute; left: 0; font-style: normal; } |
|
||||||
/* Fallback if Font Awesome doesn't load */ |
|
||||||
.checklist li i.fa-check-square-o { display: inline-block; width: 1em; } |
|
||||||
.checklist li i.fa-check-square-o:before { content: "☑"; } |
|
||||||
.checklist li i.fa-square-o { display: inline-block; width: 1em; } |
|
||||||
.checklist li i.fa-square-o:before { content: "☐"; } |
|
||||||
/* AsciiDoc specific styles */ |
|
||||||
.sect1, .sect2, .sect3, .sect4, .sect5 { margin-top: 1.5em; margin-bottom: 1em; } |
|
||||||
.paragraph { margin: 1em 0; } |
|
||||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
|
||||||
table th, table td { border: 1px solid #ddd; padding: 8px; text-align: left; } |
|
||||||
table th { background-color: #f2f2f2; } |
|
||||||
blockquote { border-left: 4px solid #ddd; padding-left: 1em; margin: 1em 0; color: #666; } |
|
||||||
</style> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<h1>AsciiDoc Test Document - Parsed Output</h1> |
|
||||||
<hr> |
|
||||||
${result.content} |
|
||||||
<hr> |
|
||||||
<h2>Metadata</h2> |
|
||||||
<pre>${JSON.stringify({ |
|
||||||
hasLaTeX: result.hasLaTeX, |
|
||||||
hasMusicalNotation: result.hasMusicalNotation, |
|
||||||
nostrLinks: result.nostrLinks, |
|
||||||
wikilinks: result.wikilinks, |
|
||||||
hashtags: result.hashtags, |
|
||||||
links: result.links, |
|
||||||
media: result.media |
|
||||||
}, null, 2)}</pre> |
|
||||||
</body> |
|
||||||
</html>`;
|
|
||||||
|
|
||||||
const outputPath = join(outputDir, 'asciidoc-output.html'); |
|
||||||
writeFileSync(outputPath, htmlOutput, 'utf-8'); |
|
||||||
console.log(`\n📄 HTML output written to: ${outputPath}\n`); |
|
||||||
|
|
||||||
test('should parse AsciiDoc content', () => { |
|
||||||
expect(result).toBeDefined(); |
|
||||||
expect(result.content).toBeDefined(); |
|
||||||
expect(typeof result.content).toBe('string'); |
|
||||||
expect(result.content.length).toBeGreaterThan(0); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should have HTML content', () => { |
|
||||||
expect(result.content).toContain('<'); |
|
||||||
expect(result.content).toContain('>'); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract table of contents', () => { |
|
||||||
expect(result.tableOfContents).toBeDefined(); |
|
||||||
expect(typeof result.tableOfContents).toBe('string'); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should detect LaTeX', () => { |
|
||||||
expect(result.hasLaTeX).toBeDefined(); |
|
||||||
expect(typeof result.hasLaTeX).toBe('boolean'); |
|
||||||
expect(result.hasLaTeX).toBe(true); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should detect musical notation', () => { |
|
||||||
expect(result.hasMusicalNotation).toBeDefined(); |
|
||||||
expect(typeof result.hasMusicalNotation).toBe('boolean'); |
|
||||||
expect(result.hasMusicalNotation).toBe(true); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract nostr links', () => { |
|
||||||
expect(result.nostrLinks).toBeDefined(); |
|
||||||
expect(Array.isArray(result.nostrLinks)).toBe(true); |
|
||||||
expect(result.nostrLinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
const nostrLink = result.nostrLinks[0]; |
|
||||||
expect(nostrLink).toHaveProperty('type'); |
|
||||||
expect(nostrLink).toHaveProperty('id'); |
|
||||||
expect(nostrLink).toHaveProperty('text'); |
|
||||||
expect(nostrLink).toHaveProperty('bech32'); |
|
||||||
const validTypes = ['npub', 'nprofile', 'nevent', 'naddr', 'note']; |
|
||||||
if (!validTypes.includes(nostrLink.type)) { |
|
||||||
throw new Error(`Invalid nostr type: ${nostrLink.type}`); |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract wikilinks', () => { |
|
||||||
expect(result.wikilinks).toBeDefined(); |
|
||||||
expect(Array.isArray(result.wikilinks)).toBe(true); |
|
||||||
expect(result.wikilinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
const wikilink = result.wikilinks[0]; |
|
||||||
expect(wikilink).toHaveProperty('dtag'); |
|
||||||
expect(wikilink).toHaveProperty('display'); |
|
||||||
expect(wikilink).toHaveProperty('original'); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract hashtags', () => { |
|
||||||
expect(result.hashtags).toBeDefined(); |
|
||||||
expect(Array.isArray(result.hashtags)).toBe(true); |
|
||||||
expect(result.hashtags.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
result.hashtags.forEach((tag: string) => { |
|
||||||
if (tag.includes('#')) { |
|
||||||
throw new Error(`Hashtag should not include #: ${tag}`); |
|
||||||
} |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract regular links', () => { |
|
||||||
expect(result.links).toBeDefined(); |
|
||||||
expect(Array.isArray(result.links)).toBe(true); |
|
||||||
|
|
||||||
if (result.links.length > 0) { |
|
||||||
const link = result.links[0]; |
|
||||||
expect(link).toHaveProperty('url'); |
|
||||||
expect(link).toHaveProperty('text'); |
|
||||||
expect(link).toHaveProperty('isExternal'); |
|
||||||
expect(typeof link.isExternal).toBe('boolean'); |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
test('should extract media URLs', () => { |
|
||||||
expect(result.media).toBeDefined(); |
|
||||||
expect(Array.isArray(result.media)).toBe(true); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should process nostr: addresses in HTML', () => { |
|
||||||
const nostrAddresses = result.nostrLinks; |
|
||||||
expect(nostrAddresses.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
nostrAddresses.forEach((link: any) => { |
|
||||||
if (!result.content.includes(`data-nostr-type="${link.type}"`)) { |
|
||||||
throw new Error(`Missing nostr type attribute for ${link.type}`); |
|
||||||
} |
|
||||||
if (!result.content.includes(`data-nostr-id="${link.bech32}"`)) { |
|
||||||
throw new Error(`Missing nostr id attribute for ${link.bech32}`); |
|
||||||
} |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should process wikilinks in HTML', () => { |
|
||||||
const wikilinks = result.wikilinks; |
|
||||||
expect(wikilinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
wikilinks.forEach((wikilink: any) => { |
|
||||||
if (!result.content.includes(`class="wikilink"`)) { |
|
||||||
throw new Error('Missing wikilink class'); |
|
||||||
} |
|
||||||
if (!result.content.includes(`data-dtag="${wikilink.dtag}"`)) { |
|
||||||
throw new Error(`Missing dtag attribute for ${wikilink.dtag}`); |
|
||||||
} |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should process hashtags in HTML', () => { |
|
||||||
const hashtags = result.hashtags; |
|
||||||
expect(hashtags.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
hashtags.forEach((tag: string) => { |
|
||||||
if (!result.content.includes(`data-topic="${tag}"`)) { |
|
||||||
throw new Error(`Missing topic attribute for ${tag}`); |
|
||||||
} |
|
||||||
if (!result.content.includes('class="hashtag"')) { |
|
||||||
throw new Error('Missing hashtag class'); |
|
||||||
} |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
test('should contain expected content sections', () => { |
|
||||||
if (!/Bullet list|bullet/i.test(result.content)) { |
|
||||||
throw new Error('Missing bullet list section'); |
|
||||||
} |
|
||||||
if (!/Headers|header/i.test(result.content)) { |
|
||||||
throw new Error('Missing headers section'); |
|
||||||
} |
|
||||||
if (!/Media and Links|media|links/i.test(result.content)) { |
|
||||||
throw new Error('Missing media and links section'); |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
test('should return consistent structure', () => { |
|
||||||
expect(result).toHaveProperty('content'); |
|
||||||
expect(result).toHaveProperty('tableOfContents'); |
|
||||||
expect(result).toHaveProperty('hasLaTeX'); |
|
||||||
expect(result).toHaveProperty('hasMusicalNotation'); |
|
||||||
expect(result).toHaveProperty('nostrLinks'); |
|
||||||
expect(result).toHaveProperty('wikilinks'); |
|
||||||
expect(result).toHaveProperty('hashtags'); |
|
||||||
expect(result).toHaveProperty('links'); |
|
||||||
expect(result).toHaveProperty('media'); |
|
||||||
}); |
|
||||||
|
|
||||||
// Wait for all tests to complete
|
|
||||||
await Promise.all(testPromises); |
|
||||||
|
|
||||||
// Print summary
|
|
||||||
console.log(`\n${'='.repeat(50)}`); |
|
||||||
console.log(`Tests passed: ${passed}`); |
|
||||||
console.log(`Tests failed: ${failed}`); |
|
||||||
|
|
||||||
if (failures.length > 0) { |
|
||||||
console.log('\nFailures:'); |
|
||||||
failures.forEach(f => console.error(` - ${f}`)); |
|
||||||
process.exit(1); |
|
||||||
} else { |
|
||||||
console.log('\nAll tests passed!'); |
|
||||||
process.exit(0); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
// Run tests
|
|
||||||
runAsciiDocTests().catch(error => { |
|
||||||
console.error('Test runner error:', error); |
|
||||||
process.exit(1); |
|
||||||
}); |
|
||||||
@ -1,238 +0,0 @@ |
|||||||
import { Parser } from '../parser'; |
|
||||||
import { readFileSync, writeFileSync, mkdirSync } from 'fs'; |
|
||||||
import { join } from 'path'; |
|
||||||
|
|
||||||
describe('Parser', () => { |
|
||||||
let asciidocContent: string; |
|
||||||
let markdownContent: string; |
|
||||||
|
|
||||||
beforeAll(() => { |
|
||||||
asciidocContent = readFileSync(join(__dirname, '../../asciidoc_testdoc.adoc'), 'utf-8'); |
|
||||||
markdownContent = readFileSync(join(__dirname, '../../markdown_testdoc.md'), 'utf-8'); |
|
||||||
}); |
|
||||||
|
|
||||||
// AsciiDoc tests are run separately using a Node.js script (asciidoc.test.ts)
|
|
||||||
// due to Jest/Opal runtime compatibility issues
|
|
||||||
// Run with: npm run test:asciidoc
|
|
||||||
|
|
||||||
describe('Markdown Test Document', () => { |
|
||||||
let result: any; |
|
||||||
|
|
||||||
beforeAll(async () => { |
|
||||||
const parser = new Parser({ |
|
||||||
linkBaseURL: 'https://example.com', |
|
||||||
enableNostrAddresses: true, |
|
||||||
wikilinkUrl: '/events?d={dtag}', |
|
||||||
hashtagUrl: '/hashtag/{topic}' |
|
||||||
}); |
|
||||||
result = await parser.process(markdownContent); |
|
||||||
|
|
||||||
// Write HTML output to file for inspection
|
|
||||||
const outputDir = join(__dirname, '../../test-output'); |
|
||||||
try { |
|
||||||
mkdirSync(outputDir, { recursive: true }); |
|
||||||
} catch (e) { |
|
||||||
// Directory might already exist
|
|
||||||
} |
|
||||||
|
|
||||||
const htmlOutput = `<!DOCTYPE html>
|
|
||||||
<html lang="en"> |
|
||||||
<head> |
|
||||||
<meta charset="UTF-8"> |
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
||||||
<title>Markdown Test Output</title> |
|
||||||
<style> |
|
||||||
body { font-family: sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; line-height: 1.6; } |
|
||||||
.hashtag { color: #1da1f2; font-weight: 500; } |
|
||||||
.wikilink { color: #0066cc; text-decoration: underline; } |
|
||||||
.nostr-link { color: #8b5cf6; text-decoration: underline; } |
|
||||||
pre { background: #f5f5f5; padding: 10px; border-radius: 4px; overflow-x: auto; } |
|
||||||
code { background: #f5f5f5; padding: 2px 4px; border-radius: 3px; font-family: 'Courier New', monospace; } |
|
||||||
.bare-image, .bare-video, .bare-audio { max-width: 100%; margin: 10px 0; } |
|
||||||
.bare-video, .bare-audio { width: 100%; max-width: 600px; } |
|
||||||
blockquote { border-left: 4px solid #ddd; padding-left: 1em; margin: 1em 0; color: #666; } |
|
||||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
|
||||||
table th, table td { border: 1px solid #ddd; padding: 8px; text-align: left; } |
|
||||||
table th { background-color: #f2f2f2; } |
|
||||||
</style> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<h1>Markdown Test Document - Parsed Output</h1> |
|
||||||
<hr> |
|
||||||
${result.content} |
|
||||||
<hr> |
|
||||||
<h2>Metadata</h2> |
|
||||||
<pre>${JSON.stringify({ |
|
||||||
frontmatter: result.frontmatter, |
|
||||||
hasLaTeX: result.hasLaTeX, |
|
||||||
hasMusicalNotation: result.hasMusicalNotation, |
|
||||||
nostrLinks: result.nostrLinks, |
|
||||||
wikilinks: result.wikilinks, |
|
||||||
hashtags: result.hashtags, |
|
||||||
links: result.links, |
|
||||||
media: result.media |
|
||||||
}, null, 2)}</pre> |
|
||||||
</body> |
|
||||||
</html>`;
|
|
||||||
|
|
||||||
const outputPath = join(outputDir, 'markdown-output.html'); |
|
||||||
writeFileSync(outputPath, htmlOutput, 'utf-8'); |
|
||||||
// Use console.info to ensure it shows in Jest output
|
|
||||||
console.info(`\n📄 HTML output written to: ${outputPath}\n`); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should parse Markdown content', () => { |
|
||||||
expect(result).toBeDefined(); |
|
||||||
expect(result.content).toBeDefined(); |
|
||||||
expect(typeof result.content).toBe('string'); |
|
||||||
expect(result.content.length).toBeGreaterThan(0); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should have HTML content', () => { |
|
||||||
expect(result.content).toContain('<'); |
|
||||||
expect(result.content).toContain('>'); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract frontmatter', () => { |
|
||||||
expect(result.frontmatter).toBeDefined(); |
|
||||||
expect(typeof result.frontmatter).toBe('object'); |
|
||||||
expect(result.frontmatter).toHaveProperty('author'); |
|
||||||
expect(result.frontmatter.author).toBe('James Smith'); |
|
||||||
expect(result.frontmatter).toHaveProperty('summary'); |
|
||||||
expect(result.frontmatter.summary).toBe('This is a summary'); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should detect LaTeX', () => { |
|
||||||
expect(result.hasLaTeX).toBeDefined(); |
|
||||||
expect(typeof result.hasLaTeX).toBe('boolean'); |
|
||||||
// The test doc has LaTeX, so it should be true
|
|
||||||
expect(result.hasLaTeX).toBe(true); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should detect musical notation', () => { |
|
||||||
expect(result.hasMusicalNotation).toBeDefined(); |
|
||||||
expect(typeof result.hasMusicalNotation).toBe('boolean'); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract nostr links', () => { |
|
||||||
expect(result.nostrLinks).toBeDefined(); |
|
||||||
expect(Array.isArray(result.nostrLinks)).toBe(true); |
|
||||||
expect(result.nostrLinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
// Check that nostr: addresses are extracted
|
|
||||||
const nostrLink = result.nostrLinks[0]; |
|
||||||
expect(nostrLink).toHaveProperty('type'); |
|
||||||
expect(nostrLink).toHaveProperty('id'); |
|
||||||
expect(nostrLink).toHaveProperty('text'); |
|
||||||
expect(nostrLink).toHaveProperty('bech32'); |
|
||||||
expect(['npub', 'nprofile', 'nevent', 'naddr', 'note']).toContain(nostrLink.type); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract wikilinks', () => { |
|
||||||
expect(result.wikilinks).toBeDefined(); |
|
||||||
expect(Array.isArray(result.wikilinks)).toBe(true); |
|
||||||
expect(result.wikilinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
// Check wikilink structure
|
|
||||||
const wikilink = result.wikilinks[0]; |
|
||||||
expect(wikilink).toHaveProperty('dtag'); |
|
||||||
expect(wikilink).toHaveProperty('display'); |
|
||||||
expect(wikilink).toHaveProperty('original'); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract hashtags', () => { |
|
||||||
expect(result.hashtags).toBeDefined(); |
|
||||||
expect(Array.isArray(result.hashtags)).toBe(true); |
|
||||||
expect(result.hashtags.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
// Hashtags should not include the # symbol
|
|
||||||
result.hashtags.forEach((tag: string) => { |
|
||||||
expect(tag).not.toContain('#'); |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract regular links', () => { |
|
||||||
expect(result.links).toBeDefined(); |
|
||||||
expect(Array.isArray(result.links)).toBe(true); |
|
||||||
|
|
||||||
if (result.links.length > 0) { |
|
||||||
const link = result.links[0]; |
|
||||||
expect(link).toHaveProperty('url'); |
|
||||||
expect(link).toHaveProperty('text'); |
|
||||||
expect(link).toHaveProperty('isExternal'); |
|
||||||
expect(typeof link.isExternal).toBe('boolean'); |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
it('should extract media URLs', () => { |
|
||||||
expect(result.media).toBeDefined(); |
|
||||||
expect(Array.isArray(result.media)).toBe(true); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should process nostr: addresses in HTML', () => { |
|
||||||
// Check that nostr: addresses are converted to links
|
|
||||||
const nostrAddresses = result.nostrLinks; |
|
||||||
expect(nostrAddresses.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
// Check that HTML contains links for nostr addresses
|
|
||||||
nostrAddresses.forEach((link: any) => { |
|
||||||
expect(result.content).toContain(`data-nostr-type="${link.type}"`); |
|
||||||
expect(result.content).toContain(`data-nostr-id="${link.bech32}"`); |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should process wikilinks in HTML', () => { |
|
||||||
// Check that wikilinks are converted to links
|
|
||||||
const wikilinks = result.wikilinks; |
|
||||||
expect(wikilinks.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
wikilinks.forEach((wikilink: any) => { |
|
||||||
expect(result.content).toContain(`class="wikilink"`); |
|
||||||
expect(result.content).toContain(`data-dtag="${wikilink.dtag}"`); |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should process hashtags in HTML', () => { |
|
||||||
// Check that hashtags are processed
|
|
||||||
const hashtags = result.hashtags; |
|
||||||
expect(hashtags.length).toBeGreaterThan(0); |
|
||||||
|
|
||||||
hashtags.forEach((tag: string) => { |
|
||||||
expect(result.content).toContain(`data-topic="${tag}"`); |
|
||||||
expect(result.content).toMatch(new RegExp(`class="hashtag"`)); |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should contain expected content sections', () => { |
|
||||||
// Check for some expected content from the test doc
|
|
||||||
expect(result.content).toMatch(/Bullet list|bullet/i); |
|
||||||
expect(result.content).toMatch(/Headers|header/i); |
|
||||||
expect(result.content).toMatch(/Media and Links|media|links/i); |
|
||||||
}); |
|
||||||
|
|
||||||
it('should have empty table of contents for markdown', () => { |
|
||||||
// Markdown doesn't generate TOC by default
|
|
||||||
expect(result.tableOfContents).toBeDefined(); |
|
||||||
expect(typeof result.tableOfContents).toBe('string'); |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
describe('Result structure validation', () => { |
|
||||||
|
|
||||||
it('should return consistent structure for Markdown', async () => { |
|
||||||
const parser = new Parser(); |
|
||||||
const result = await parser.process(markdownContent); |
|
||||||
|
|
||||||
// Check all required fields
|
|
||||||
expect(result).toHaveProperty('content'); |
|
||||||
expect(result).toHaveProperty('tableOfContents'); |
|
||||||
expect(result).toHaveProperty('hasLaTeX'); |
|
||||||
expect(result).toHaveProperty('hasMusicalNotation'); |
|
||||||
expect(result).toHaveProperty('nostrLinks'); |
|
||||||
expect(result).toHaveProperty('wikilinks'); |
|
||||||
expect(result).toHaveProperty('hashtags'); |
|
||||||
expect(result).toHaveProperty('links'); |
|
||||||
expect(result).toHaveProperty('media'); |
|
||||||
}); |
|
||||||
}); |
|
||||||
}); |
|
||||||
@ -0,0 +1,332 @@ |
|||||||
|
import { ContentFormat } from '../types'; |
||||||
|
|
||||||
|
export interface ConvertOptions { |
||||||
|
enableNostrAddresses?: boolean; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Converts content to AsciiDoc format based on detected format |
||||||
|
* This is the unified entry point - everything becomes AsciiDoc |
||||||
|
*/ |
||||||
|
export function convertToAsciidoc( |
||||||
|
content: string, |
||||||
|
format: ContentFormat, |
||||||
|
linkBaseURL: string, |
||||||
|
options: ConvertOptions = {} |
||||||
|
): string { |
||||||
|
let asciidoc = ''; |
||||||
|
|
||||||
|
switch (format) { |
||||||
|
case ContentFormat.AsciiDoc: |
||||||
|
// For AsciiDoc content, ensure proper formatting
|
||||||
|
asciidoc = content.replace(/\\n/g, '\n'); |
||||||
|
|
||||||
|
// Ensure headers are on their own lines with proper spacing
|
||||||
|
asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, (_match, before, header) => { |
||||||
|
return `${before}\n\n${header}`; |
||||||
|
}); |
||||||
|
break; |
||||||
|
|
||||||
|
case ContentFormat.Wikipedia: |
||||||
|
asciidoc = convertWikipediaToAsciidoc(content); |
||||||
|
break; |
||||||
|
|
||||||
|
case ContentFormat.Markdown: |
||||||
|
asciidoc = convertMarkdownToAsciidoc(content); |
||||||
|
break; |
||||||
|
|
||||||
|
case ContentFormat.Plain: |
||||||
|
default: |
||||||
|
asciidoc = convertPlainTextToAsciidoc(content); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
// Process special elements for all content types
|
||||||
|
// Process wikilinks
|
||||||
|
asciidoc = processWikilinks(asciidoc, linkBaseURL); |
||||||
|
|
||||||
|
// Process nostr: addresses if enabled
|
||||||
|
if (options.enableNostrAddresses !== false) { |
||||||
|
asciidoc = processNostrAddresses(asciidoc, linkBaseURL); |
||||||
|
} |
||||||
|
|
||||||
|
// Process hashtags
|
||||||
|
asciidoc = processHashtags(asciidoc); |
||||||
|
|
||||||
|
return asciidoc; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Converts Wikipedia markup to AsciiDoc format |
||||||
|
* Handles Wikipedia-style headings, links, and formatting |
||||||
|
*/ |
||||||
|
function convertWikipediaToAsciidoc(content: string): string { |
||||||
|
let asciidoc = content.replace(/\\n/g, '\n'); |
||||||
|
|
||||||
|
// Convert Wikipedia headings: == Heading == to AsciiDoc == Heading
|
||||||
|
// Wikipedia uses == for level 2, === for level 3, etc.
|
||||||
|
// AsciiDoc uses = for title, == for level 1, === for level 2, etc.
|
||||||
|
// So Wikipedia level 2 (==) maps to AsciiDoc level 1 (==)
|
||||||
|
asciidoc = asciidoc.replace(/^(=+)\s+(.+?)\s+\1$/gm, (match, equals, heading) => { |
||||||
|
const level = equals.length - 1; // Count = signs, subtract 1 for AsciiDoc mapping
|
||||||
|
const asciidocEquals = '='.repeat(level + 1); // AsciiDoc uses one more = for same level
|
||||||
|
return `${asciidocEquals} ${heading.trim()}`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert Wikipedia bold: ''text'' to AsciiDoc *text*
|
||||||
|
asciidoc = asciidoc.replace(/''([^']+)''/g, '*$1*'); |
||||||
|
|
||||||
|
// Convert Wikipedia italic: 'text' to AsciiDoc _text_
|
||||||
|
// Be careful not to match apostrophes in words
|
||||||
|
asciidoc = asciidoc.replace(/(^|[^'])'([^']+)'([^']|$)/g, '$1_$2_$3'); |
||||||
|
|
||||||
|
// Convert Wikipedia links: [[Page]] or [[Page|Display]] to wikilinks
|
||||||
|
// These will be processed by processWikilinks later, but we need to ensure
|
||||||
|
// they're in the right format. Wikipedia links are already in [[...]] format
|
||||||
|
// which matches our wikilink format, so they should work as-is.
|
||||||
|
|
||||||
|
// Convert Wikipedia external links: [URL text] to AsciiDoc link:URL[text]
|
||||||
|
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\s+([^\]]+)\]/g, 'link:$1[$2]'); |
||||||
|
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\]/g, 'link:$1[$1]'); |
||||||
|
|
||||||
|
// Convert Wikipedia lists (they use * or # similar to Markdown)
|
||||||
|
// This is handled similarly to Markdown, so we can reuse that logic
|
||||||
|
// But Wikipedia also uses : for definition lists and ; for term lists
|
||||||
|
// For now, we'll handle basic lists and let AsciiDoc handle the rest
|
||||||
|
|
||||||
|
// Convert horizontal rules: ---- to AsciiDoc '''
|
||||||
|
asciidoc = asciidoc.replace(/^----+$/gm, "'''"); |
||||||
|
|
||||||
|
return asciidoc; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Converts Markdown to AsciiDoc format |
||||||
|
* Based on jumble's conversion patterns |
||||||
|
*/ |
||||||
|
function convertMarkdownToAsciidoc(content: string): string { |
||||||
|
let asciidoc = content.replace(/\\n/g, '\n'); |
||||||
|
|
||||||
|
// Fix spacing issues
|
||||||
|
asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)'); |
||||||
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3'); |
||||||
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` ('); |
||||||
|
asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2'); |
||||||
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2'); |
||||||
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 =='); |
||||||
|
|
||||||
|
// Note: nostr: addresses are processed later in processNostrAddresses
|
||||||
|
|
||||||
|
// Convert headers
|
||||||
|
asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======'); |
||||||
|
asciidoc = asciidoc.replace(/^#{5}\s+(.+)$/gm, '===== $1 ====='); |
||||||
|
asciidoc = asciidoc.replace(/^#{4}\s+(.+)$/gm, '==== $1 ===='); |
||||||
|
asciidoc = asciidoc.replace(/^#{3}\s+(.+)$/gm, '=== $1 ==='); |
||||||
|
asciidoc = asciidoc.replace(/^#{2}\s+(.+)$/gm, '== $1 =='); |
||||||
|
asciidoc = asciidoc.replace(/^#{1}\s+(.+)$/gm, '= $1 ='); |
||||||
|
asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 =='); |
||||||
|
asciidoc = asciidoc.replace(/\s==\s+([^=]+?)\s+==\s/g, ' == $1 == '); |
||||||
|
|
||||||
|
// Convert emphasis
|
||||||
|
asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold
|
||||||
|
asciidoc = asciidoc.replace(/__(.+?)__/g, '*$1*'); // Bold
|
||||||
|
asciidoc = asciidoc.replace(/\*(.+?)\*/g, '_$1_'); // Italic
|
||||||
|
asciidoc = asciidoc.replace(/_(.+?)_/g, '_$1_'); // Italic
|
||||||
|
asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough
|
||||||
|
asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript
|
||||||
|
asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript
|
||||||
|
|
||||||
|
// Convert code blocks (handle both \n and \r\n line endings)
|
||||||
|
asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, (_match, lang, code) => { |
||||||
|
const trimmedCode = code.trim(); |
||||||
|
if (trimmedCode.length === 0) return ''; |
||||||
|
|
||||||
|
const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode); |
||||||
|
const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50; |
||||||
|
const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3; |
||||||
|
const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode); |
||||||
|
|
||||||
|
if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) { |
||||||
|
return _match; |
||||||
|
} |
||||||
|
|
||||||
|
return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`; |
||||||
|
}); |
||||||
|
asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code
|
||||||
|
asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code
|
||||||
|
|
||||||
|
// Convert images
|
||||||
|
asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]'); |
||||||
|
asciidoc = asciidoc.replace(/image::([^\[]+)\[([^\]]+),width=100%\]/g, 'image::$1[$2,width=100%]'); |
||||||
|
|
||||||
|
// Convert links
|
||||||
|
asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]'); |
||||||
|
|
||||||
|
// Convert horizontal rules
|
||||||
|
asciidoc = asciidoc.replace(/^---$/gm, '\'\'\''); |
||||||
|
|
||||||
|
// Convert unordered lists
|
||||||
|
asciidoc = asciidoc.replace(/^(\s*)\*\s+(.+)$/gm, '$1* $2'); |
||||||
|
asciidoc = asciidoc.replace(/^(\s*)-\s+(.+)$/gm, '$1* $2'); |
||||||
|
asciidoc = asciidoc.replace(/^(\s*)\+\s+(.+)$/gm, '$1* $2'); |
||||||
|
|
||||||
|
// Convert ordered lists
|
||||||
|
asciidoc = asciidoc.replace(/^(\s*)\d+\.\s+(.+)$/gm, '$1. $2'); |
||||||
|
|
||||||
|
// Convert blockquotes with attribution
|
||||||
|
asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => { |
||||||
|
const lines = match.split('\n').map(line => line.replace(/^>\s*/, '')); |
||||||
|
|
||||||
|
let quoteBodyLines: string[] = []; |
||||||
|
let attributionLine: string | undefined; |
||||||
|
|
||||||
|
for (let i = lines.length - 1; i >= 0; i--) { |
||||||
|
const line = lines[i].trim(); |
||||||
|
if (line.startsWith('—') || line.startsWith('--')) { |
||||||
|
attributionLine = line; |
||||||
|
quoteBodyLines = lines.slice(0, i); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim(); |
||||||
|
|
||||||
|
if (attributionLine) { |
||||||
|
let cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim(); |
||||||
|
|
||||||
|
let author = ''; |
||||||
|
let source = ''; |
||||||
|
|
||||||
|
const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^[\\]]+)\[([^\\]]+)\]$/); |
||||||
|
|
||||||
|
if (linkMatch) { |
||||||
|
author = linkMatch[1].trim(); |
||||||
|
source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`; |
||||||
|
} else { |
||||||
|
const parts = cleanedAttribution.split(',').map(p => p.trim()); |
||||||
|
author = parts[0]; |
||||||
|
if (parts.length > 1) { |
||||||
|
source = parts.slice(1).join(', ').trim(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`; |
||||||
|
} else { |
||||||
|
return `____\n${quoteContent}\n____`; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert tables
|
||||||
|
asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|]*[\r\n]+(\|.*\|[\r\n]+)*)/g, (match) => { |
||||||
|
const lines = match.trim().split('\n').filter(line => line.trim()); |
||||||
|
if (lines.length < 2) return match; |
||||||
|
|
||||||
|
const headerRow = lines[0]; |
||||||
|
const separatorRow = lines[1]; |
||||||
|
const dataRows = lines.slice(2); |
||||||
|
|
||||||
|
if (!separatorRow.includes('-')) return match; |
||||||
|
|
||||||
|
let tableAsciidoc = '[cols="1,1"]\n|===\n'; |
||||||
|
tableAsciidoc += headerRow + '\n'; |
||||||
|
dataRows.forEach(row => { |
||||||
|
tableAsciidoc += row + '\n'; |
||||||
|
}); |
||||||
|
tableAsciidoc += '|==='; |
||||||
|
|
||||||
|
return tableAsciidoc; |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert footnotes
|
||||||
|
const footnoteDefinitions: { [id: string]: string } = {}; |
||||||
|
let tempAsciidoc = asciidoc; |
||||||
|
|
||||||
|
tempAsciidoc = tempAsciidoc.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_, id, text) => { |
||||||
|
footnoteDefinitions[id] = text.trim(); |
||||||
|
return ''; |
||||||
|
}); |
||||||
|
|
||||||
|
asciidoc = tempAsciidoc.replace(/\[\^([^\]]+)\]/g, (match, id) => { |
||||||
|
if (footnoteDefinitions[id]) { |
||||||
|
return `footnote:[${footnoteDefinitions[id]}]`; |
||||||
|
} |
||||||
|
return match; |
||||||
|
}); |
||||||
|
|
||||||
|
return asciidoc; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Converts plain text to AsciiDoc format |
||||||
|
* Preserves line breaks by converting single newlines to line continuations |
||||||
|
*/ |
||||||
|
function convertPlainTextToAsciidoc(content: string): string { |
||||||
|
// Preserve double newlines (paragraph breaks)
|
||||||
|
// Convert single newlines to line continuations ( +\n)
|
||||||
|
return content |
||||||
|
.replace(/\r\n/g, '\n') // Normalize line endings
|
||||||
|
.replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double
|
||||||
|
.replace(/([^\n])\n([^\n])/g, '$1 +\n$2'); // Single newlines become line continuations
|
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Normalizes text to d-tag format |
||||||
|
*/ |
||||||
|
function normalizeDtag(text: string): string { |
||||||
|
return text |
||||||
|
.toLowerCase() |
||||||
|
.replace(/[^a-z0-9]+/g, '-') |
||||||
|
.replace(/^-+|-+$/g, ''); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Processes wikilinks: [[target]] or [[target|display text]] |
||||||
|
* Converts to WIKILINK: placeholder format to protect from AsciiDoc processing |
||||||
|
*/ |
||||||
|
function processWikilinks(content: string, linkBaseURL: string): string { |
||||||
|
// Process bookstr macro wikilinks: [[book::...]]
|
||||||
|
content = content.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => { |
||||||
|
const cleanContent = bookContent.trim(); |
||||||
|
return `BOOKSTR:${cleanContent}`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Process standard wikilinks: [[Target Page]] or [[target page|see this]]
|
||||||
|
// Use placeholder format to prevent AsciiDoc from processing the brackets
|
||||||
|
content = content.replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, (_match, target, displayText) => { |
||||||
|
const cleanTarget = target.trim(); |
||||||
|
const cleanDisplay = displayText ? displayText.trim() : cleanTarget; |
||||||
|
const dTag = normalizeDtag(cleanTarget); |
||||||
|
|
||||||
|
// Use placeholder format: WIKILINK:dtag|display
|
||||||
|
// This prevents AsciiDoc from interpreting the brackets
|
||||||
|
return `WIKILINK:${dTag}|${cleanDisplay}`; |
||||||
|
}); |
||||||
|
|
||||||
|
return content; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Processes nostr: addresses |
||||||
|
* Converts to link:nostr:...[...] format |
||||||
|
* Valid bech32 prefixes: npub, nprofile, nevent, naddr, note |
||||||
|
*/ |
||||||
|
function processNostrAddresses(content: string, linkBaseURL: string): string { |
||||||
|
// Match nostr: followed by valid bech32 prefix and identifier
|
||||||
|
// Bech32 format: prefix + separator (1) + data (at least 6 chars for valid identifiers)
|
||||||
|
const nostrPattern = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi; |
||||||
|
return content.replace(nostrPattern, (_match, bech32Id) => { |
||||||
|
return `link:nostr:${bech32Id}[${bech32Id}]`; |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Processes hashtags |
||||||
|
* Converts to hashtag:tag[#tag] format |
||||||
|
*/ |
||||||
|
function processHashtags(content: string): string { |
||||||
|
// Match # followed by word characters, avoiding those in URLs, code blocks, etc.
|
||||||
|
return content.replace(/\B#([a-zA-Z0-9_]+)/g, (_match, hashtag) => { |
||||||
|
const normalizedHashtag = hashtag.toLowerCase(); |
||||||
|
return `hashtag:${normalizedHashtag}[#${hashtag}]`; |
||||||
|
}); |
||||||
|
} |
||||||
@ -0,0 +1,274 @@ |
|||||||
|
import { NostrLink, Wikilink } from '../types'; |
||||||
|
|
||||||
|
/** Metadata collected from content before processing. */
export interface ExtractedMetadata {
  /** Deduplicated nostr: links found in the content. */
  nostrLinks: NostrLink[];
  /** Deduplicated [[wikilinks]] found in the content. */
  wikilinks: Wikilink[];
  /** Lowercased, deduplicated hashtag topics (without the leading '#'). */
  hashtags: string[];
  /** Markdown/AsciiDoc/raw links with display text and an external-host flag. */
  links: Array<{ url: string; text: string; isExternal: boolean }>;
  /** Image and video URLs found in the content. */
  media: string[];
}
||||||
|
|
||||||
|
/** |
||||||
|
* Extracts metadata from content before processing |
||||||
|
*/ |
||||||
|
export function extractMetadata(content: string, linkBaseURL: string): ExtractedMetadata { |
||||||
|
return { |
||||||
|
nostrLinks: extractNostrLinks(content), |
||||||
|
wikilinks: extractWikilinks(content), |
||||||
|
hashtags: extractHashtags(content), |
||||||
|
links: extractLinks(content, linkBaseURL), |
||||||
|
media: extractMedia(content), |
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Extract Nostr links from content |
||||||
|
*/ |
||||||
|
function extractNostrLinks(content: string): NostrLink[] { |
||||||
|
const nostrLinks: NostrLink[] = []; |
||||||
|
const seen = new Set<string>(); |
||||||
|
|
||||||
|
// Extract nostr: prefixed links (valid bech32 format)
|
||||||
|
const nostrMatches = content.match(/nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi) || []; |
||||||
|
nostrMatches.forEach(match => { |
||||||
|
const id = match.substring(6); // Remove 'nostr:'
|
||||||
|
const type = getNostrType(id); |
||||||
|
if (type && !seen.has(id)) { |
||||||
|
seen.add(id); |
||||||
|
nostrLinks.push({ |
||||||
|
type, |
||||||
|
id, |
||||||
|
text: match, |
||||||
|
bech32: id, |
||||||
|
}); |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
return nostrLinks; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Extract wikilinks from content |
||||||
|
*/ |
||||||
|
function extractWikilinks(content: string): Wikilink[] { |
||||||
|
const wikilinks: Wikilink[] = []; |
||||||
|
const seen = new Set<string>(); |
||||||
|
|
||||||
|
// Match [[target]] or [[target|display]]
|
||||||
|
const wikilinkPattern = /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g; |
||||||
|
let match; |
||||||
|
|
||||||
|
while ((match = wikilinkPattern.exec(content)) !== null) { |
||||||
|
const target = match[1].trim(); |
||||||
|
const display = match[2] ? match[2].trim() : target; |
||||||
|
const dtag = normalizeDtag(target); |
||||||
|
const key = `${dtag}|${display}`; |
||||||
|
|
||||||
|
if (!seen.has(key)) { |
||||||
|
seen.add(key); |
||||||
|
wikilinks.push({ |
||||||
|
dtag, |
||||||
|
display, |
||||||
|
original: match[0], |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return wikilinks; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Extract hashtags from content |
||||||
|
* Excludes hashtags in URLs, code blocks, and inline code |
||||||
|
*/ |
||||||
|
function extractHashtags(content: string): string[] { |
||||||
|
const hashtags: string[] = []; |
||||||
|
const seen = new Set<string>(); |
||||||
|
|
||||||
|
// Remove code blocks first to avoid matching inside them
|
||||||
|
const codeBlockPattern = /```[\s\S]*?```/g; |
||||||
|
const inlineCodePattern = /`[^`]+`/g; |
||||||
|
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||||
|
|
||||||
|
let processedContent = content |
||||||
|
.replace(codeBlockPattern, '') // Remove code blocks
|
||||||
|
.replace(inlineCodePattern, '') // Remove inline code
|
||||||
|
.replace(urlPattern, ''); // Remove URLs
|
||||||
|
|
||||||
|
// Extract hashtags: #hashtag (word boundary to avoid matching in URLs)
|
||||||
|
const hashtagPattern = /\B#([a-zA-Z0-9_]+)/g; |
||||||
|
let match; |
||||||
|
|
||||||
|
while ((match = hashtagPattern.exec(processedContent)) !== null) { |
||||||
|
const tag = match[1].toLowerCase(); |
||||||
|
if (!seen.has(tag)) { |
||||||
|
hashtags.push(tag); |
||||||
|
seen.add(tag); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return hashtags; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Extract regular links from content |
||||||
|
*/ |
||||||
|
function extractLinks(content: string, linkBaseURL: string): Array<{ url: string; text: string; isExternal: boolean }> { |
||||||
|
const links: Array<{ url: string; text: string; isExternal: boolean }> = []; |
||||||
|
const seen = new Set<string>(); |
||||||
|
|
||||||
|
// Extract markdown links: [text](url) - optimized to avoid double matching
|
||||||
|
const markdownLinkPattern = /\[([^\]]+)\]\(([^)]+)\)/g; |
||||||
|
let markdownMatch; |
||||||
|
while ((markdownMatch = markdownLinkPattern.exec(content)) !== null) { |
||||||
|
const [, text, url] = markdownMatch; |
||||||
|
if (!seen.has(url) && !isNostrUrl(url)) { |
||||||
|
seen.add(url); |
||||||
|
links.push({ |
||||||
|
url, |
||||||
|
text, |
||||||
|
isExternal: isExternalUrl(url, linkBaseURL), |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Extract asciidoc links: link:url[text] - optimized to avoid double matching
|
||||||
|
const asciidocLinkPattern = /link:([^\[]+)\[([^\]]+)\]/g; |
||||||
|
let asciidocMatch; |
||||||
|
while ((asciidocMatch = asciidocLinkPattern.exec(content)) !== null) { |
||||||
|
const [, url, text] = asciidocMatch; |
||||||
|
if (!seen.has(url) && !isNostrUrl(url)) { |
||||||
|
seen.add(url); |
||||||
|
links.push({ |
||||||
|
url, |
||||||
|
text, |
||||||
|
isExternal: isExternalUrl(url, linkBaseURL), |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Extract raw URLs (basic pattern)
|
||||||
|
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||||
|
const rawUrls = content.match(urlPattern) || []; |
||||||
|
rawUrls.forEach(url => { |
||||||
|
if (!seen.has(url) && !isNostrUrl(url)) { |
||||||
|
seen.add(url); |
||||||
|
links.push({ |
||||||
|
url, |
||||||
|
text: url, |
||||||
|
isExternal: isExternalUrl(url, linkBaseURL), |
||||||
|
}); |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
return links; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Extract media URLs from content |
||||||
|
*/ |
||||||
|
function extractMedia(content: string): string[] { |
||||||
|
const media: string[] = []; |
||||||
|
const seen = new Set<string>(); |
||||||
|
|
||||||
|
// Extract markdown images:  - optimized to avoid double matching
|
||||||
|
const markdownImagePattern = /!\[[^\]]*\]\(([^)]+)\)/g; |
||||||
|
let markdownImageMatch; |
||||||
|
while ((markdownImageMatch = markdownImagePattern.exec(content)) !== null) { |
||||||
|
const url = markdownImageMatch[1]; |
||||||
|
if (url && !seen.has(url)) { |
||||||
|
if (isImageUrl(url) || isVideoUrl(url)) { |
||||||
|
media.push(url); |
||||||
|
seen.add(url); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Extract asciidoc images: image::url[alt] - optimized to avoid double matching
|
||||||
|
const asciidocImagePattern = /image::([^\[]+)\[/g; |
||||||
|
let asciidocImageMatch; |
||||||
|
while ((asciidocImageMatch = asciidocImagePattern.exec(content)) !== null) { |
||||||
|
const url = asciidocImageMatch[1]; |
||||||
|
if (url && !seen.has(url)) { |
||||||
|
if (isImageUrl(url) || isVideoUrl(url)) { |
||||||
|
media.push(url); |
||||||
|
seen.add(url); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Extract raw image/video URLs
|
||||||
|
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||||
|
const rawUrls = content.match(urlPattern) || []; |
||||||
|
rawUrls.forEach(url => { |
||||||
|
if (!seen.has(url) && (isImageUrl(url) || isVideoUrl(url))) { |
||||||
|
media.push(url); |
||||||
|
seen.add(url); |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
return media; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Get Nostr identifier type |
||||||
|
*/ |
||||||
|
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||||
|
if (id.startsWith('npub')) return 'npub'; |
||||||
|
if (id.startsWith('nprofile')) return 'nprofile'; |
||||||
|
if (id.startsWith('nevent')) return 'nevent'; |
||||||
|
if (id.startsWith('naddr')) return 'naddr'; |
||||||
|
if (id.startsWith('note')) return 'note'; |
||||||
|
return null; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Normalize text to d-tag format |
||||||
|
*/ |
||||||
|
function normalizeDtag(text: string): string { |
||||||
|
return text |
||||||
|
.toLowerCase() |
||||||
|
.replace(/[^a-z0-9]+/g, '-') |
||||||
|
.replace(/^-+|-+$/g, ''); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Check if URL is external |
||||||
|
*/ |
||||||
|
function isExternalUrl(url: string, linkBaseURL: string): boolean { |
||||||
|
if (!linkBaseURL) return true; |
||||||
|
try { |
||||||
|
// Use a simple string-based check for Node.js compatibility
|
||||||
|
// Extract hostname from URL string
|
||||||
|
const urlMatch = url.match(/^https?:\/\/([^\/]+)/); |
||||||
|
const baseMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/); |
||||||
|
|
||||||
|
if (urlMatch && baseMatch) { |
||||||
|
return urlMatch[1] !== baseMatch[1]; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} catch { |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Check if URL is a Nostr URL |
||||||
|
*/ |
||||||
|
function isNostrUrl(url: string): boolean { |
||||||
|
return url.startsWith('nostr:') || getNostrType(url) !== null; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Check if URL is an image |
||||||
|
*/ |
||||||
|
function isImageUrl(url: string): boolean { |
||||||
|
return /\.(jpeg|jpg|png|gif|webp|svg)$/i.test(url); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Check if URL is a video |
||||||
|
*/ |
||||||
|
function isVideoUrl(url: string): boolean { |
||||||
|
return /\.(mp4|webm|ogg)$/i.test(url); |
||||||
|
} |
||||||
@ -1,562 +0,0 @@ |
|||||||
import { ParserOptions, NostrLink, Wikilink } from './types'; |
|
||||||
|
|
||||||
/**
 * Result of post-processing HTML: the transformed markup plus the
 * wikilinks, hashtags, and nostr: addresses that were found in it.
 */
export interface PostProcessResult {
  /** The processed HTML string. */
  html: string;
  /** Nostr links discovered during processing. */
  nostrLinks: NostrLink[];
  /** Wikilinks discovered during processing. */
  wikilinks: Wikilink[];
  /** Hashtag topics (without the leading '#') discovered during processing. */
  hashtags: string[];
}
|
||||||
|
|
||||||
/** |
|
||||||
* Post-process HTML to convert wikilinks, hashtags, and nostr: addresses |
|
||||||
* @param skipWikilinksAndHashtags - If true, skip processing wikilinks and hashtags (already processed) |
|
||||||
*/ |
|
||||||
export function postProcess(html: string, options: ParserOptions, skipWikilinksAndHashtags: boolean = false): PostProcessResult { |
|
||||||
let processed = html; |
|
||||||
const nostrLinks: NostrLink[] = []; |
|
||||||
const wikilinks: Wikilink[] = []; |
|
||||||
const hashtags: string[] = []; |
|
||||||
|
|
||||||
// First, mark code blocks to avoid processing inside them
|
|
||||||
const codeBlockMarkers: Array<{ start: number; end: number }> = []; |
|
||||||
const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi; |
|
||||||
let match; |
|
||||||
while ((match = codeBlockRegex.exec(html)) !== null) { |
|
||||||
codeBlockMarkers.push({ start: match.index, end: match.index + match[0].length }); |
|
||||||
} |
|
||||||
|
|
||||||
function isInCodeBlock(index: number): boolean { |
|
||||||
return codeBlockMarkers.some(marker => index >= marker.start && index < marker.end); |
|
||||||
} |
|
||||||
|
|
||||||
// Process nostr: addresses (but not in code blocks)
|
|
||||||
if (options.enableNostrAddresses !== false) { |
|
||||||
const nostrRegex = /nostr:([np][a-z0-9]+1[a-z0-9]+)/gi; |
|
||||||
const replacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
|
|
||||||
while ((match = nostrRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
|
|
||||||
const bech32 = match[1]; |
|
||||||
const type = getNostrType(bech32); |
|
||||||
if (!type) continue; |
|
||||||
|
|
||||||
const link: NostrLink = { |
|
||||||
type, |
|
||||||
id: bech32, |
|
||||||
text: match[0], |
|
||||||
bech32: bech32 |
|
||||||
}; |
|
||||||
nostrLinks.push(link); |
|
||||||
|
|
||||||
const url = options.linkBaseURL
|
|
||||||
? `${options.linkBaseURL}/nostr/${bech32}` |
|
||||||
: `#nostr-${bech32}`; |
|
||||||
|
|
||||||
replacements.push({ |
|
||||||
match: match[0], |
|
||||||
replacement: `<a href="${escapeHtml(url)}" class="nostr-link" data-nostr-type="${type}" data-nostr-id="${escapeHtml(bech32)}">${escapeHtml(match[0])}</a>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Apply replacements in reverse order to preserve indices
|
|
||||||
replacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Process wikilinks: [[dtag]] or [[dtag|display]] (but not in code blocks)
|
|
||||||
// Skip if already processed (for AsciiDoc)
|
|
||||||
if (!skipWikilinksAndHashtags) { |
|
||||||
const wikilinkRegex = /\[\[([^\]]+)\]\]/g; |
|
||||||
const wikilinkReplacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
|
|
||||||
while ((match = wikilinkRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
|
|
||||||
// Skip if already inside a link tag
|
|
||||||
const beforeMatch = processed.substring(0, match.index); |
|
||||||
const lastOpenTag = beforeMatch.lastIndexOf('<a'); |
|
||||||
const lastCloseTag = beforeMatch.lastIndexOf('</a>'); |
|
||||||
if (lastOpenTag > lastCloseTag) continue; // Inside a link
|
|
||||||
|
|
||||||
const content = match[1]; |
|
||||||
const parts = content.split('|'); |
|
||||||
const dtag = parts[0].trim(); |
|
||||||
const display = parts.length > 1 ? parts.slice(1).join('|').trim() : dtag; |
|
||||||
|
|
||||||
const wikilink: Wikilink = { |
|
||||||
dtag, |
|
||||||
display, |
|
||||||
original: match[0] |
|
||||||
}; |
|
||||||
wikilinks.push(wikilink); |
|
||||||
|
|
||||||
let url: string; |
|
||||||
if (typeof options.wikilinkUrl === 'function') { |
|
||||||
url = options.wikilinkUrl(dtag); |
|
||||||
} else if (typeof options.wikilinkUrl === 'string') { |
|
||||||
url = options.wikilinkUrl.replace('{dtag}', encodeURIComponent(dtag)); |
|
||||||
} else { |
|
||||||
url = options.linkBaseURL
|
|
||||||
? `${options.linkBaseURL}/events?d=${encodeURIComponent(dtag)}` |
|
||||||
: `#${encodeURIComponent(dtag)}`; |
|
||||||
} |
|
||||||
|
|
||||||
wikilinkReplacements.push({ |
|
||||||
match: match[0], |
|
||||||
replacement: `<a href="${escapeHtml(url)}" class="wikilink" data-dtag="${escapeHtml(dtag)}">${escapeHtml(display)}</a>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Apply wikilink replacements in reverse order
|
|
||||||
wikilinkReplacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
|
|
||||||
// Process hashtags: #hashtag (but not in code blocks or inside HTML tags)
|
|
||||||
// Match hashtag at start of string, after whitespace, after >, or immediately after opening tags
|
|
||||||
const hashtagRegex = /(#[\w-]+)/g; |
|
||||||
const hashtagReplacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
|
|
||||||
while ((match = hashtagRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
|
|
||||||
// Check if we're inside an HTML tag
|
|
||||||
const beforeMatch = processed.substring(0, match.index); |
|
||||||
const lastOpenTag = beforeMatch.lastIndexOf('<'); |
|
||||||
const lastCloseTag = beforeMatch.lastIndexOf('>'); |
|
||||||
if (lastOpenTag > lastCloseTag) continue; // Inside a tag
|
|
||||||
|
|
||||||
// Skip if already inside a link or span
|
|
||||||
const lastLinkOpen = beforeMatch.lastIndexOf('<a'); |
|
||||||
const lastLinkClose = beforeMatch.lastIndexOf('</a>'); |
|
||||||
const lastSpanOpen = beforeMatch.lastIndexOf('<span'); |
|
||||||
const lastSpanClose = beforeMatch.lastIndexOf('</span>'); |
|
||||||
if (lastLinkOpen > lastLinkClose || lastSpanOpen > lastSpanClose) continue; |
|
||||||
|
|
||||||
// Check what's before the hashtag
|
|
||||||
const charBefore = match.index > 0 ? processed[match.index - 1] : ''; |
|
||||||
const beforeHashtag = processed.substring(Math.max(0, match.index - 100), match.index); |
|
||||||
const lastTagClose = beforeHashtag.lastIndexOf('>'); |
|
||||||
const textAfterTag = beforeHashtag.substring(lastTagClose + 1); |
|
||||||
|
|
||||||
// Hashtag is valid if:
|
|
||||||
// 1. At start of string
|
|
||||||
// 2. Preceded by whitespace
|
|
||||||
// 3. Preceded by >
|
|
||||||
// 4. Immediately after opening tag (like <p>#hashtag)
|
|
||||||
const isValidPosition =
|
|
||||||
match.index === 0 || |
|
||||||
/\s/.test(charBefore) || |
|
||||||
charBefore === '>' || |
|
||||||
(lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag)); |
|
||||||
|
|
||||||
if (!isValidPosition) continue; |
|
||||||
|
|
||||||
const hashtag = match[1]; |
|
||||||
const topic = hashtag.substring(1); |
|
||||||
const prefix = (match.index === 0 || charBefore === '>' || (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag)))
|
|
||||||
? ''
|
|
||||||
: charBefore; |
|
||||||
|
|
||||||
if (!hashtags.includes(topic)) { |
|
||||||
hashtags.push(topic); |
|
||||||
} |
|
||||||
|
|
||||||
let url: string | undefined; |
|
||||||
if (typeof options.hashtagUrl === 'function') { |
|
||||||
url = options.hashtagUrl(topic); |
|
||||||
} else if (typeof options.hashtagUrl === 'string') { |
|
||||||
url = options.hashtagUrl.replace('{topic}', encodeURIComponent(topic)); |
|
||||||
} |
|
||||||
|
|
||||||
const replacement = url |
|
||||||
? `${prefix}<a href="${escapeHtml(url)}" class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</a>` |
|
||||||
: `${prefix}<span class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</span>`; |
|
||||||
|
|
||||||
hashtagReplacements.push({ |
|
||||||
match: match[0], |
|
||||||
replacement, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Apply hashtag replacements in reverse order
|
|
||||||
hashtagReplacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Extract wikilinks and hashtags from already-processed HTML (for AsciiDoc)
|
|
||||||
if (skipWikilinksAndHashtags) { |
|
||||||
// Extract wikilinks from existing links
|
|
||||||
const wikilinkLinkRegex = /<a[^>]+class="wikilink"[^>]+data-dtag="([^"]+)"[^>]*>([^<]+)<\/a>/g; |
|
||||||
while ((match = wikilinkLinkRegex.exec(processed)) !== null) { |
|
||||||
wikilinks.push({ |
|
||||||
dtag: match[1], |
|
||||||
display: match[2], |
|
||||||
original: match[0] |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Extract hashtags from existing spans/links
|
|
||||||
const hashtagRegex = /<(?:a|span)[^>]+class="hashtag"[^>]+data-topic="([^"]+)"[^>]*>#\1<\/\w+>/g; |
|
||||||
while ((match = hashtagRegex.exec(processed)) !== null) { |
|
||||||
const topic = match[1]; |
|
||||||
if (!hashtags.includes(topic)) { |
|
||||||
hashtags.push(topic); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
// Remove links inside code blocks (both <code> and <pre> tags)
|
|
||||||
// This ensures URLs in code blocks remain as plain text
|
|
||||||
const codeBlockLinkRegex = /(<(?:code|pre)[^>]*>)([\s\S]*?)(<\/(?:code|pre)>)/gi; |
|
||||||
processed = processed.replace(codeBlockLinkRegex, (match, openTag, content, closeTag) => { |
|
||||||
// Remove all <a> tags inside code blocks, keeping only the text content
|
|
||||||
const cleanedContent = content.replace(/<a[^>]*>(.*?)<\/a>/gi, '$1'); |
|
||||||
return openTag + cleanedContent + closeTag; |
|
||||||
}); |
|
||||||
|
|
||||||
// Process YouTube URLs - ORDER IS CRITICAL to avoid double-parsing
|
|
||||||
// 1. FIRST: Fix video tags that contain YouTube URLs (before they get processed as bare URLs)
|
|
||||||
// AsciiDoc's video:: macro creates <video> tags, but YouTube URLs should be iframes
|
|
||||||
const youtubeVideoTagRegex = /<video[^>]+src="(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+))"[^>]*>[\s\S]*?<\/video>/gi; |
|
||||||
processed = processed.replace(youtubeVideoTagRegex, (match, url, videoId) => { |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// 2. SECOND: Process YouTube links in <a> tags
|
|
||||||
// IMPORTANT: Be very specific with YouTube regex to avoid matching Spotify URLs
|
|
||||||
const youtubeLinkRegex = /<a[^>]+href="(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+))"[^>]*>.*?<\/a>/gi; |
|
||||||
processed = processed.replace(youtubeLinkRegex, (match, url, videoId) => { |
|
||||||
if (isInCodeBlock(processed.indexOf(match))) return match; |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// 3. THIRD: Fix malformed YouTube iframes from AsciiDoc video:: macro
|
|
||||||
// AsciiDoc sometimes creates iframes with malformed YouTube URLs (watch?v= or shorts/ instead of embed/)
|
|
||||||
// Match the entire iframe element including closing tag to avoid duplicates
|
|
||||||
const malformedYoutubeIframeRegex = /<iframe[^>]+src="[^"]*youtube[^"]*(?:watch\?v=|shorts\/)([a-zA-Z0-9_-]+)[^"]*"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi; |
|
||||||
processed = processed.replace(malformedYoutubeIframeRegex, (match, videoId) => { |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// 3.5: Fix YouTube iframes with embed URLs but wrong parameters or missing required attributes
|
|
||||||
// AsciiDoc's video:: macro creates iframes with ?rel=0 or missing allow/referrerpolicy attributes
|
|
||||||
// Match iframes with embed URLs that don't have enablejsapi=1 or are missing required attributes
|
|
||||||
const incompleteYoutubeIframeRegex = /<iframe[^>]+src="https?:\/\/(?:www\.)?youtube\.com\/embed\/([a-zA-Z0-9_-]+)(\?[^"]*)?"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi; |
|
||||||
processed = processed.replace(incompleteYoutubeIframeRegex, (match, videoId, params) => { |
|
||||||
// Check if this iframe already has the correct format (has enablejsapi=1 and required attributes)
|
|
||||||
if (match.includes('enablejsapi=1') &&
|
|
||||||
match.includes('allow=') &&
|
|
||||||
match.includes('referrerpolicy=') && |
|
||||||
match.includes('class="youtube-embed"')) { |
|
||||||
return match; // Already correct, don't modify
|
|
||||||
} |
|
||||||
// Fix the iframe with proper attributes
|
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// 4. FOURTH: Fix any existing YouTube iframes that have malformed embed URLs (AsciiDoc sometimes creates broken embed URLs)
|
|
||||||
// Match the entire iframe element including closing tag to avoid duplicates
|
|
||||||
const brokenYoutubeIframeRegex = /<iframe[^>]+src="[^"]*youtube\.com\/embed\/[^"]*watch\?v=([a-zA-Z0-9_-]+)[^"]*"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi; |
|
||||||
processed = processed.replace(brokenYoutubeIframeRegex, (match, videoId) => { |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// 5. LAST: Handle bare YouTube URLs (not in links, video tags, or iframes)
|
|
||||||
// IMPORTANT: Match must be specific to youtube.com or youtu.be to avoid matching Spotify
|
|
||||||
// This must come AFTER processing video tags and links to avoid double-parsing
|
|
||||||
const bareYoutubeRegex = /(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+)(?:\?[^"\s<>]*)?)/gi; |
|
||||||
const youtubeReplacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
while ((match = bareYoutubeRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
|
|
||||||
// Check if it's already in a tag (link, iframe, video, etc.)
|
|
||||||
// Simple approach: check if we're inside quotes (attribute value) or between <tag and >
|
|
||||||
const before = processed.substring(Math.max(0, match.index - 500), match.index); |
|
||||||
const after = processed.substring(match.index, match.index + match[0].length + 100); |
|
||||||
|
|
||||||
// Check if URL is inside quotes (attribute value like src="..." or href="...")
|
|
||||||
const beforeContext = before.substring(Math.max(0, before.length - 100)); |
|
||||||
if (beforeContext.match(/<(iframe|video|a|img|audio|source)[^>]*\s+(src|href)="[^"]*$/i)) { |
|
||||||
continue; // Inside an attribute value, skip
|
|
||||||
} |
|
||||||
|
|
||||||
// Check if we're between an opening tag and its closing bracket
|
|
||||||
const lastOpenTag = before.lastIndexOf('<'); |
|
||||||
const lastCloseBracket = before.lastIndexOf('>'); |
|
||||||
if (lastOpenTag > lastCloseBracket) { |
|
||||||
// We're inside a tag, check what kind
|
|
||||||
const tagContent = before.substring(lastOpenTag); |
|
||||||
if (/<(iframe|video|a|img|audio|source)[^>]*$/i.test(tagContent)) { |
|
||||||
continue; // Skip URLs inside these tags
|
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
const videoId = match[2]; |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
youtubeReplacements.push({ |
|
||||||
match: match[0], |
|
||||||
replacement: `<iframe class="youtube-embed" frameborder="0" allowfullscreen allow="accelerometer; autoplay; clipboard-write; encrypted-media; fullscreen; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
youtubeReplacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
|
|
||||||
// Fix double-closed iframes (safety net)
|
|
||||||
processed = processed.replace(/<\/iframe><\/iframe>/gi, '</iframe>'); |
|
||||||
|
|
||||||
// Spotify: https://open.spotify.com/episode/ID or https://open.spotify.com/track/ID or https://open.spotify.com/album/ID
|
|
||||||
const spotifyLinkRegex = /<a[^>]+href="(https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+))[^"]*"[^>]*>.*?<\/a>/gi; |
|
||||||
processed = processed.replace(spotifyLinkRegex, (match, url, type, id) => { |
|
||||||
if (isInCodeBlock(processed.indexOf(match))) return match; |
|
||||||
const embedUrl = `https://open.spotify.com/embed/${type}/${id}`; |
|
||||||
return `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// Also handle bare Spotify URLs (not in links)
|
|
||||||
const bareSpotifyRegex = /(https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+)(?:\?[^"\s<>]*)?)/gi; |
|
||||||
const spotifyReplacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
while ((match = bareSpotifyRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
// Check if it's already in a tag
|
|
||||||
const before = processed.substring(0, match.index); |
|
||||||
const lastOpenTag = before.lastIndexOf('<'); |
|
||||||
const lastCloseTag = before.lastIndexOf('>'); |
|
||||||
if (lastOpenTag > lastCloseTag) continue; // Inside a tag
|
|
||||||
|
|
||||||
const type = match[2]; |
|
||||||
const id = match[3]; |
|
||||||
const embedUrl = `https://open.spotify.com/embed/${type}/${id}`; |
|
||||||
spotifyReplacements.push({ |
|
||||||
match: match[0], |
|
||||||
replacement: `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
spotifyReplacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
|
|
||||||
// Process bare image/media URLs that aren't already in tags
|
|
||||||
// First, convert bare links (class="bare") that contain image/video/audio URLs to actual media elements
|
|
||||||
// This handles cases where AsciiDoc has already converted URLs to links
|
|
||||||
// IMPORTANT: Check YouTube FIRST, then Spotify, BEFORE checking file extensions to avoid conflicts
|
|
||||||
const bareLinkRegex = /<a[^>]+href="(https?:\/\/[^"]+)"[^>]*class="[^"]*bare[^"]*"[^>]*>([^<]*)<\/a>/gi; |
|
||||||
processed = processed.replace(bareLinkRegex, (match, url, linkText) => { |
|
||||||
if (isInCodeBlock(processed.indexOf(match))) return match; |
|
||||||
|
|
||||||
// Check YouTube URLs FIRST (be very specific - must be youtube.com or youtu.be)
|
|
||||||
// This prevents accidentally matching Spotify URLs
|
|
||||||
const youtubeMatch = url.match(/https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+)/); |
|
||||||
if (youtubeMatch) { |
|
||||||
const videoId = youtubeMatch[1]; |
|
||||||
const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`; |
|
||||||
return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`; |
|
||||||
} |
|
||||||
|
|
||||||
// Check Spotify URLs (be very specific - must be open.spotify.com)
|
|
||||||
const spotifyMatch = url.match(/https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+)/); |
|
||||||
if (spotifyMatch) { |
|
||||||
const type = spotifyMatch[1]; |
|
||||||
const id = spotifyMatch[2]; |
|
||||||
const embedUrl = `https://open.spotify.com/embed/${type}/${id}`; |
|
||||||
return `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`; |
|
||||||
} |
|
||||||
|
|
||||||
// Check if it's an image URL
|
|
||||||
if (/\.(jpg|jpeg|png|gif|webp|svg|bmp)(\?|$)/i.test(url)) { |
|
||||||
return `<img src="${escapeHtml(url)}" alt="${escapeHtml(linkText)}" class="bare-image" />`; |
|
||||||
} |
|
||||||
// Check if it's a video URL (but not YouTube)
|
|
||||||
if (/\.(mp4|webm|ogg|mov|avi)(\?|$)/i.test(url)) { |
|
||||||
return `<video src="${escapeHtml(url)}" controls class="bare-video"></video>`; |
|
||||||
} |
|
||||||
// Check if it's an audio URL (but not Spotify)
|
|
||||||
if (/\.(mp3|wav|ogg|flac|aac|m4a)(\?|$)/i.test(url)) { |
|
||||||
return `<audio src="${escapeHtml(url)}" controls class="bare-audio"></audio>`; |
|
||||||
} |
|
||||||
|
|
||||||
// Not a media URL, return as-is
|
|
||||||
return match; |
|
||||||
}); |
|
||||||
|
|
||||||
// Now process bare URLs that aren't in any tags at all
|
|
||||||
// IMPORTANT: Skip YouTube and Spotify URLs - they're already processed above
|
|
||||||
const imageUrlRegex = /(https?:\/\/[^\s<>"']+\.(jpg|jpeg|png|gif|webp|svg|bmp))(?![^<]*>)/gi; |
|
||||||
const videoUrlRegex = /(https?:\/\/[^\s<>"']+\.(mp4|webm|ogg|mov|avi))(?![^<]*>)/gi; |
|
||||||
const audioUrlRegex = /(https?:\/\/[^\s<>"']+\.(mp3|wav|ogg|flac|aac|m4a))(?![^<]*>)/gi; |
|
||||||
|
|
||||||
// Check if URL is already in a tag
|
|
||||||
function isUrlInTag(url: string, index: number): boolean { |
|
||||||
const before = processed.substring(0, index); |
|
||||||
const after = processed.substring(index); |
|
||||||
|
|
||||||
// Check if it's inside an existing tag
|
|
||||||
const lastOpenTag = before.lastIndexOf('<'); |
|
||||||
const lastCloseTag = before.lastIndexOf('>'); |
|
||||||
if (lastOpenTag > lastCloseTag) { |
|
||||||
const tagContent = processed.substring(lastOpenTag, index + url.length); |
|
||||||
if (/<(img|video|audio|a|source|iframe)[^>]*>/i.test(tagContent)) { |
|
||||||
return true; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
return false; |
|
||||||
} |
|
||||||
|
|
||||||
const mediaReplacements: Array<{ match: string; replacement: string; index: number }> = []; |
|
||||||
|
|
||||||
// Process images
|
|
||||||
while ((match = imageUrlRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
if (isUrlInTag(match[0], match.index)) continue; |
|
||||||
|
|
||||||
const url = match[0]; |
|
||||||
mediaReplacements.push({ |
|
||||||
match: url, |
|
||||||
replacement: `<img src="${escapeHtml(url)}" alt="" class="bare-image" />`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Process videos (but skip YouTube URLs - they're handled above)
|
|
||||||
while ((match = videoUrlRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
if (isUrlInTag(match[0], match.index)) continue; |
|
||||||
// Skip YouTube URLs - they should be embeds, not video tags
|
|
||||||
if (/youtube\.com|youtu\.be/i.test(match[0])) continue; |
|
||||||
|
|
||||||
const url = match[0]; |
|
||||||
mediaReplacements.push({ |
|
||||||
match: url, |
|
||||||
replacement: `<video src="${escapeHtml(url)}" controls class="bare-video"></video>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Process audio
|
|
||||||
while ((match = audioUrlRegex.exec(processed)) !== null) { |
|
||||||
if (isInCodeBlock(match.index)) continue; |
|
||||||
if (isUrlInTag(match[0], match.index)) continue; |
|
||||||
|
|
||||||
const url = match[0]; |
|
||||||
mediaReplacements.push({ |
|
||||||
match: url, |
|
||||||
replacement: `<audio src="${escapeHtml(url)}" controls class="bare-audio"></audio>`, |
|
||||||
index: match.index |
|
||||||
}); |
|
||||||
} |
|
||||||
|
|
||||||
// Apply media replacements in reverse order
|
|
||||||
mediaReplacements.reverse().forEach(({ match, replacement, index }) => { |
|
||||||
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); |
|
||||||
}); |
|
||||||
|
|
||||||
// Process markdown table alignment
|
|
||||||
// Marked generates tables with align attributes or style attributes, we need to add CSS classes for styling
|
|
||||||
// Match tables and process alignment on th/td elements
|
|
||||||
const tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/gi; |
|
||||||
processed = processed.replace(tableRegex, (tableMatch: string, tableContent: string) => { |
|
||||||
// Process each row
|
|
||||||
let processedTable = tableContent; |
|
||||||
|
|
||||||
// Find all th and td elements - check for align attribute or style with text-align
|
|
||||||
const cellRegex = /<(th|td)([^>]*)>([\s\S]*?)<\/\1>/gi; |
|
||||||
processedTable = processedTable.replace(cellRegex, (cellMatch: string, tag: string, attrs: string, content: string) => { |
|
||||||
let align: string | null = null; |
|
||||||
let newAttrs = attrs; |
|
||||||
|
|
||||||
// Check for align attribute
|
|
||||||
const alignMatch = attrs.match(/align=["'](left|center|right)["']/i); |
|
||||||
if (alignMatch) { |
|
||||||
align = alignMatch[1].toLowerCase(); |
|
||||||
newAttrs = newAttrs.replace(/\s*align=["'](left|center|right)["']/i, ''); |
|
||||||
} else { |
|
||||||
// Check for style attribute with text-align
|
|
||||||
const styleMatch = attrs.match(/style=["']([^"']*text-align:\s*(left|center|right)[^"']*)["']/i); |
|
||||||
if (styleMatch) { |
|
||||||
align = styleMatch[2].toLowerCase(); |
|
||||||
// Remove text-align from style
|
|
||||||
const styleContent = styleMatch[1].replace(/text-align:\s*(left|center|right);?/gi, '').trim(); |
|
||||||
if (styleContent) { |
|
||||||
newAttrs = newAttrs.replace(/style=["'][^"']+["']/, `style="${styleContent}"`); |
|
||||||
} else { |
|
||||||
newAttrs = newAttrs.replace(/\s*style=["'][^"']+["']/, ''); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
// If we found alignment, add CSS class
|
|
||||||
if (align) { |
|
||||||
const alignClass = align === 'left' ? 'halign-left' :
|
|
||||||
align === 'center' ? 'halign-center' : 'halign-right'; |
|
||||||
|
|
||||||
// If there's already a class attribute, merge them
|
|
||||||
if (newAttrs.includes('class=')) { |
|
||||||
const classMatch = newAttrs.match(/class=["']([^"']+)["']/); |
|
||||||
if (classMatch) { |
|
||||||
const existingClass = classMatch[1]; |
|
||||||
if (!existingClass.includes(alignClass)) { |
|
||||||
newAttrs = newAttrs.replace(/class=["'][^"']+["']/, `class="${existingClass} ${alignClass}"`); |
|
||||||
} |
|
||||||
} |
|
||||||
} else { |
|
||||||
newAttrs = `${newAttrs} class="${alignClass}"`.trim(); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
return `<${tag}${newAttrs}>${content}</${tag}>`; |
|
||||||
}); |
|
||||||
|
|
||||||
return `<table>${processedTable}</table>`; |
|
||||||
}); |
|
||||||
|
|
||||||
return { |
|
||||||
html: processed, |
|
||||||
nostrLinks, |
|
||||||
wikilinks, |
|
||||||
hashtags |
|
||||||
}; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* Get Nostr identifier type from bech32 string |
|
||||||
*/ |
|
||||||
function getNostrType(bech32: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
|
||||||
if (bech32.startsWith('npub')) return 'npub'; |
|
||||||
if (bech32.startsWith('nprofile')) return 'nprofile'; |
|
||||||
if (bech32.startsWith('nevent')) return 'nevent'; |
|
||||||
if (bech32.startsWith('naddr')) return 'naddr'; |
|
||||||
if (bech32.startsWith('note')) return 'note'; |
|
||||||
return null; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* Escape HTML special characters |
|
||||||
*/ |
|
||||||
function escapeHtml(text: string): string { |
|
||||||
const map: Record<string, string> = { |
|
||||||
'&': '&', |
|
||||||
'<': '<', |
|
||||||
'>': '>', |
|
||||||
'"': '"', |
|
||||||
"'": ''' |
|
||||||
}; |
|
||||||
return text.replace(/[&<>"']/g, (m) => map[m]); |
|
||||||
} |
|
||||||
@ -1,175 +0,0 @@ |
|||||||
import { ParserOptions, Wikilink } from './types'; |
|
||||||
import * as emoji from 'node-emoji'; |
|
||||||
|
|
||||||
/** |
|
||||||
* Pre-process raw content to handle wikilinks and hashtags before AsciiDoc conversion |
|
||||||
* This prevents AsciiDoc from converting them to anchors or other formats |
|
||||||
*/ |
|
||||||
export interface PreProcessResult { |
|
||||||
content: string; |
|
||||||
wikilinks: Wikilink[]; |
|
||||||
hashtags: string[]; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* Pre-process content to convert wikilinks and hashtags to placeholders |
|
||||||
* that will be processed after HTML conversion |
|
||||||
*/ |
|
||||||
export function preProcessAsciiDoc(content: string, options: ParserOptions): PreProcessResult { |
|
||||||
let processed = content; |
|
||||||
const wikilinks: Wikilink[] = []; |
|
||||||
const hashtags: string[] = []; |
|
||||||
|
|
||||||
// Process emojis first
|
|
||||||
processed = emoji.emojify(processed); |
|
||||||
|
|
||||||
// Process wikilinks: [[dtag]] or [[dtag|display]]
|
|
||||||
// Replace with a placeholder that AsciiDoc won't touch
|
|
||||||
const wikilinkRegex = /\[\[([^\]]+)\]\]/g; |
|
||||||
const wikilinkPlaceholders: Map<string, Wikilink> = new Map(); |
|
||||||
let placeholderCounter = 0; |
|
||||||
|
|
||||||
processed = processed.replace(wikilinkRegex, (match, content) => { |
|
||||||
const parts = content.split('|'); |
|
||||||
const dtag = parts[0].trim(); |
|
||||||
const display = parts.length > 1 ? parts.slice(1).join('|').trim() : dtag; |
|
||||||
|
|
||||||
const wikilink: Wikilink = { |
|
||||||
dtag, |
|
||||||
display, |
|
||||||
original: match |
|
||||||
}; |
|
||||||
wikilinks.push(wikilink); |
|
||||||
|
|
||||||
// Use a unique placeholder that won't be processed by AsciiDoc
|
|
||||||
// Use angle brackets to avoid AsciiDoc formatting interpretation
|
|
||||||
const placeholder = `<WIKILINK_PLACEHOLDER_${placeholderCounter}>`; |
|
||||||
wikilinkPlaceholders.set(placeholder, wikilink); |
|
||||||
placeholderCounter++; |
|
||||||
|
|
||||||
return placeholder; |
|
||||||
}); |
|
||||||
|
|
||||||
// Process hashtags: #hashtag (but not in code blocks)
|
|
||||||
// Mark code blocks first
|
|
||||||
const codeBlockMarkers: Array<{ start: number; end: number }> = []; |
|
||||||
const codeBlockRegex = /\[source,[^\]]+\]|\[abc\]|\[plantuml\]|```|`[^`]+`/g; |
|
||||||
let match; |
|
||||||
while ((match = codeBlockRegex.exec(processed)) !== null) { |
|
||||||
// Find the end of the code block
|
|
||||||
const start = match.index; |
|
||||||
let end = start + match[0].length; |
|
||||||
|
|
||||||
// For source blocks, find the closing ----
|
|
||||||
if (match[0].startsWith('[source')) { |
|
||||||
const afterStart = processed.substring(end); |
|
||||||
const closeMatch = afterStart.match(/^[\s\S]*?----/); |
|
||||||
if (closeMatch) { |
|
||||||
end = start + match[0].length + closeMatch[0].length; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
codeBlockMarkers.push({ start, end }); |
|
||||||
} |
|
||||||
|
|
||||||
function isInCodeBlock(index: number): boolean { |
|
||||||
return codeBlockMarkers.some(marker => index >= marker.start && index < marker.end); |
|
||||||
} |
|
||||||
|
|
||||||
// Process hashtags
|
|
||||||
const hashtagPlaceholders: Map<string, string> = new Map(); |
|
||||||
let hashtagCounter = 0; |
|
||||||
|
|
||||||
// Match hashtags at start of line, after whitespace, or after > (for blockquotes)
|
|
||||||
const hashtagRegex = /(^|\s|>)(#[\w-]+)/gm; |
|
||||||
|
|
||||||
processed = processed.replace(hashtagRegex, (match, prefix, hashtag, offset) => { |
|
||||||
if (isInCodeBlock(offset)) return match; |
|
||||||
|
|
||||||
const topic = hashtag.substring(1); |
|
||||||
if (!hashtags.includes(topic)) { |
|
||||||
hashtags.push(topic); |
|
||||||
} |
|
||||||
|
|
||||||
// Use angle brackets to avoid AsciiDoc formatting interpretation
|
|
||||||
const placeholder = `<HASHTAG_PLACEHOLDER_${hashtagCounter}>`; |
|
||||||
hashtagPlaceholders.set(placeholder, topic); |
|
||||||
hashtagCounter++; |
|
||||||
|
|
||||||
return `${prefix}${placeholder}`; |
|
||||||
}); |
|
||||||
|
|
||||||
return { |
|
||||||
content: processed, |
|
||||||
wikilinks, |
|
||||||
hashtags |
|
||||||
}; |
|
||||||
} |
|
||||||
|
|
||||||
/** |
|
||||||
* Restore wikilinks and hashtags from placeholders in HTML |
|
||||||
*/ |
|
||||||
export function restorePlaceholders( |
|
||||||
html: string, |
|
||||||
wikilinks: Wikilink[], |
|
||||||
hashtags: string[], |
|
||||||
options: ParserOptions |
|
||||||
): string { |
|
||||||
let processed = html; |
|
||||||
|
|
||||||
// Restore wikilinks (handle both escaped and unescaped placeholders)
|
|
||||||
const wikilinkPlaceholderRegex = /<WIKILINK_PLACEHOLDER_(\d+)>|<WIKILINK_PLACEHOLDER_(\d+)>/g; |
|
||||||
processed = processed.replace(wikilinkPlaceholderRegex, (match, escapedIndex, unescapedIndex) => { |
|
||||||
const index = escapedIndex !== undefined ? parseInt(escapedIndex) : parseInt(unescapedIndex); |
|
||||||
const wikilink = wikilinks[index]; |
|
||||||
if (!wikilink) return match; |
|
||||||
|
|
||||||
let url: string; |
|
||||||
if (typeof options.wikilinkUrl === 'function') { |
|
||||||
url = options.wikilinkUrl(wikilink.dtag); |
|
||||||
} else if (typeof options.wikilinkUrl === 'string') { |
|
||||||
url = options.wikilinkUrl.replace('{dtag}', encodeURIComponent(wikilink.dtag)); |
|
||||||
} else { |
|
||||||
url = options.linkBaseURL
|
|
||||||
? `${options.linkBaseURL}/events?d=${encodeURIComponent(wikilink.dtag)}` |
|
||||||
: `#${encodeURIComponent(wikilink.dtag)}`; |
|
||||||
} |
|
||||||
|
|
||||||
return `<a href="${escapeHtml(url)}" class="wikilink" data-dtag="${escapeHtml(wikilink.dtag)}">${escapeHtml(wikilink.display)}</a>`; |
|
||||||
}); |
|
||||||
|
|
||||||
// Restore hashtags (handle both escaped and unescaped placeholders)
|
|
||||||
const hashtagPlaceholderRegex = /<HASHTAG_PLACEHOLDER_(\d+)>|<HASHTAG_PLACEHOLDER_(\d+)>/g; |
|
||||||
processed = processed.replace(hashtagPlaceholderRegex, (match, escapedIndex, unescapedIndex) => { |
|
||||||
const index = escapedIndex !== undefined ? parseInt(escapedIndex) : parseInt(unescapedIndex); |
|
||||||
const topic = hashtags[index]; |
|
||||||
if (!topic) return match; |
|
||||||
|
|
||||||
let url: string | undefined; |
|
||||||
if (typeof options.hashtagUrl === 'function') { |
|
||||||
url = options.hashtagUrl(topic); |
|
||||||
} else if (typeof options.hashtagUrl === 'string') { |
|
||||||
url = options.hashtagUrl.replace('{topic}', encodeURIComponent(topic)); |
|
||||||
} |
|
||||||
|
|
||||||
const hashtag = `#${topic}`; |
|
||||||
if (url) { |
|
||||||
return `<a href="${escapeHtml(url)}" class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</a>`; |
|
||||||
} else { |
|
||||||
return `<span class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</span>`; |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
return processed; |
|
||||||
} |
|
||||||
|
|
||||||
function escapeHtml(text: string): string { |
|
||||||
const map: Record<string, string> = { |
|
||||||
'&': '&', |
|
||||||
'<': '<', |
|
||||||
'>': '>', |
|
||||||
'"': '"', |
|
||||||
"'": ''' |
|
||||||
}; |
|
||||||
return text.replace(/[&<>"']/g, (m) => map[m]); |
|
||||||
} |
|
||||||
@ -1,56 +1,174 @@ |
|||||||
import asciidoctor from '@asciidoctor/core'; |
import asciidoctor from '@asciidoctor/core'; |
||||||
import { ParserOptions } from '../types'; |
import { ProcessResult } from '../types'; |
||||||
import * as emoji from 'node-emoji'; |
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; |
||||||
|
import { postProcessHtml } from './html-postprocess'; |
||||||
export interface AsciiDocResult { |
|
||||||
html: string; |
const asciidoctorInstance = asciidoctor(); |
||||||
tableOfContents: string; |
|
||||||
hasLaTeX: boolean; |
export interface ProcessOptions { |
||||||
hasMusicalNotation: boolean; |
enableCodeHighlighting?: boolean; |
||||||
|
enableLaTeX?: boolean; |
||||||
|
enableMusicalNotation?: boolean; |
||||||
|
originalContent?: string; // Original content for LaTeX detection
|
||||||
|
linkBaseURL?: string; // Base URL for link processing
|
||||||
} |
} |
||||||
|
|
||||||
/** |
/** |
||||||
* Process AsciiDoc content to HTML |
* Processes AsciiDoc content to HTML using AsciiDoctor |
||||||
|
* Uses AsciiDoctor's built-in highlight.js and LaTeX support |
||||||
*/ |
*/ |
||||||
export function processAsciiDoc(content: string, options: ParserOptions): AsciiDocResult { |
export async function processAsciidoc( |
||||||
const hasLaTeX = /\[source,latex\]|`\$\[|`\$\\|`\$\$|`\$\{|\$\$|\$\{|\$[^$]/.test(content); |
content: string, |
||||||
const hasMusicalNotation = /\[abc\]|\[source,abc\]/i.test(content); |
options: ProcessOptions = {} |
||||||
|
): Promise<ProcessResult> { |
||||||
// Process emojis before AsciiDoc conversion
|
const { |
||||||
const processedContent = emoji.emojify(content); |
enableCodeHighlighting = true, |
||||||
|
enableLaTeX = true, |
||||||
const asciidoctorOptions: any = { |
enableMusicalNotation = true, |
||||||
safe: 'unsafe', |
} = options; |
||||||
attributes: { |
|
||||||
'showtitle': true, |
// Check if content starts with level 3+ headers
|
||||||
'icons': 'font', |
// Asciidoctor article doctype requires level 1 (=) or level 2 (==) before level 3 (===)
|
||||||
'source-highlighter': options.enableCodeHighlighting !== false ? 'highlight.js' : undefined, |
// If content starts with level 3+, use book doctype
|
||||||
'highlightjs-theme': 'github', |
const firstHeaderMatch = content.match(/^(={1,6})\s+/m); |
||||||
'toc': 'left', |
let doctype: 'article' | 'book' = 'article'; |
||||||
'toclevels': 6, |
|
||||||
'sectanchors': true, |
if (firstHeaderMatch) { |
||||||
'sectlinks': true, |
const firstHeaderLevel = firstHeaderMatch[1].length; |
||||||
'idprefix': '_', |
if (firstHeaderLevel >= 3) { |
||||||
'idseparator': '_' |
doctype = 'book'; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
try { |
||||||
|
const result = asciidoctorInstance.convert(content, { |
||||||
|
safe: 'safe', |
||||||
|
backend: 'html5', |
||||||
|
doctype: doctype, |
||||||
|
attributes: { |
||||||
|
'showtitle': true, |
||||||
|
'sectanchors': true, |
||||||
|
'sectlinks': true, |
||||||
|
'toc': 'left', |
||||||
|
'toclevels': 6, |
||||||
|
'toc-title': 'Table of Contents', |
||||||
|
'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none', |
||||||
|
'stem': enableLaTeX ? 'latexmath' : 'none', |
||||||
|
'data-uri': true, |
||||||
|
'imagesdir': '', |
||||||
|
'linkcss': false, |
||||||
|
'stylesheet': '', |
||||||
|
'stylesdir': '', |
||||||
|
'prewrap': true, |
||||||
|
'sectnums': false, |
||||||
|
'sectnumlevels': 6, |
||||||
|
'experimental': true, |
||||||
|
'compat-mode': false, |
||||||
|
'attribute-missing': 'warn', |
||||||
|
'attribute-undefined': 'warn', |
||||||
|
'skip-front-matter': true, |
||||||
|
'source-indent': 0, |
||||||
|
'indent': 0, |
||||||
|
'tabsize': 2, |
||||||
|
'tabwidth': 2, |
||||||
|
'hardbreaks': false, |
||||||
|
'paragraph-rewrite': 'normal', |
||||||
|
'sectids': true, |
||||||
|
'idprefix': '', |
||||||
|
'idseparator': '-', |
||||||
|
'sectidprefix': '', |
||||||
|
'sectidseparator': '-' |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
const htmlString = typeof result === 'string' ? result : result.toString(); |
||||||
|
|
||||||
|
// Extract table of contents from HTML
|
||||||
|
const { toc, contentWithoutTOC } = extractTOC(htmlString); |
||||||
|
|
||||||
|
// Sanitize HTML to prevent XSS
|
||||||
|
const sanitized = sanitizeHTML(contentWithoutTOC); |
||||||
|
|
||||||
|
// Post-process HTML: convert macros to HTML, add styling, etc.
|
||||||
|
const processed = postProcessHtml(sanitized, { |
||||||
|
enableMusicalNotation, |
||||||
|
linkBaseURL: options.linkBaseURL, |
||||||
|
}); |
||||||
|
|
||||||
|
// Process links: add target="_blank" to external links
|
||||||
|
const processedWithLinks = options.linkBaseURL
|
||||||
|
? processLinks(processed, options.linkBaseURL) |
||||||
|
: processed; |
||||||
|
|
||||||
|
// Also process TOC
|
||||||
|
const tocSanitized = sanitizeHTML(toc); |
||||||
|
const tocProcessed = postProcessHtml(tocSanitized, { |
||||||
|
enableMusicalNotation: false, // Don't process music in TOC
|
||||||
|
linkBaseURL: options.linkBaseURL, |
||||||
|
}); |
||||||
|
|
||||||
|
// Process links in TOC as well
|
||||||
|
const tocProcessedWithLinks = options.linkBaseURL |
||||||
|
? processLinks(tocProcessed, options.linkBaseURL) |
||||||
|
: tocProcessed; |
||||||
|
|
||||||
|
// Check for LaTeX in original content (more reliable than checking HTML)
|
||||||
|
const contentToCheck = options.originalContent || content; |
||||||
|
const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck); |
||||||
|
|
||||||
|
// Check for musical notation in processed HTML
|
||||||
|
const hasMusicalNotation = enableMusicalNotation && ( |
||||||
|
/class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed) |
||||||
|
); |
||||||
|
|
||||||
|
return { |
||||||
|
content: processedWithLinks, |
||||||
|
tableOfContents: tocProcessedWithLinks, |
||||||
|
hasLaTeX, |
||||||
|
hasMusicalNotation, |
||||||
|
nostrLinks: [], // Will be populated by metadata extraction
|
||||||
|
wikilinks: [], |
||||||
|
hashtags: [], |
||||||
|
links: [], |
||||||
|
media: [], |
||||||
|
}; |
||||||
|
} catch (error) { |
||||||
|
// Fallback to plain text with error logging
|
||||||
|
const errorMessage = error instanceof Error ? error.message : String(error); |
||||||
|
// Use process.stderr.write for Node.js compatibility instead of console.error
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const nodeProcess = (globalThis as any).process; |
||||||
|
if (nodeProcess?.stderr) { |
||||||
|
nodeProcess.stderr.write(`Error processing AsciiDoc: ${errorMessage}\n`); |
||||||
} |
} |
||||||
}; |
|
||||||
|
// Escape HTML in content for safe display
|
||||||
// Convert to HTML
|
const escapedContent = sanitizeHTML(content); |
||||||
const Asciidoctor = asciidoctor(); |
|
||||||
const htmlResult = Asciidoctor.convert(processedContent, asciidoctorOptions); |
return { |
||||||
const html = typeof htmlResult === 'string' ? htmlResult : htmlResult.toString(); |
content: `<p>${escapedContent}</p>`, |
||||||
|
tableOfContents: '', |
||||||
// Extract table of contents if present
|
hasLaTeX: false, |
||||||
const tocMatch = html.match(/<div id="toc"[^>]*>([\s\S]*?)<\/div>/); |
hasMusicalNotation: false, |
||||||
const tableOfContents = tocMatch ? tocMatch[1] : ''; |
nostrLinks: [], |
||||||
|
wikilinks: [], |
||||||
// Remove TOC from main content if present
|
hashtags: [], |
||||||
const contentWithoutToc = html.replace(/<div id="toc"[^>]*>[\s\S]*?<\/div>/, ''); |
links: [], |
||||||
|
media: [], |
||||||
return { |
}; |
||||||
html: contentWithoutToc, |
} |
||||||
tableOfContents, |
} |
||||||
hasLaTeX, |
|
||||||
hasMusicalNotation |
/** |
||||||
}; |
* Check if content has LaTeX math |
||||||
|
* Based on jumble's detection pattern |
||||||
|
*/ |
||||||
|
function hasMathContent(content: string): boolean { |
||||||
|
// Check for inline math: $...$ or \(...\)
|
||||||
|
const inlineMath = /\$[^$]+\$|\\\([^)]+\\\)/.test(content); |
||||||
|
|
||||||
|
// Check for block math: $$...$$ or \[...\]
|
||||||
|
const blockMath = /\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]/.test(content); |
||||||
|
|
||||||
|
return inlineMath || blockMath; |
||||||
} |
} |
||||||
|
|||||||
@ -0,0 +1,212 @@ |
|||||||
|
import { processMusicalNotation } from './music'; |
||||||
|
|
||||||
|
export interface PostProcessOptions { |
||||||
|
enableMusicalNotation?: boolean; |
||||||
|
linkBaseURL?: string; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Post-processes HTML output from AsciiDoctor |
||||||
|
* Converts AsciiDoc macros to HTML with data attributes and CSS classes |
||||||
|
*/ |
||||||
|
export function postProcessHtml(html: string, options: PostProcessOptions = {}): string { |
||||||
|
let processed = html; |
||||||
|
|
||||||
|
// Convert bookstr markers to HTML placeholders
|
||||||
|
processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { |
||||||
|
const escaped = bookContent.replace(/"/g, '"').replace(/'/g, '''); |
||||||
|
return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert hashtag links to HTML
|
||||||
|
processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { |
||||||
|
// URL encode the hashtag to prevent XSS
|
||||||
|
const encodedHashtag = encodeURIComponent(normalizedHashtag); |
||||||
|
// HTML escape the display text
|
||||||
|
const escapedDisplay = displayText |
||||||
|
.replace(/&/g, '&') |
||||||
|
.replace(/</g, '<') |
||||||
|
.replace(/>/g, '>') |
||||||
|
.replace(/"/g, '"') |
||||||
|
.replace(/'/g, '''); |
||||||
|
return `<a href="/notes?t=${encodedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${escapedDisplay}</a>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert WIKILINK:dtag|display placeholder format to HTML
|
||||||
|
// Match WIKILINK:dtag|display, ensuring we don't match across HTML tags
|
||||||
|
processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => { |
||||||
|
const escapedDtag = dTag.trim().replace(/"/g, '"'); |
||||||
|
const escapedDisplay = displayText.trim() |
||||||
|
.replace(/&/g, '&') |
||||||
|
.replace(/</g, '<') |
||||||
|
.replace(/>/g, '>') |
||||||
|
.replace(/"/g, '"') |
||||||
|
.replace(/'/g, '''); |
||||||
|
|
||||||
|
// Always use relative URL for wikilinks (works on any domain)
|
||||||
|
const url = `/events?d=${escapedDtag}`; |
||||||
|
|
||||||
|
return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${url}" href="${url}">${escapedDisplay}</a>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Convert nostr: links to HTML
|
||||||
|
processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { |
||||||
|
const nostrType = getNostrType(bech32Id); |
||||||
|
|
||||||
|
if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { |
||||||
|
// Render as embedded event placeholder
|
||||||
|
const escaped = bech32Id.replace(/"/g, '"'); |
||||||
|
return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`; |
||||||
|
} else if (nostrType === 'npub' || nostrType === 'nprofile') { |
||||||
|
// Render as user handle
|
||||||
|
const escaped = bech32Id.replace(/"/g, '"'); |
||||||
|
return `<span class="user-handle" data-pubkey="${escaped}">@${displayText}</span>`; |
||||||
|
} else { |
||||||
|
// Fallback to regular link
|
||||||
|
const escaped = bech32Id.replace(/"/g, '"'); |
||||||
|
return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
// Process images: add max-width styling and data attributes
|
||||||
|
processed = processImages(processed); |
||||||
|
|
||||||
|
// Process musical notation if enabled
|
||||||
|
if (options.enableMusicalNotation) { |
||||||
|
processed = processMusicalNotation(processed); |
||||||
|
} |
||||||
|
|
||||||
|
// Clean up any leftover markdown syntax
|
||||||
|
processed = cleanupMarkdown(processed); |
||||||
|
|
||||||
|
// Add styling classes
|
||||||
|
processed = addStylingClasses(processed); |
||||||
|
|
||||||
|
// Hide raw ToC text
|
||||||
|
processed = hideRawTocText(processed); |
||||||
|
|
||||||
|
return processed; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Get Nostr identifier type |
||||||
|
*/ |
||||||
|
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||||
|
if (id.startsWith('npub')) return 'npub'; |
||||||
|
if (id.startsWith('nprofile')) return 'nprofile'; |
||||||
|
if (id.startsWith('nevent')) return 'nevent'; |
||||||
|
if (id.startsWith('naddr')) return 'naddr'; |
||||||
|
if (id.startsWith('note')) return 'note'; |
||||||
|
return null; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Process images: add max-width styling and data attributes |
||||||
|
*/ |
||||||
|
function processImages(html: string): string { |
||||||
|
const imageUrls: string[] = []; |
||||||
|
const imageUrlRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi; |
||||||
|
let match; |
||||||
|
|
||||||
|
while ((match = imageUrlRegex.exec(html)) !== null) { |
||||||
|
const url = match[1]; |
||||||
|
if (url && !imageUrls.includes(url)) { |
||||||
|
imageUrls.push(url); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return html.replace(/<img([^>]+)>/gi, (imgTag, attributes) => { |
||||||
|
const srcMatch = attributes.match(/src=["']([^"']+)["']/i); |
||||||
|
if (!srcMatch) return imgTag; |
||||||
|
|
||||||
|
const src = srcMatch[1]; |
||||||
|
const currentIndex = imageUrls.indexOf(src); |
||||||
|
|
||||||
|
let updatedAttributes = attributes; |
||||||
|
|
||||||
|
if (updatedAttributes.match(/class=["']/i)) { |
||||||
|
updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match: string, classes: string) => { |
||||||
|
const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim(); |
||||||
|
const newClasses = cleanedClasses
|
||||||
|
? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in` |
||||||
|
: 'max-w-[400px] object-contain cursor-zoom-in'; |
||||||
|
return `class="${newClasses}"`; |
||||||
|
}); |
||||||
|
} else { |
||||||
|
updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`; |
||||||
|
} |
||||||
|
|
||||||
|
updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '"')}"`; |
||||||
|
|
||||||
|
return `<img${updatedAttributes}>`; |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Clean up leftover markdown syntax |
||||||
|
*/ |
||||||
|
function cleanupMarkdown(html: string): string { |
||||||
|
let cleaned = html; |
||||||
|
|
||||||
|
// Clean up markdown image syntax
|
||||||
|
cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => { |
||||||
|
const altText = alt || ''; |
||||||
|
return `<img src="${url}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Clean up markdown link syntax
|
||||||
|
cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { |
||||||
|
if (cleaned.includes(`href="${url}"`)) { |
||||||
|
return _match; |
||||||
|
} |
||||||
|
return `<a href="${url}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${text} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; |
||||||
|
}); |
||||||
|
|
||||||
|
return cleaned; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Add proper CSS classes for styling |
||||||
|
*/ |
||||||
|
function addStylingClasses(html: string): string { |
||||||
|
let styled = html; |
||||||
|
|
||||||
|
// Add strikethrough styling
|
||||||
|
styled = styled.replace(/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'); |
||||||
|
|
||||||
|
// Add subscript styling
|
||||||
|
styled = styled.replace(/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'); |
||||||
|
|
||||||
|
// Add superscript styling
|
||||||
|
styled = styled.replace(/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'); |
||||||
|
|
||||||
|
// Add code highlighting classes
|
||||||
|
styled = styled.replace(/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'); |
||||||
|
styled = styled.replace(/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'); |
||||||
|
|
||||||
|
return styled; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Hide raw AsciiDoc ToC text |
||||||
|
*/ |
||||||
|
function hideRawTocText(html: string): string { |
||||||
|
let cleaned = html; |
||||||
|
|
||||||
|
cleaned = cleaned.replace( |
||||||
|
/<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, |
||||||
|
'' |
||||||
|
); |
||||||
|
|
||||||
|
cleaned = cleaned.replace( |
||||||
|
/<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, |
||||||
|
'' |
||||||
|
); |
||||||
|
|
||||||
|
cleaned = cleaned.replace( |
||||||
|
/<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, |
||||||
|
'' |
||||||
|
); |
||||||
|
|
||||||
|
return cleaned; |
||||||
|
} |
||||||
@ -0,0 +1,211 @@ |
|||||||
|
/** |
||||||
|
* Extracts the table of contents from AsciiDoc HTML output |
||||||
|
* Returns the TOC HTML and the content HTML without the TOC |
||||||
|
*/ |
||||||
|
export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } { |
||||||
|
// AsciiDoc with toc: 'left' generates a TOC in a div with id="toc" or class="toc"
|
||||||
|
let tocContent = ''; |
||||||
|
let contentWithoutTOC = html; |
||||||
|
|
||||||
|
// Find the start of the TOC div - try multiple patterns
|
||||||
|
const tocStartPatterns = [ |
||||||
|
/<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i, |
||||||
|
/<div\s+id=["']toc["'][^>]*>/i, |
||||||
|
/<div\s+class=["']toc["'][^>]*>/i, |
||||||
|
/<nav\s+id=["']toc["'][^>]*>/i, |
||||||
|
]; |
||||||
|
|
||||||
|
let tocStartIdx = -1; |
||||||
|
let tocStartTag = ''; |
||||||
|
|
||||||
|
for (const pattern of tocStartPatterns) { |
||||||
|
const match = html.match(pattern); |
||||||
|
if (match && match.index !== undefined) { |
||||||
|
tocStartIdx = match.index; |
||||||
|
tocStartTag = match[0]; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (tocStartIdx === -1) { |
||||||
|
// No TOC found
|
||||||
|
return { toc: '', contentWithoutTOC: html }; |
||||||
|
} |
||||||
|
|
||||||
|
// Find the matching closing tag by counting div/nav tags
|
||||||
|
const searchStart = tocStartIdx + tocStartTag.length; |
||||||
|
let depth = 1; |
||||||
|
let i = searchStart; |
||||||
|
|
||||||
|
while (i < html.length && depth > 0) { |
||||||
|
// Look for opening or closing div/nav tags
|
||||||
|
if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<div') { |
||||||
|
// Check if it's a closing tag
|
||||||
|
if (i + 5 < html.length && html[i + 4] === '/') { |
||||||
|
depth--; |
||||||
|
const closeIdx = html.indexOf('>', i); |
||||||
|
if (closeIdx === -1) break; |
||||||
|
i = closeIdx + 1; |
||||||
|
} else { |
||||||
|
// Opening tag - find the end (handle attributes and self-closing)
|
||||||
|
const closeIdx = html.indexOf('>', i); |
||||||
|
if (closeIdx === -1) break; |
||||||
|
// Check if it's self-closing (look for /> before the >)
|
||||||
|
const tagContent = html.substring(i, closeIdx); |
||||||
|
if (!tagContent.endsWith('/')) { |
||||||
|
depth++; |
||||||
|
} |
||||||
|
i = closeIdx + 1; |
||||||
|
} |
||||||
|
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</div') { |
||||||
|
depth--; |
||||||
|
const closeIdx = html.indexOf('>', i); |
||||||
|
if (closeIdx === -1) break; |
||||||
|
i = closeIdx + 1; |
||||||
|
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</nav') { |
||||||
|
depth--; |
||||||
|
const closeIdx = html.indexOf('>', i); |
||||||
|
if (closeIdx === -1) break; |
||||||
|
i = closeIdx + 1; |
||||||
|
} else if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<nav') { |
||||||
|
// Handle opening nav tags
|
||||||
|
const closeIdx = html.indexOf('>', i); |
||||||
|
if (closeIdx === -1) break; |
||||||
|
const tagContent = html.substring(i, closeIdx); |
||||||
|
if (!tagContent.endsWith('/')) { |
||||||
|
depth++; |
||||||
|
} |
||||||
|
i = closeIdx + 1; |
||||||
|
} else { |
||||||
|
i++; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (depth === 0) { |
||||||
|
// Found the matching closing tag
|
||||||
|
const tocEndIdx = i; |
||||||
|
// Extract the TOC content (inner HTML)
|
||||||
|
const tocFullHTML = html.substring(tocStartIdx, tocEndIdx); |
||||||
|
// Extract just the inner content (without the outer div tags)
|
||||||
|
let innerStart = tocStartTag.length; |
||||||
|
let innerEnd = tocFullHTML.length; |
||||||
|
// Find the last </div> or </nav>
|
||||||
|
if (tocFullHTML.endsWith('</div>')) { |
||||||
|
innerEnd -= 6; |
||||||
|
} else if (tocFullHTML.endsWith('</nav>')) { |
||||||
|
innerEnd -= 7; |
||||||
|
} |
||||||
|
tocContent = tocFullHTML.substring(innerStart, innerEnd).trim(); |
||||||
|
|
||||||
|
// Remove the toctitle div if present (AsciiDoc adds "Table of Contents" title)
|
||||||
|
tocContent = tocContent.replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, ''); |
||||||
|
tocContent = tocContent.trim(); |
||||||
|
|
||||||
|
// Remove the TOC from the content
|
||||||
|
contentWithoutTOC = html.substring(0, tocStartIdx) + html.substring(tocEndIdx); |
||||||
|
} |
||||||
|
|
||||||
|
return { toc: tocContent, contentWithoutTOC }; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Performs basic HTML sanitization to prevent XSS |
||||||
|
*/ |
||||||
|
export function sanitizeHTML(html: string): string { |
||||||
|
// Remove script tags and their content
|
||||||
|
html = html.replace(/<script[^>]*>.*?<\/script>/gis, ''); |
||||||
|
|
||||||
|
// Remove event handlers (onclick, onerror, etc.)
|
||||||
|
html = html.replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, ''); |
||||||
|
|
||||||
|
// Remove javascript: protocol in links
|
||||||
|
html = html.replace(/javascript:/gi, ''); |
||||||
|
|
||||||
|
// Remove data: URLs that could be dangerous
|
||||||
|
html = html.replace(/data:\s*text\/html/gi, ''); |
||||||
|
|
||||||
|
return html; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Processes HTML links to add target="_blank" to external links |
||||||
|
* This function is available for use but not currently called automatically. |
||||||
|
* It can be used in post-processing if needed. |
||||||
|
*/ |
||||||
|
export function processLinks(html: string, linkBaseURL: string): string { |
||||||
|
// Extract domain from linkBaseURL for comparison
|
||||||
|
let linkBaseDomain = ''; |
||||||
|
if (linkBaseURL) { |
||||||
|
try { |
||||||
|
// Use URL constructor if available (Node.js 10+)
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const URLConstructor = (globalThis as any).URL; |
||||||
|
if (URLConstructor) { |
||||||
|
const url = new URLConstructor(linkBaseURL); |
||||||
|
linkBaseDomain = url.hostname; |
||||||
|
} else { |
||||||
|
throw new Error('URL not available'); |
||||||
|
} |
||||||
|
} catch { |
||||||
|
// Fallback to simple string parsing if URL constructor fails
|
||||||
|
const url = linkBaseURL.replace(/^https?:\/\//, ''); |
||||||
|
const parts = url.split('/'); |
||||||
|
if (parts.length > 0) { |
||||||
|
linkBaseDomain = parts[0]; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Regex to match <a> tags with href attributes
|
||||||
|
const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g; |
||||||
|
|
||||||
|
return html.replace(linkRegex, (match, before, href, after) => { |
||||||
|
// Check if it's an external link (starts with http:// or https://)
|
||||||
|
const isExternal = href.startsWith('http://') || href.startsWith('https://'); |
||||||
|
|
||||||
|
if (isExternal) { |
||||||
|
// Check if it's pointing to our own domain
|
||||||
|
if (linkBaseDomain) { |
||||||
|
try { |
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const URLConstructor = (globalThis as any).URL; |
||||||
|
if (URLConstructor) { |
||||||
|
const hrefUrl = new URLConstructor(href); |
||||||
|
if (hrefUrl.hostname === linkBaseDomain) { |
||||||
|
// Same domain - open in same tab (remove any existing target attribute)
|
||||||
|
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, ''); |
||||||
|
} |
||||||
|
} else { |
||||||
|
throw new Error('URL not available'); |
||||||
|
} |
||||||
|
} catch { |
||||||
|
// If URL parsing fails, use simple string check
|
||||||
|
if (href.includes(linkBaseDomain)) { |
||||||
|
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, ''); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// External link - add target="_blank" and rel="noopener noreferrer" if not already present
|
||||||
|
if (!match.includes('target=')) { |
||||||
|
if (!match.includes('rel=')) { |
||||||
|
return match.replace('>', ' target="_blank" rel="noopener noreferrer">'); |
||||||
|
} else { |
||||||
|
// Update existing rel attribute to include noopener if not present
|
||||||
|
const updatedMatch = match.replace(/rel\s*=\s*["']([^"']*)["']/gi, (relMatch, relValue) => { |
||||||
|
if (!relValue.includes('noopener')) { |
||||||
|
return `rel="${relValue} noopener noreferrer"`; |
||||||
|
} |
||||||
|
return relMatch; |
||||||
|
}); |
||||||
|
return updatedMatch.replace('>', ' target="_blank">'); |
||||||
|
} |
||||||
|
} |
||||||
|
} else { |
||||||
|
// Local/relative link - ensure it opens in same tab (remove target if present)
|
||||||
|
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, ''); |
||||||
|
} |
||||||
|
|
||||||
|
return match; |
||||||
|
}); |
||||||
|
} |
||||||
@ -1,244 +0,0 @@ |
|||||||
import { marked } from 'marked'; |
|
||||||
// @ts-ignore - marked is ESM but we need it to work in Jest
|
|
||||||
import { ParserOptions } from '../types'; |
|
||||||
import * as emoji from 'node-emoji'; |
|
||||||
|
|
||||||
/** Result of converting a markdown document to HTML. */
export interface MarkdownResult {
  /** The rendered HTML body (frontmatter stripped, footnote section appended). */
  html: string;
  /** Key/value pairs parsed from the YAML frontmatter block, when present. */
  frontmatter?: Record<string, any>;
  /** True when the source appears to contain LaTeX math (regex heuristic). */
  hasLaTeX: boolean;
  /** True when the source contains ```abc or ```music fenced blocks. */
  hasMusicalNotation: boolean;
}
|
||||||
|
|
||||||
/** |
|
||||||
* Extract YAML frontmatter from markdown content |
|
||||||
*/ |
|
||||||
function extractFrontmatter(content: string): { frontmatter?: Record<string, any>; content: string } { |
|
||||||
const frontmatterRegex = /^---\s*\n([\s\S]*?)\n---\s*\n/; |
|
||||||
const match = content.match(frontmatterRegex); |
|
||||||
|
|
||||||
if (!match) { |
|
||||||
return { content }; |
|
||||||
} |
|
||||||
|
|
||||||
try { |
|
||||||
// Simple YAML parser for basic key-value pairs
|
|
||||||
const yamlContent = match[1]; |
|
||||||
const frontmatter: Record<string, any> = {}; |
|
||||||
const lines = yamlContent.split('\n'); |
|
||||||
|
|
||||||
for (const line of lines) { |
|
||||||
const trimmed = line.trim(); |
|
||||||
if (!trimmed || trimmed.startsWith('#')) continue; |
|
||||||
|
|
||||||
const colonIndex = trimmed.indexOf(':'); |
|
||||||
if (colonIndex === -1) continue; |
|
||||||
|
|
||||||
const key = trimmed.substring(0, colonIndex).trim(); |
|
||||||
let value = trimmed.substring(colonIndex + 1).trim(); |
|
||||||
|
|
||||||
// Remove quotes if present
|
|
||||||
if ((value.startsWith('"') && value.endsWith('"')) ||
|
|
||||||
(value.startsWith("'") && value.endsWith("'"))) { |
|
||||||
value = value.slice(1, -1); |
|
||||||
} |
|
||||||
|
|
||||||
// Handle arrays (simple case)
|
|
||||||
if (value.startsWith('[') && value.endsWith(']')) { |
|
||||||
const arrayContent = value.slice(1, -1); |
|
||||||
frontmatter[key] = arrayContent.split(',').map(v => v.trim().replace(/^["']|["']$/g, '')); |
|
||||||
} else { |
|
||||||
frontmatter[key] = value; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
return { |
|
||||||
frontmatter: Object.keys(frontmatter).length > 0 ? frontmatter : undefined, |
|
||||||
content: content.substring(match[0].length) |
|
||||||
}; |
|
||||||
} catch (e) { |
|
||||||
return { content }; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/**
 * Process Markdown content to HTML (minimal markdown support).
 *
 * Pipeline, in order: strip YAML frontmatter; expand emoji shortcodes;
 * extract footnote definitions and replace references with inert HTML
 * placeholders; run marked; convert X^2^ superscripts outside code blocks;
 * materialize footnote links plus a trailing footnotes section; and give
 * headers GitHub-style id attributes for anchor links.
 *
 * @param content - raw markdown source, optionally starting with `---` frontmatter
 * @param options - parser options (NOTE(review): not referenced in this body)
 * @returns rendered HTML plus parsed frontmatter and LaTeX/music detection flags
 */
export function processMarkdown(content: string, options: ParserOptions): MarkdownResult {
  // Extract frontmatter so it is not rendered into the body.
  const { frontmatter, content: contentWithoutFrontmatter } = extractFrontmatter(content);

  // Heuristic detection of LaTeX and musical notation. Note both run
  // against the raw input (frontmatter included).
  const hasLaTeX = /```latex|`\$\[|`\$\\|`\$\$|`\$\{|\$\$|\$\{|\$[^$]/.test(content);
  const hasMusicalNotation = /```abc|```music/i.test(content);

  // Configure marked for minimal markdown (GitHub-flavored, no hard
  // line breaks). NOTE(review): setOptions mutates marked's global state.
  marked.setOptions({
    gfm: true,
    breaks: false
  });

  // Expand :shortcode: emoji before markdown processing.
  let processedContent = emoji.emojify(contentWithoutFrontmatter);

  // Footnotes use the common markdown extension syntax: [^1] references in
  // the text and "[^1]: definition" lines (possibly multi-line) elsewhere.
  const footnoteDefinitions: Map<string, string> = new Map();
  let placeholderCounter = 0;

  // Pass 1: pull footnote definitions out of the source, line by line.
  const lines = processedContent.split('\n');
  const processedLines: string[] = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    const footnoteDefMatch = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/);
    if (footnoteDefMatch) {
      const id = footnoteDefMatch[1];
      let definition = footnoteDefMatch[2];

      // Absorb continuation lines until the next definition starts, or
      // until a blank line separates the definition from normal content.
      i++;
      while (i < lines.length) {
        const nextLine = lines[i];
        if (nextLine.match(/^\[\^[^\]]+\]:/) || (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].trim() !== '' && !lines[i + 1].match(/^\[\^[^\]]+\]:/))) {
          break;
        }
        if (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].match(/^\[\^[^\]]+\]:/)) {
          break;
        }
        definition += '\n' + nextLine;
        i++;
      }

      footnoteDefinitions.set(id, definition.trim());
      // Definition lines are dropped from the body entirely.
      continue;
    }

    processedLines.push(line);
    i++;
  }

  processedContent = processedLines.join('\n');

  // Pass 2: swap each [^id] reference that has a definition for an HTML
  // span placeholder, which marked passes through untouched.
  const footnoteRefRegex = /\[\^([^\]]+)\]/g;
  let refMatch;
  while ((refMatch = footnoteRefRegex.exec(processedContent)) !== null) {
    const id = refMatch[1];
    if (footnoteDefinitions.has(id)) {
      const placeholder = `<span data-footnote-placeholder="${placeholderCounter++}" data-footnote-id="${id}"></span>`;
      processedContent = processedContent.substring(0, refMatch.index) +
        placeholder +
        processedContent.substring(refMatch.index + refMatch[0].length);
      // The string was rewritten, so restart the scan from the beginning.
      footnoteRefRegex.lastIndex = 0;
    }
  }

  // Convert markdown to HTML.
  let html = marked.parse(processedContent) as string;

  // Superscript support (X^2^) is applied to the HTML after parsing so it
  // cannot interfere with markdown syntax — but occurrences inside
  // <pre>/<code> blocks must be left alone. Index those blocks first.
  const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi;
  const codeBlocks: Array<{ start: number; end: number; content: string }> = [];
  let codeMatch;
  while ((codeMatch = codeBlockRegex.exec(html)) !== null) {
    codeBlocks.push({
      start: codeMatch.index,
      end: codeMatch.index + codeMatch[0].length,
      content: codeMatch[0]
    });
  }

  // True when `index` falls inside any recorded <pre>/<code> span.
  function isInCodeBlock(index: number): boolean {
    return codeBlocks.some(block => index >= block.start && index < block.end);
  }

  // Collect ^...^ matches that are outside code blocks.
  const superscriptRegex = /\^([^\^<>\n]+)\^/g;
  const superscriptReplacements: Array<{ match: string; replacement: string; index: number }> = [];
  let supMatch;
  while ((supMatch = superscriptRegex.exec(html)) !== null) {
    if (isInCodeBlock(supMatch.index)) continue;
    superscriptReplacements.push({
      match: supMatch[0],
      replacement: `<sup>${supMatch[1]}</sup>`,
      index: supMatch.index
    });
  }

  // Apply replacements back-to-front so earlier indices stay valid.
  superscriptReplacements.reverse().forEach(({ match, replacement, index }) => {
    html = html.substring(0, index) + replacement + html.substring(index + match.length);
  });

  // Number the footnotes in definition order and remember id -> number.
  let footnoteCounter = 1;
  const footnoteRefs: Array<{ id: string; num: number; definition: string }> = [];
  const footnoteRefMap: Map<string, number> = new Map();

  footnoteDefinitions.forEach((definition, id) => {
    const num = footnoteCounter++;
    footnoteRefMap.set(id, num);
    footnoteRefs.push({ id, num, definition });
  });

  // Turn each placeholder span into a superscript link to its definition.
  const placeholderRegex = /<span data-footnote-placeholder="(\d+)" data-footnote-id="([^"]+)"><\/span>/g;
  html = html.replace(placeholderRegex, (match, placeholderNum, id) => {
    const num = footnoteRefMap.get(id);
    if (num !== undefined) {
      return `<sup class="footnote"><a id="footnoteref_${num}" class="footnote" href="#footnotedef_${num}" title="View footnote.">${num}</a></sup>`;
    }
    return match; // no definition found: keep the placeholder as-is
  });

  // Append the footnotes section when any definitions were collected.
  if (footnoteRefs.length > 0) {
    let footnotesHtml = '<div id="footnotes"><hr>';
    footnoteRefs.forEach(({ id, num, definition }) => {
      // Definitions may themselves contain markdown, so render them too.
      const defHtml = marked.parse(definition) as string;
      footnotesHtml += `<div class="footnote" id="footnotedef_${num}"><a href="#footnoteref_${num}">${num}</a>. ${defHtml}</div>`;
    });
    footnotesHtml += '</div>';
    html += footnotesHtml;
  }

  // Give headers GitHub-style id attributes (lowercased, hyphenated text)
  // so in-document anchor links resolve; headers that already carry an id
  // are left untouched.
  html = html.replace(/<h([1-6])([^>]*)>([^<]+)<\/h[1-6]>/gi, (match: string, level: string, attrs: string, text: string) => {
    if (attrs && /id=["'][^"']+["']/i.test(attrs)) {
      return match;
    }

    const id = text
      .toLowerCase()
      .trim()
      .replace(/[^\w\s-]/g, '') // remove special chars
      .replace(/\s+/g, '-') // spaces -> hyphens
      .replace(/-+/g, '-') // collapse hyphen runs
      .replace(/^-|-$/g, ''); // trim leading/trailing hyphens

    const newAttrs = attrs ? `${attrs} id="${id}"` : `id="${id}"`;
    return `<h${level} ${newAttrs}>${text}</h${level}>`;
  });

  return {
    html,
    frontmatter,
    hasLaTeX,
    hasMusicalNotation
  };
}
|
||||||
@ -0,0 +1,47 @@ |
|||||||
|
/** |
||||||
|
* Processes musical notation in HTML content |
||||||
|
* Wraps musical notation in appropriate HTML for rendering |
||||||
|
*/ |
||||||
|
export function processMusicalNotation(html: string): string { |
||||||
|
// Process ABC notation blocks
|
||||||
|
const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs; |
||||||
|
html = html.replace(abcBlockPattern, (match) => { |
||||||
|
const abcContent = match.trim(); |
||||||
|
return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Process LilyPond notation blocks
|
||||||
|
const lilypondPattern = /(\\relative[^}]+})/gs; |
||||||
|
html = html.replace(lilypondPattern, (match) => { |
||||||
|
const lilypondContent = match.trim(); |
||||||
|
return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Process inline chord notation: [C], [Am], [F#m7], etc.
|
||||||
|
const chordPattern = /\[([A-G][#b]?m?[0-9]?[^\[\]]*)\]/g; |
||||||
|
html = html.replace(chordPattern, (match, chord) => { |
||||||
|
return `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`; |
||||||
|
}); |
||||||
|
|
||||||
|
// Process MusicXML-like notation
|
||||||
|
const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs; |
||||||
|
html = html.replace(musicxmlPattern, (match) => { |
||||||
|
const musicxmlContent = match.trim(); |
||||||
|
return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`; |
||||||
|
}); |
||||||
|
|
||||||
|
return html; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Escapes a string for use in HTML attributes |
||||||
|
*/ |
||||||
|
function escapeForAttr(text: string): string { |
||||||
|
return text |
||||||
|
.replace(/"/g, '"') |
||||||
|
.replace(/'/g, ''') |
||||||
|
.replace(/</g, '<') |
||||||
|
.replace(/>/g, '>') |
||||||
|
.replace(/\n/g, ' ') |
||||||
|
.replace(/\r/g, ''); |
||||||
|
} |
||||||
@ -0,0 +1,20 @@ |
|||||||
|
/**
 * Type declarations for @asciidoctor/core.
 * These are minimal, hand-written types - the actual types should come
 * from the package itself.
 */
declare module '@asciidoctor/core' {
  /** Options forwarded to Asciidoctor's convert call. */
  interface ConvertOptions {
    /** Safe mode name (string-typed here; see Asciidoctor docs for accepted values). */
    safe?: string;
    /** Output backend name. */
    backend?: string;
    /** Document type. */
    doctype?: string;
    /** Document attributes passed through to the converter. */
    attributes?: Record<string, any>;
    /** Extension registry — intentionally untyped in this shim. */
    extension_registry?: any;
  }

  /** Minimal surface of the Asciidoctor processor object. */
  interface Asciidoctor {
    /** Converts AsciiDoc source; return type depends on converter options. */
    convert(content: string, options?: ConvertOptions): string | any;
  }

  /** Factory returning the processor instance. */
  function asciidoctor(): Asciidoctor;
  export default asciidoctor;
}
||||||
Loading…
Reference in new issue