Compare commits
10 Commits
ff8f3c47a9
...
3498f764d4
| Author | SHA1 | Date |
|---|---|---|
|
|
3498f764d4 | 2 weeks ago |
|
|
48b25cb39d | 2 weeks ago |
|
|
e0abef84ab | 2 weeks ago |
|
|
f6e21c6110 | 2 weeks ago |
|
|
352ef1646b | 2 weeks ago |
|
|
23bd727ccd | 2 weeks ago |
|
|
85484852ca | 2 weeks ago |
|
|
9cedf345b3 | 2 weeks ago |
|
|
5fcfc11dfb | 2 weeks ago |
|
|
e0213b6c2e | 2 weeks ago |
23 changed files with 2556 additions and 1443 deletions
@ -0,0 +1,388 @@
@@ -0,0 +1,388 @@
|
||||
= AsciiDoc Test Document |
||||
Kismet Lee |
||||
2.9, October 31, 2021: Fall incarnation |
||||
:description: Test description |
||||
:author: Kismet Lee |
||||
:date: 2021-10-31 |
||||
:version: 2.9 |
||||
:status: Draft |
||||
:keywords: AsciiDoc, Test, Document |
||||
:category: Test |
||||
:language: English |
||||
|
||||
== Bullet list |
||||
|
||||
This is a test unordered list with mixed bullets: |
||||
|
||||
* First item with a number 2. in it |
||||
* Second item |
||||
* Third item |
||||
** Indented item |
||||
** Indented item |
||||
* Fourth item |
||||
|
||||
Another unordered list: |
||||
|
||||
* 1st item |
||||
* 2nd item |
||||
* third item containing _italic_ text |
||||
** indented item |
||||
** second indented item |
||||
* fourth item |
||||
|
||||
This is a test ordered list with indented items: |
||||
|
||||
. First item |
||||
. Second item |
||||
. Third item |
||||
.. Indented item |
||||
.. Indented item |
||||
. Fourth item |
||||
|
||||
Ordered list where everything has no number: |
||||
|
||||
. First item |
||||
. Second item |
||||
. Third item |
||||
. Fourth item |
||||
|
||||
This is a mixed list with indented items: |
||||
|
||||
. First item |
||||
. Second item |
||||
. Third item |
||||
* Indented item |
||||
* Indented item |
||||
. Fourth item |
||||
|
||||
This is another mixed list with indented items: |
||||
|
||||
* First item |
||||
* Second item |
||||
* Third item |
||||
. Indented item |
||||
. Indented item |
||||
* Fourth item |
||||
|
||||
== Headers |
||||
|
||||
=== Third-level header |
||||
|
||||
==== Fourth-level header |
||||
|
||||
===== Fifth-level header |
||||
|
||||
====== Sixth-level header |
||||
|
||||
== Media and Links |
||||
|
||||
=== Nostr address |
||||
|
||||
This should be ignored and rendered as plaintext: naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
||||
|
||||
This is also plaintext: |
||||
|
||||
npub1gv069u6q7zkl393ad47xutpqmyfj0rrfrlnqnlfc2ld38k8nnl4st9wa6q |
||||
|
||||
These should be turned into links: |
||||
|
||||
nostr:naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
||||
|
||||
nostr:npub1l5sga6xg72phsz5422ykujprejwud075ggrr3z2hwyrfgr7eylqstegx9z |
||||
|
||||
nostr:nevent1qvzqqqqqqypzp382htsmu08k277ps40wqhnfm60st89h5pvjyutghq9cjasuh38qqythwumn8ghj7un9d3shjtnswf5k6ctv9ehx2ap0qqsysletg3lqnl4uy59xsj4rp9rgw67wg23l827f4uvn5ckn20fuxcq45d8pj |
||||
|
||||
nostr:nprofile1qqsxhedgkuneycxpcdjlg6tgtxdy8gurdz64nq2h0flc288a0jag98qguy3nh |
||||
|
||||
nostr:note1txyefcha2xt3pgungx4k6j077dsteyef6hzpyuuku00s4h0eymzq4k33yg |
||||
|
||||
=== Hashtag |
||||
|
||||
#testhashtag at the start of the line and #inlinehashtag in the middle |
||||
|
||||
=== Wikilinks |
||||
|
||||
[[NKBIP-01|Specification]] and [[mirepoix]] |
||||
|
||||
=== URL |
||||
|
||||
https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html |
||||
|
||||
link:https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html[Welt Online link] |
||||
|
||||
this should render as plaintext: `http://www.example.com` |
||||
|
||||
this should be a hyperlink to the http URL with the same address link:https://theforest.nostr1.com[wss://theforest.nostr1.com] |
||||
|
||||
=== Images |
||||
|
||||
https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png |
||||
|
||||
image::https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png[width=400] |
||||
|
||||
=== Media |
||||
|
||||
==== YouTube |
||||
|
||||
Normal |
||||
|
||||
https://www.youtube.com/watch?v=KGIAS0cslSU |
||||
|
||||
https://youtu.be/KGIAS0cslSU |
||||
|
||||
video::KGIAS0cslSU[youtube] |
||||
|
||||
Shorts |
||||
|
||||
https://www.youtube.com/shorts/s-BQhXdCs8Y |
||||
|
||||
video::s-BQhXdCs8Y[youtube] |
||||
|
||||
==== Spotify |
||||
|
||||
https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx |
||||
|
||||
link:https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx[] |
||||
|
||||
==== Audio |
||||
|
||||
https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3 |
||||
|
||||
audio::https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3[] |
||||
|
||||
==== Video |
||||
|
||||
https://v.nostr.build/MTjaYib4upQuf8zn.mp4 |
||||
|
||||
video::https://v.nostr.build/MTjaYib4upQuf8zn.mp4[] |
||||
|
||||
== Tables |
||||
|
||||
=== Orderly |
||||
|
||||
[cols="1,2"] |
||||
|=== |
||||
|Syntax|Description |
||||
|
||||
|Header |
||||
|Title |
||||
|
||||
|Paragraph |
||||
|Text |
||||
|=== |
||||
|
||||
=== Unorderly |
||||
|
||||
[cols="1,2"] |
||||
|=== |
||||
|Syntax|Description |
||||
|
||||
|Header |
||||
|Title |
||||
|
||||
|Paragraph |
||||
|Text |
||||
|=== |
||||
|
||||
=== With alignment |
||||
|
||||
[cols="<,^,>"] |
||||
|=== |
||||
|Syntax|Description|Test Text |
||||
|
||||
|Header |
||||
|Title |
||||
|Here's this |
||||
|
||||
|Paragraph |
||||
|Text |
||||
|And more |
||||
|=== |
||||
|
||||
== Code blocks |
||||
|
||||
=== json |
||||
|
||||
[source,json] |
||||
---- |
||||
{ |
||||
"id": "<event_id>", |
||||
"pubkey": "<event_originator_pubkey>", |
||||
"created_at": 1725087283, |
||||
"kind": 30040, |
||||
"tags": [ |
||||
["d", "aesop's-fables-by-aesop"], |
||||
["title", "Aesop's Fables"], |
||||
["author", "Aesop"], |
||||
], |
||||
"sig": "<event_signature>" |
||||
} |
||||
---- |
||||
|
||||
=== typescript |
||||
|
||||
[source,typescript] |
||||
---- |
||||
/** |
||||
* Get Nostr identifier type |
||||
*/ |
||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||
if (id.startsWith('npub')) return 'npub'; |
||||
if (id.startsWith('nprofile')) return 'nprofile'; |
||||
if (id.startsWith('nevent')) return 'nevent'; |
||||
if (id.startsWith('naddr')) return 'naddr'; |
||||
if (id.startsWith('note')) return 'note'; |
||||
return null; |
||||
} |
||||
---- |
||||
|
||||
=== shell |
||||
|
||||
[source,shell] |
||||
---- |
||||
|
||||
mkdir new_directory |
||||
cp source.txt destination.txt |
||||
|
||||
---- |
||||
|
||||
=== LaTeX |
||||
|
||||
[source,latex] |
||||
---- |
||||
$$ |
||||
M = |
||||
\begin{bmatrix} |
||||
\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em] |
||||
\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em] |
||||
0 & \frac{5}{6} & \frac{1}{6} |
||||
\end{bmatrix} |
||||
$$ |
||||
---- |
||||
|
||||
[source,latex] |
||||
---- |
||||
$$ |
||||
f(x)= |
||||
\begin{cases} |
||||
1/d_{ij} & \quad \text{when $d_{ij} \leq 160$}\\ |
||||
0 & \quad \text{otherwise} |
||||
\end{cases} |
||||
$$ |
||||
---- |
||||
|
||||
=== ABC Notation |
||||
|
||||
[abc] |
||||
---- |
||||
X:1 |
||||
T:Ohne Titel |
||||
C:Aufgezeichnet 1784 |
||||
A:Seibis nahe Lichtenberg in Oberfranken |
||||
S:Handschrift, bezeichnet und datiert: "Heinrich Nicol Philipp zu Seibis den 30 Junius 1784" |
||||
M:4/4 |
||||
L:1/4 |
||||
K:D |
||||
dd d2 | ee e2 | fg ad | cB cA |\ |
||||
dd d2 | ee e2 | fg ad | ed/c/ d2 :| |
||||
|:\ |
||||
fg ad | cB cA | fg ad | cB cA |\ |
||||
dd d2 | ee e2 | fg ad | ed/c/ d2 :| |
||||
---- |
||||
|
||||
=== PlantUML |
||||
|
||||
[plantuml] |
||||
---- |
||||
@startuml |
||||
Alice -> Bob: Authentication Request |
||||
Bob --> Alice: Authentication Response |
||||
@enduml |
||||
---- |
||||
|
||||
=== BPMN |
||||
|
||||
[plantuml] |
||||
---- |
||||
@startbpmn |
||||
start |
||||
:Task 1; |
||||
:Task 2; |
||||
stop |
||||
@endbpmn |
||||
---- |
||||
|
||||
== LaTeX |
||||
|
||||
=== LaTeX in inline-code |
||||
|
||||
`$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$` |
||||
|
||||
== Footnotes |
||||
|
||||
Here's a simple footnote,footnote:[This is the first footnote.] and here's a longer one.footnote:[Here's one with multiple paragraphs and code.] |
||||
|
||||
== Anchor links |
||||
|
||||
<<_bullet_list,Link to bullet list section>> |
||||
|
||||
== Formatting |
||||
|
||||
=== Strikethrough |
||||
|
||||
[line-through]#The world is flat.# We now know that the world is round. This should not be ~struck~ through. |
||||
|
||||
=== Bold |
||||
|
||||
This is *bold* text. So is this *bold* text. |
||||
|
||||
=== Italic |
||||
|
||||
This is _italic_ text. So is this _italic_ text. |
||||
|
||||
=== Task List |
||||
|
||||
* [x] Write the press release |
||||
* [ ] Update the website |
||||
* [ ] Contact the media |
||||
|
||||
=== Emoji shortcodes |
||||
|
||||
Gone camping! :tent: Be back soon. |
||||
|
||||
That is so funny! :joy: |
||||
|
||||
=== Marking and highlighting text |
||||
|
||||
I need to highlight these [highlight]#very important words#. |
||||
|
||||
=== Subscript and Superscript |
||||
|
||||
H~2~O |
||||
|
||||
X^2^ |
||||
|
||||
=== Delimiter |
||||
|
||||
based upon a single quote |
||||
|
||||
''' |
||||
|
||||
based upon a dashes |
||||
|
||||
--- |
||||
|
||||
=== Quotes |
||||
|
||||
[quote] |
||||
____ |
||||
This is a single line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
____ |
||||
|
||||
[quote] |
||||
____ |
||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
____ |
||||
@ -1,55 +0,0 @@
@@ -1,55 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/** |
||||
* Example usage of gc-parser |
||||
* This can be called from Go or used directly in Node.js |
||||
*/ |
||||
|
||||
const { Parser, defaultOptions } = require('./dist/index.js'); |
||||
|
||||
async function main() { |
||||
// Create parser with default options
|
||||
const opts = defaultOptions(); |
||||
opts.linkBaseURL = process.env.LINK_BASE_URL || 'https://example.com'; |
||||
|
||||
const parser = new Parser(opts); |
||||
|
||||
// Get content from command line argument or stdin
|
||||
let content = ''; |
||||
if (process.argv[2]) { |
||||
content = process.argv[2]; |
||||
} else { |
||||
// Read from stdin
|
||||
const readline = require('readline'); |
||||
const rl = readline.createInterface({ |
||||
input: process.stdin, |
||||
output: process.stdout, |
||||
terminal: false |
||||
}); |
||||
|
||||
for await (const line of rl) { |
||||
content += line + '\n'; |
||||
} |
||||
} |
||||
|
||||
if (!content) { |
||||
console.error('No content provided'); |
||||
process.exit(1); |
||||
} |
||||
|
||||
try { |
||||
const result = await parser.process(content); |
||||
|
||||
// Output as JSON for easy parsing
|
||||
console.log(JSON.stringify(result, null, 2)); |
||||
} catch (error) { |
||||
console.error('Error processing content:', error); |
||||
process.exit(1); |
||||
} |
||||
} |
||||
|
||||
if (require.main === module) { |
||||
main(); |
||||
} |
||||
|
||||
module.exports = { main }; |
||||
@ -0,0 +1,23 @@
@@ -0,0 +1,23 @@
|
||||
module.exports = { |
||||
preset: 'ts-jest', |
||||
testEnvironment: 'node', |
||||
roots: ['<rootDir>/src'], |
||||
testMatch: ['**/__tests__/**/*.test.ts', '**/?(*.)+(spec|test).ts'], |
||||
testPathIgnorePatterns: ['/node_modules/', '/dist/', 'asciidoc.test.ts'], |
||||
transform: { |
||||
'^.+\\.ts$': ['ts-jest', { |
||||
tsconfig: { |
||||
esModuleInterop: true, |
||||
}, |
||||
}], |
||||
'^.+\\.js$': 'babel-jest', |
||||
}, |
||||
moduleFileExtensions: ['ts', 'js', 'json'], |
||||
moduleNameMapper: { |
||||
'^marked$': '<rootDir>/node_modules/marked/lib/marked.umd.js', |
||||
}, |
||||
collectCoverageFrom: [ |
||||
'src/**/*.ts', |
||||
'!src/**/*.d.ts', |
||||
], |
||||
}; |
||||
@ -0,0 +1,277 @@
@@ -0,0 +1,277 @@
|
||||
--- |
||||
# this is YAML front matter |
||||
author: James Smith |
||||
summary: This is a summary |
||||
topics: list, of, topics |
||||
variable: one |
||||
array: |
||||
- one thing |
||||
- two things |
||||
- several things |
||||
# all of this data is available to our layout |
||||
--- |
||||
|
||||
# Markdown Test Document |
||||
|
||||
## Bullet list |
||||
|
||||
This is a test unordered list with mixed bullets: |
||||
|
||||
* First item with a number 2. in it |
||||
* Second item |
||||
* Third item |
||||
- Indented item |
||||
- Indented item |
||||
* Fourth item |
||||
|
||||
Another unordered list: |
||||
|
||||
- 1st item |
||||
- 2nd item |
||||
- third item containing _italic_ text |
||||
- indented item |
||||
- second indented item |
||||
- fourth item |
||||
|
||||
This is a test ordered list with indented items: |
||||
|
||||
1. First item |
||||
2. Second item |
||||
3. Third item |
||||
1. Indented item |
||||
2. Indented item |
||||
4. Fourth item |
||||
|
||||
Ordered list that is wrongly numbered: |
||||
|
||||
1. First item |
||||
8. Second item |
||||
3. Third item |
||||
5. Fourth item |
||||
|
||||
This is a mixed list with indented items: |
||||
|
||||
1. First item |
||||
2. Second item |
||||
3. Third item |
||||
* Indented item |
||||
* Indented item |
||||
4. Fourth item |
||||
|
||||
This is another mixed list with indented items: |
||||
|
||||
- First item |
||||
- Second item |
||||
- Third item |
||||
1. Indented item |
||||
2. Indented item |
||||
- Fourth item |
||||
|
||||
## Headers |
||||
|
||||
### Third-level header |
||||
|
||||
#### Fourth-level header |
||||
|
||||
##### Fifth-level header |
||||
|
||||
###### Sixth-level header |
||||
|
||||
## Media and Links |
||||
|
||||
### Nostr address |
||||
|
||||
This should be ignored and rendered as plaintext: naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
||||
|
||||
This is also plaintext: |
||||
|
||||
npub1gv069u6q7zkl393ad47xutpqmyfj0rrfrlnqnlfc2ld38k8nnl4st9wa6q |
||||
|
||||
These should be turned into links: |
||||
|
||||
nostr:naddr1qvzqqqr4gupzplfq3m5v3u5r0q9f255fdeyz8nyac6lagssx8zy4wugxjs8ajf7pqyghwumn8ghj7mn0wd68ytnvv9hxgtcqy4sj6ar9wd6xv6tvv5kkvmmj94kkzuntv3hhwm3dvfuj6enyxgcrset98p3nsve2v5l |
||||
|
||||
nostr:npub1l5sga6xg72phsz5422ykujprejwud075ggrr3z2hwyrfgr7eylqstegx9z |
||||
|
||||
nostr:nevent1qvzqqqqqqypzp382htsmu08k277ps40wqhnfm60st89h5pvjyutghq9cjasuh38qqythwumn8ghj7un9d3shjtnswf5k6ctv9ehx2ap0qqsysletg3lqnl4uy59xsj4rp9rgw67wg23l827f4uvn5ckn20fuxcq45d8pj |
||||
|
||||
nostr:nprofile1qqsxhedgkuneycxpcdjlg6tgtxdy8gurdz64nq2h0flc288a0jag98qguy3nh |
||||
|
||||
nostr:note1txyefcha2xt3pgungx4k6j077dsteyef6hzpyuuku00s4h0eymzq4k33yg |
||||
|
||||
### Hashtag |
||||
|
||||
#testhashtag at the start of the line and #inlinehashtag in the middle |
||||
|
||||
### Wikilinks |
||||
|
||||
[[NKBIP-01|Specification]] and [[mirepoix]] |
||||
|
||||
### URL |
||||
|
||||
https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html |
||||
|
||||
[Welt Online link](https://www.welt.de/politik/ausland/article69a7ca00ad41f3cd65a1bc63/iran-drohte-jedes-schiff-zu-verbrennen-trump-will-oel-tanker-durch-strasse-von-hormus-eskortieren.html) |
||||
|
||||
this should render as plaintext: `http://www.example.com` |
||||
|
||||
this shouild be a hyperlink to the http URL with the same address [wss://theforest.nostr1.com](https://theforest.nostr1.com) |
||||
|
||||
### Images |
||||
|
||||
https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png |
||||
|
||||
 |
||||
|
||||
### Media |
||||
|
||||
#### YouTube |
||||
|
||||
https://youtube.com/shorts/ZWfvChb-i0w |
||||
|
||||
#### Spotify |
||||
|
||||
https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ |
||||
|
||||
#### Audio |
||||
|
||||
https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3 |
||||
|
||||
#### Video |
||||
|
||||
https://v.nostr.build/MTjaYib4upQuf8zn.mp4 |
||||
|
||||
## Tables |
||||
|
||||
### Orderly |
||||
|
||||
| Syntax | Description | |
||||
| ----------- | ----------- | |
||||
| Header | Title | |
||||
| Paragraph | Text | |
||||
|
||||
### Unorderly |
||||
|
||||
| Syntax | Description | |
||||
| --- | ----------- | |
||||
| Header | Title | |
||||
| Paragraph | Text | |
||||
|
||||
## Code blocks |
||||
|
||||
### json |
||||
|
||||
```json |
||||
{ |
||||
"id": "<event_id>", |
||||
"pubkey": "<event_originator_pubkey>", |
||||
"created_at": 1725087283, |
||||
"kind": 30040, |
||||
"tags": [ |
||||
["d", "aesop's-fables-by-aesop"], |
||||
["title", "Aesop's Fables"], |
||||
["author", "Aesop"], |
||||
], |
||||
"sig": "<event_signature>" |
||||
} |
||||
``` |
||||
|
||||
### typescript |
||||
|
||||
```typescript |
||||
/** |
||||
* Get Nostr identifier type |
||||
*/ |
||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||
if (id.startsWith('npub')) return 'npub'; |
||||
if (id.startsWith('nprofile')) return 'nprofile'; |
||||
if (id.startsWith('nevent')) return 'nevent'; |
||||
if (id.startsWith('naddr')) return 'naddr'; |
||||
if (id.startsWith('note')) return 'note'; |
||||
return null; |
||||
} |
||||
``` |
||||
|
||||
### shell |
||||
|
||||
```shell |
||||
|
||||
mkdir new_directory |
||||
cp source.txt destination.txt |
||||
|
||||
``` |
||||
|
||||
### LaTeX |
||||
|
||||
```latex |
||||
$$ |
||||
M = |
||||
\begin{bmatrix} |
||||
\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em] |
||||
\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em] |
||||
0 & \frac{5}{6} & \frac{1}{6} |
||||
\end{bmatrix} |
||||
$$ |
||||
``` |
||||
|
||||
## LateX |
||||
|
||||
### LaTex in inline-code |
||||
|
||||
`$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$` |
||||
|
||||
## Footnotes |
||||
|
||||
Here's a simple footnote,[^1] and here's a longer one.[^bignote] |
||||
|
||||
[^1]: This is the first footnote. |
||||
|
||||
[^bignote]: Here's one with multiple paragraphs and code. |
||||
|
||||
## Anchor links |
||||
|
||||
[Link to bullet list section](#bullet-list) |
||||
|
||||
## Formatting |
||||
|
||||
### Strikethrough |
||||
|
||||
~~The world is flat.~~ We now know that the world is round. |
||||
|
||||
### Bold |
||||
|
||||
This is *italic* text. So is this **bold** text. |
||||
|
||||
### Task List |
||||
|
||||
- [x] Write the press release |
||||
- [ ] Update the website |
||||
- [ ] Contact the media |
||||
|
||||
### Emoji shortcodes |
||||
|
||||
Gone camping! :tent: Be back soon. |
||||
|
||||
That is so funny! :joy: |
||||
|
||||
### Subscript and Superscript |
||||
|
||||
X^2^ |
||||
|
||||
### Delimiter |
||||
|
||||
based upon a - |
||||
|
||||
--- |
||||
|
||||
based upon a * |
||||
|
||||
*** |
||||
|
||||
### Quotes |
||||
|
||||
> This is a single line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
|
||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
> This is a multi line blockequote sdfjsdlfkjasldkfjsdölfkjsdlfkjsadlöfkjsdlöfkjsadölfkjsdlf kjsldfkjsdalkjslkdfjlöskdfjlösdkjfsldkfjsöldkfjlösdkfjalsd kfjlsdkfjlödkfjlaksdfjlkjdfslkjalsdkfjlasdkfj alsdkjflskdfj sdfklj |
||||
@ -0,0 +1,353 @@
@@ -0,0 +1,353 @@
|
||||
import { Parser } from '../parser'; |
||||
import { readFileSync, writeFileSync, mkdirSync } from 'fs'; |
||||
import { join } from 'path'; |
||||
|
||||
/** |
||||
* Simple test runner for AsciiDoc tests (separate from Jest due to Opal compatibility issues) |
||||
*/ |
||||
async function runAsciiDocTests() { |
||||
console.log('Running AsciiDoc tests...\n'); |
||||
|
||||
const asciidocContent = readFileSync(join(__dirname, '../../asciidoc_testdoc.adoc'), 'utf-8'); |
||||
const parser = new Parser({ |
||||
linkBaseURL: 'https://example.com', |
||||
enableNostrAddresses: true, |
||||
wikilinkUrl: '/events?d={dtag}', |
||||
hashtagUrl: '/hashtag/{topic}' |
||||
}); |
||||
|
||||
let passed = 0; |
||||
let failed = 0; |
||||
const failures: string[] = []; |
||||
|
||||
const testPromises: Promise<void>[] = []; |
||||
|
||||
function test(name: string, fn: () => void | Promise<void>) { |
||||
const testPromise = (async () => { |
||||
try { |
||||
const result = fn(); |
||||
if (result instanceof Promise) { |
||||
await result; |
||||
} |
||||
passed++; |
||||
console.log(`✓ ${name}`); |
||||
} catch (error: any) { |
||||
failed++; |
||||
failures.push(`${name}: ${error.message}`); |
||||
console.error(`✗ ${name}: ${error.message}`); |
||||
} |
||||
})(); |
||||
testPromises.push(testPromise); |
||||
} |
||||
|
||||
function expect(actual: any) { |
||||
return { |
||||
toBeDefined: () => { |
||||
if (actual === undefined || actual === null) { |
||||
throw new Error(`Expected value to be defined, but got ${actual}`); |
||||
} |
||||
}, |
||||
toBe: (expected: any) => { |
||||
if (actual !== expected) { |
||||
throw new Error(`Expected ${expected}, but got ${actual}`); |
||||
} |
||||
}, |
||||
toContain: (substring: string) => { |
||||
if (typeof actual === 'string' && !actual.includes(substring)) { |
||||
throw new Error(`Expected string to contain "${substring}"`); |
||||
} |
||||
}, |
||||
toMatch: (regex: RegExp) => { |
||||
if (typeof actual === 'string' && !regex.test(actual)) { |
||||
throw new Error(`Expected string to match ${regex}`); |
||||
} |
||||
}, |
||||
toHaveProperty: (prop: string) => { |
||||
if (!(prop in actual)) { |
||||
throw new Error(`Expected object to have property "${prop}"`); |
||||
} |
||||
}, |
||||
toBeGreaterThan: (value: number) => { |
||||
if (typeof actual !== 'number' || actual <= value) { |
||||
throw new Error(`Expected ${actual} to be greater than ${value}`); |
||||
} |
||||
}, |
||||
length: { |
||||
toBeGreaterThan: (value: number) => { |
||||
if (!Array.isArray(actual) || actual.length <= value) { |
||||
throw new Error(`Expected array length to be greater than ${value}, but got ${actual.length}`); |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
} |
||||
|
||||
// Run tests
|
||||
const result = await parser.process(asciidocContent); |
||||
|
||||
// Write HTML output to file for inspection
|
||||
const outputDir = join(__dirname, '../../test-output'); |
||||
try { |
||||
mkdirSync(outputDir, { recursive: true }); |
||||
} catch (e) { |
||||
// Directory might already exist
|
||||
} |
||||
|
||||
const htmlOutput = `<!DOCTYPE html>
|
||||
<html lang="en"> |
||||
<head> |
||||
<meta charset="UTF-8"> |
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
<meta name="referrer" content="strict-origin-when-cross-origin"> |
||||
<meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'unsafe-inline' 'unsafe-eval' https://www.youtube.com https://s.ytimg.com https://www.gstatic.com https://*.googlevideo.com; frame-src https://www.youtube.com https://youtube.com https://open.spotify.com https://*.googlevideo.com; style-src 'unsafe-inline'; img-src 'self' data: https:; media-src 'self' https:; connect-src https:; child-src https://www.youtube.com https://youtube.com;"> |
||||
<title>AsciiDoc Test Output</title> |
||||
<style> |
||||
body { font-family: sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; line-height: 1.6; } |
||||
.hashtag { color: #1da1f2; font-weight: 500; } |
||||
.wikilink { color: #0066cc; text-decoration: underline; } |
||||
.nostr-link { color: #8b5cf6; text-decoration: underline; } |
||||
pre { background: #f5f5f5; padding: 10px; border-radius: 4px; overflow-x: auto; } |
||||
code { background: #f5f5f5; padding: 2px 4px; border-radius: 3px; font-family: 'Courier New', monospace; } |
||||
.line-through { text-decoration: line-through; } |
||||
.highlight { background-color: #ffeb3b; padding: 2px 4px; border-radius: 3px; } |
||||
.bare-image { max-width: 100%; width: auto; height: auto; margin: 10px 0; display: block; } |
||||
.bare-video, .bare-audio { width: 100%; max-width: 800px; margin: 10px 0; display: block; } |
||||
.youtube-embed, .spotify-embed { max-width: 100%; margin: 10px 0; border-radius: 8px; display: block; } |
||||
.youtube-embed { width: 100%; max-width: 640px; height: auto; aspect-ratio: 16/9; border: 0; display: block; } |
||||
.spotify-embed { width: 100%; max-width: 800px; } |
||||
/* Table styles */ |
||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
||||
table thead { background-color: #f2f2f2; } |
||||
table th { font-weight: bold; padding: 8px; border: 1px solid #ddd; background-color: #f2f2f2; } |
||||
table td { padding: 8px; border: 1px solid #ddd; } |
||||
/* Alignment classes - AsciiDoc uses halign-* and valign-* classes */ |
||||
.halign-left { text-align: left !important; } |
||||
.halign-center { text-align: center !important; } |
||||
.halign-right { text-align: right !important; } |
||||
.valign-top { vertical-align: top !important; } |
||||
.valign-middle { vertical-align: middle !important; } |
||||
.valign-bottom { vertical-align: bottom !important; } |
||||
/* Also handle tableblock classes */ |
||||
.tableblock.halign-left { text-align: left !important; } |
||||
.tableblock.halign-center { text-align: center !important; } |
||||
.tableblock.halign-right { text-align: right !important; } |
||||
.tableblock.valign-top { vertical-align: top !important; } |
||||
.tableblock.valign-middle { vertical-align: middle !important; } |
||||
.tableblock.valign-bottom { vertical-align: bottom !important; } |
||||
/* Task list styles */ |
||||
.checklist { list-style: none; padding-left: 0; } |
||||
.checklist li { padding-left: 1.5em; position: relative; margin: 0.5em 0; } |
||||
.checklist li i.fa-check-square-o::before { content: "☑ "; font-style: normal; font-family: sans-serif; } |
||||
.checklist li i.fa-square-o::before { content: "☐ "; font-style: normal; font-family: sans-serif; } |
||||
.checklist li i { position: absolute; left: 0; font-style: normal; } |
||||
/* Fallback if Font Awesome doesn't load */ |
||||
.checklist li i.fa-check-square-o { display: inline-block; width: 1em; } |
||||
.checklist li i.fa-check-square-o:before { content: "☑"; } |
||||
.checklist li i.fa-square-o { display: inline-block; width: 1em; } |
||||
.checklist li i.fa-square-o:before { content: "☐"; } |
||||
/* AsciiDoc specific styles */ |
||||
.sect1, .sect2, .sect3, .sect4, .sect5 { margin-top: 1.5em; margin-bottom: 1em; } |
||||
.paragraph { margin: 1em 0; } |
||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
||||
table th, table td { border: 1px solid #ddd; padding: 8px; text-align: left; } |
||||
table th { background-color: #f2f2f2; } |
||||
blockquote { border-left: 4px solid #ddd; padding-left: 1em; margin: 1em 0; color: #666; } |
||||
</style> |
||||
</head> |
||||
<body> |
||||
<h1>AsciiDoc Test Document - Parsed Output</h1> |
||||
<hr> |
||||
${result.content} |
||||
<hr> |
||||
<h2>Metadata</h2> |
||||
<pre>${JSON.stringify({ |
||||
hasLaTeX: result.hasLaTeX, |
||||
hasMusicalNotation: result.hasMusicalNotation, |
||||
nostrLinks: result.nostrLinks, |
||||
wikilinks: result.wikilinks, |
||||
hashtags: result.hashtags, |
||||
links: result.links, |
||||
media: result.media |
||||
}, null, 2)}</pre> |
||||
</body> |
||||
</html>`;
|
||||
|
||||
const outputPath = join(outputDir, 'asciidoc-output.html'); |
||||
writeFileSync(outputPath, htmlOutput, 'utf-8'); |
||||
console.log(`\n📄 HTML output written to: ${outputPath}\n`); |
||||
|
||||
test('should parse AsciiDoc content', () => { |
||||
expect(result).toBeDefined(); |
||||
expect(result.content).toBeDefined(); |
||||
expect(typeof result.content).toBe('string'); |
||||
expect(result.content.length).toBeGreaterThan(0); |
||||
}); |
||||
|
||||
test('should have HTML content', () => { |
||||
expect(result.content).toContain('<'); |
||||
expect(result.content).toContain('>'); |
||||
}); |
||||
|
||||
test('should extract table of contents', () => { |
||||
expect(result.tableOfContents).toBeDefined(); |
||||
expect(typeof result.tableOfContents).toBe('string'); |
||||
}); |
||||
|
||||
test('should detect LaTeX', () => { |
||||
expect(result.hasLaTeX).toBeDefined(); |
||||
expect(typeof result.hasLaTeX).toBe('boolean'); |
||||
expect(result.hasLaTeX).toBe(true); |
||||
}); |
||||
|
||||
test('should detect musical notation', () => { |
||||
expect(result.hasMusicalNotation).toBeDefined(); |
||||
expect(typeof result.hasMusicalNotation).toBe('boolean'); |
||||
expect(result.hasMusicalNotation).toBe(true); |
||||
}); |
||||
|
||||
test('should extract nostr links', () => { |
||||
expect(result.nostrLinks).toBeDefined(); |
||||
expect(Array.isArray(result.nostrLinks)).toBe(true); |
||||
expect(result.nostrLinks.length).toBeGreaterThan(0); |
||||
|
||||
const nostrLink = result.nostrLinks[0]; |
||||
expect(nostrLink).toHaveProperty('type'); |
||||
expect(nostrLink).toHaveProperty('id'); |
||||
expect(nostrLink).toHaveProperty('text'); |
||||
expect(nostrLink).toHaveProperty('bech32'); |
||||
const validTypes = ['npub', 'nprofile', 'nevent', 'naddr', 'note']; |
||||
if (!validTypes.includes(nostrLink.type)) { |
||||
throw new Error(`Invalid nostr type: ${nostrLink.type}`); |
||||
} |
||||
}); |
||||
|
||||
test('should extract wikilinks', () => { |
||||
expect(result.wikilinks).toBeDefined(); |
||||
expect(Array.isArray(result.wikilinks)).toBe(true); |
||||
expect(result.wikilinks.length).toBeGreaterThan(0); |
||||
|
||||
const wikilink = result.wikilinks[0]; |
||||
expect(wikilink).toHaveProperty('dtag'); |
||||
expect(wikilink).toHaveProperty('display'); |
||||
expect(wikilink).toHaveProperty('original'); |
||||
}); |
||||
|
||||
test('should extract hashtags', () => { |
||||
expect(result.hashtags).toBeDefined(); |
||||
expect(Array.isArray(result.hashtags)).toBe(true); |
||||
expect(result.hashtags.length).toBeGreaterThan(0); |
||||
|
||||
result.hashtags.forEach((tag: string) => { |
||||
if (tag.includes('#')) { |
||||
throw new Error(`Hashtag should not include #: ${tag}`); |
||||
} |
||||
}); |
||||
}); |
||||
|
||||
test('should extract regular links', () => { |
||||
expect(result.links).toBeDefined(); |
||||
expect(Array.isArray(result.links)).toBe(true); |
||||
|
||||
if (result.links.length > 0) { |
||||
const link = result.links[0]; |
||||
expect(link).toHaveProperty('url'); |
||||
expect(link).toHaveProperty('text'); |
||||
expect(link).toHaveProperty('isExternal'); |
||||
expect(typeof link.isExternal).toBe('boolean'); |
||||
} |
||||
}); |
||||
|
||||
test('should extract media URLs', () => { |
||||
expect(result.media).toBeDefined(); |
||||
expect(Array.isArray(result.media)).toBe(true); |
||||
}); |
||||
|
||||
test('should process nostr: addresses in HTML', () => { |
||||
const nostrAddresses = result.nostrLinks; |
||||
expect(nostrAddresses.length).toBeGreaterThan(0); |
||||
|
||||
nostrAddresses.forEach((link: any) => { |
||||
if (!result.content.includes(`data-nostr-type="${link.type}"`)) { |
||||
throw new Error(`Missing nostr type attribute for ${link.type}`); |
||||
} |
||||
if (!result.content.includes(`data-nostr-id="${link.bech32}"`)) { |
||||
throw new Error(`Missing nostr id attribute for ${link.bech32}`); |
||||
} |
||||
}); |
||||
}); |
||||
|
||||
test('should process wikilinks in HTML', () => { |
||||
const wikilinks = result.wikilinks; |
||||
expect(wikilinks.length).toBeGreaterThan(0); |
||||
|
||||
wikilinks.forEach((wikilink: any) => { |
||||
if (!result.content.includes(`class="wikilink"`)) { |
||||
throw new Error('Missing wikilink class'); |
||||
} |
||||
if (!result.content.includes(`data-dtag="${wikilink.dtag}"`)) { |
||||
throw new Error(`Missing dtag attribute for ${wikilink.dtag}`); |
||||
} |
||||
}); |
||||
}); |
||||
|
||||
test('should process hashtags in HTML', () => { |
||||
const hashtags = result.hashtags; |
||||
expect(hashtags.length).toBeGreaterThan(0); |
||||
|
||||
hashtags.forEach((tag: string) => { |
||||
if (!result.content.includes(`data-topic="${tag}"`)) { |
||||
throw new Error(`Missing topic attribute for ${tag}`); |
||||
} |
||||
if (!result.content.includes('class="hashtag"')) { |
||||
throw new Error('Missing hashtag class'); |
||||
} |
||||
}); |
||||
}); |
||||
|
||||
test('should contain expected content sections', () => { |
||||
if (!/Bullet list|bullet/i.test(result.content)) { |
||||
throw new Error('Missing bullet list section'); |
||||
} |
||||
if (!/Headers|header/i.test(result.content)) { |
||||
throw new Error('Missing headers section'); |
||||
} |
||||
if (!/Media and Links|media|links/i.test(result.content)) { |
||||
throw new Error('Missing media and links section'); |
||||
} |
||||
}); |
||||
|
||||
test('should return consistent structure', () => { |
||||
expect(result).toHaveProperty('content'); |
||||
expect(result).toHaveProperty('tableOfContents'); |
||||
expect(result).toHaveProperty('hasLaTeX'); |
||||
expect(result).toHaveProperty('hasMusicalNotation'); |
||||
expect(result).toHaveProperty('nostrLinks'); |
||||
expect(result).toHaveProperty('wikilinks'); |
||||
expect(result).toHaveProperty('hashtags'); |
||||
expect(result).toHaveProperty('links'); |
||||
expect(result).toHaveProperty('media'); |
||||
}); |
||||
|
||||
// Wait for all tests to complete (each test() call above pushed its
// promise into the out-of-view testPromises array).
await Promise.all(testPromises);

// Print summary banner with pass/fail counters maintained by the harness.
console.log(`\n${'='.repeat(50)}`);
console.log(`Tests passed: ${passed}`);
console.log(`Tests failed: ${failed}`);

if (failures.length > 0) {
  console.log('\nFailures:');
  failures.forEach(f => console.error(` - ${f}`));
  // Non-zero exit so CI treats any failure as fatal.
  process.exit(1);
} else {
  console.log('\nAll tests passed!');
  process.exit(0);
}
}
||||
|
||||
// Run tests
|
||||
runAsciiDocTests().catch(error => { |
||||
console.error('Test runner error:', error); |
||||
process.exit(1); |
||||
}); |
||||
@ -0,0 +1,238 @@
@@ -0,0 +1,238 @@
|
||||
import { Parser } from '../parser'; |
||||
import { readFileSync, writeFileSync, mkdirSync } from 'fs'; |
||||
import { join } from 'path'; |
||||
|
||||
describe('Parser', () => { |
||||
let asciidocContent: string; |
||||
let markdownContent: string; |
||||
|
||||
beforeAll(() => { |
||||
asciidocContent = readFileSync(join(__dirname, '../../asciidoc_testdoc.adoc'), 'utf-8'); |
||||
markdownContent = readFileSync(join(__dirname, '../../markdown_testdoc.md'), 'utf-8'); |
||||
}); |
||||
|
||||
// AsciiDoc tests are run separately using a Node.js script (asciidoc.test.ts)
|
||||
// due to Jest/Opal runtime compatibility issues
|
||||
// Run with: npm run test:asciidoc
|
||||
|
||||
describe('Markdown Test Document', () => { |
||||
let result: any; |
||||
|
||||
beforeAll(async () => { |
||||
const parser = new Parser({ |
||||
linkBaseURL: 'https://example.com', |
||||
enableNostrAddresses: true, |
||||
wikilinkUrl: '/events?d={dtag}', |
||||
hashtagUrl: '/hashtag/{topic}' |
||||
}); |
||||
result = await parser.process(markdownContent); |
||||
|
||||
// Write HTML output to file for inspection
|
||||
const outputDir = join(__dirname, '../../test-output'); |
||||
try { |
||||
mkdirSync(outputDir, { recursive: true }); |
||||
} catch (e) { |
||||
// Directory might already exist
|
||||
} |
||||
|
||||
const htmlOutput = `<!DOCTYPE html>
|
||||
<html lang="en"> |
||||
<head> |
||||
<meta charset="UTF-8"> |
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
<title>Markdown Test Output</title> |
||||
<style> |
||||
body { font-family: sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; line-height: 1.6; } |
||||
.hashtag { color: #1da1f2; font-weight: 500; } |
||||
.wikilink { color: #0066cc; text-decoration: underline; } |
||||
.nostr-link { color: #8b5cf6; text-decoration: underline; } |
||||
pre { background: #f5f5f5; padding: 10px; border-radius: 4px; overflow-x: auto; } |
||||
code { background: #f5f5f5; padding: 2px 4px; border-radius: 3px; font-family: 'Courier New', monospace; } |
||||
.bare-image, .bare-video, .bare-audio { max-width: 100%; margin: 10px 0; } |
||||
.bare-video, .bare-audio { width: 100%; max-width: 600px; } |
||||
blockquote { border-left: 4px solid #ddd; padding-left: 1em; margin: 1em 0; color: #666; } |
||||
table { border-collapse: collapse; width: 100%; margin: 1em 0; } |
||||
table th, table td { border: 1px solid #ddd; padding: 8px; text-align: left; } |
||||
table th { background-color: #f2f2f2; } |
||||
</style> |
||||
</head> |
||||
<body> |
||||
<h1>Markdown Test Document - Parsed Output</h1> |
||||
<hr> |
||||
${result.content} |
||||
<hr> |
||||
<h2>Metadata</h2> |
||||
<pre>${JSON.stringify({ |
||||
frontmatter: result.frontmatter, |
||||
hasLaTeX: result.hasLaTeX, |
||||
hasMusicalNotation: result.hasMusicalNotation, |
||||
nostrLinks: result.nostrLinks, |
||||
wikilinks: result.wikilinks, |
||||
hashtags: result.hashtags, |
||||
links: result.links, |
||||
media: result.media |
||||
}, null, 2)}</pre> |
||||
</body> |
||||
</html>`;
|
||||
|
||||
const outputPath = join(outputDir, 'markdown-output.html'); |
||||
writeFileSync(outputPath, htmlOutput, 'utf-8'); |
||||
// Use console.info to ensure it shows in Jest output
|
||||
console.info(`\n📄 HTML output written to: ${outputPath}\n`); |
||||
}); |
||||
|
||||
it('should parse Markdown content', () => { |
||||
expect(result).toBeDefined(); |
||||
expect(result.content).toBeDefined(); |
||||
expect(typeof result.content).toBe('string'); |
||||
expect(result.content.length).toBeGreaterThan(0); |
||||
}); |
||||
|
||||
it('should have HTML content', () => { |
||||
expect(result.content).toContain('<'); |
||||
expect(result.content).toContain('>'); |
||||
}); |
||||
|
||||
it('should extract frontmatter', () => { |
||||
expect(result.frontmatter).toBeDefined(); |
||||
expect(typeof result.frontmatter).toBe('object'); |
||||
expect(result.frontmatter).toHaveProperty('author'); |
||||
expect(result.frontmatter.author).toBe('James Smith'); |
||||
expect(result.frontmatter).toHaveProperty('summary'); |
||||
expect(result.frontmatter.summary).toBe('This is a summary'); |
||||
}); |
||||
|
||||
it('should detect LaTeX', () => { |
||||
expect(result.hasLaTeX).toBeDefined(); |
||||
expect(typeof result.hasLaTeX).toBe('boolean'); |
||||
// The test doc has LaTeX, so it should be true
|
||||
expect(result.hasLaTeX).toBe(true); |
||||
}); |
||||
|
||||
it('should detect musical notation', () => { |
||||
expect(result.hasMusicalNotation).toBeDefined(); |
||||
expect(typeof result.hasMusicalNotation).toBe('boolean'); |
||||
}); |
||||
|
||||
it('should extract nostr links', () => { |
||||
expect(result.nostrLinks).toBeDefined(); |
||||
expect(Array.isArray(result.nostrLinks)).toBe(true); |
||||
expect(result.nostrLinks.length).toBeGreaterThan(0); |
||||
|
||||
// Check that nostr: addresses are extracted
|
||||
const nostrLink = result.nostrLinks[0]; |
||||
expect(nostrLink).toHaveProperty('type'); |
||||
expect(nostrLink).toHaveProperty('id'); |
||||
expect(nostrLink).toHaveProperty('text'); |
||||
expect(nostrLink).toHaveProperty('bech32'); |
||||
expect(['npub', 'nprofile', 'nevent', 'naddr', 'note']).toContain(nostrLink.type); |
||||
}); |
||||
|
||||
it('should extract wikilinks', () => { |
||||
expect(result.wikilinks).toBeDefined(); |
||||
expect(Array.isArray(result.wikilinks)).toBe(true); |
||||
expect(result.wikilinks.length).toBeGreaterThan(0); |
||||
|
||||
// Check wikilink structure
|
||||
const wikilink = result.wikilinks[0]; |
||||
expect(wikilink).toHaveProperty('dtag'); |
||||
expect(wikilink).toHaveProperty('display'); |
||||
expect(wikilink).toHaveProperty('original'); |
||||
}); |
||||
|
||||
it('should extract hashtags', () => { |
||||
expect(result.hashtags).toBeDefined(); |
||||
expect(Array.isArray(result.hashtags)).toBe(true); |
||||
expect(result.hashtags.length).toBeGreaterThan(0); |
||||
|
||||
// Hashtags should not include the # symbol
|
||||
result.hashtags.forEach((tag: string) => { |
||||
expect(tag).not.toContain('#'); |
||||
}); |
||||
}); |
||||
|
||||
it('should extract regular links', () => { |
||||
expect(result.links).toBeDefined(); |
||||
expect(Array.isArray(result.links)).toBe(true); |
||||
|
||||
if (result.links.length > 0) { |
||||
const link = result.links[0]; |
||||
expect(link).toHaveProperty('url'); |
||||
expect(link).toHaveProperty('text'); |
||||
expect(link).toHaveProperty('isExternal'); |
||||
expect(typeof link.isExternal).toBe('boolean'); |
||||
} |
||||
}); |
||||
|
||||
it('should extract media URLs', () => { |
||||
expect(result.media).toBeDefined(); |
||||
expect(Array.isArray(result.media)).toBe(true); |
||||
}); |
||||
|
||||
it('should process nostr: addresses in HTML', () => { |
||||
// Check that nostr: addresses are converted to links
|
||||
const nostrAddresses = result.nostrLinks; |
||||
expect(nostrAddresses.length).toBeGreaterThan(0); |
||||
|
||||
// Check that HTML contains links for nostr addresses
|
||||
nostrAddresses.forEach((link: any) => { |
||||
expect(result.content).toContain(`data-nostr-type="${link.type}"`); |
||||
expect(result.content).toContain(`data-nostr-id="${link.bech32}"`); |
||||
}); |
||||
}); |
||||
|
||||
it('should process wikilinks in HTML', () => { |
||||
// Check that wikilinks are converted to links
|
||||
const wikilinks = result.wikilinks; |
||||
expect(wikilinks.length).toBeGreaterThan(0); |
||||
|
||||
wikilinks.forEach((wikilink: any) => { |
||||
expect(result.content).toContain(`class="wikilink"`); |
||||
expect(result.content).toContain(`data-dtag="${wikilink.dtag}"`); |
||||
}); |
||||
}); |
||||
|
||||
it('should process hashtags in HTML', () => { |
||||
// Check that hashtags are processed
|
||||
const hashtags = result.hashtags; |
||||
expect(hashtags.length).toBeGreaterThan(0); |
||||
|
||||
hashtags.forEach((tag: string) => { |
||||
expect(result.content).toContain(`data-topic="${tag}"`); |
||||
expect(result.content).toMatch(new RegExp(`class="hashtag"`)); |
||||
}); |
||||
}); |
||||
|
||||
it('should contain expected content sections', () => { |
||||
// Check for some expected content from the test doc
|
||||
expect(result.content).toMatch(/Bullet list|bullet/i); |
||||
expect(result.content).toMatch(/Headers|header/i); |
||||
expect(result.content).toMatch(/Media and Links|media|links/i); |
||||
}); |
||||
|
||||
it('should have empty table of contents for markdown', () => { |
||||
// Markdown doesn't generate TOC by default
|
||||
expect(result.tableOfContents).toBeDefined(); |
||||
expect(typeof result.tableOfContents).toBe('string'); |
||||
}); |
||||
}); |
||||
|
||||
describe('Result structure validation', () => { |
||||
|
||||
it('should return consistent structure for Markdown', async () => { |
||||
const parser = new Parser(); |
||||
const result = await parser.process(markdownContent); |
||||
|
||||
// Check all required fields
|
||||
expect(result).toHaveProperty('content'); |
||||
expect(result).toHaveProperty('tableOfContents'); |
||||
expect(result).toHaveProperty('hasLaTeX'); |
||||
expect(result).toHaveProperty('hasMusicalNotation'); |
||||
expect(result).toHaveProperty('nostrLinks'); |
||||
expect(result).toHaveProperty('wikilinks'); |
||||
expect(result).toHaveProperty('hashtags'); |
||||
expect(result).toHaveProperty('links'); |
||||
expect(result).toHaveProperty('media'); |
||||
}); |
||||
}); |
||||
}); |
||||
@ -1,332 +0,0 @@
@@ -1,332 +0,0 @@
|
||||
import { ContentFormat } from '../types'; |
||||
|
||||
export interface ConvertOptions {
  // When explicitly false, convertToAsciidoc leaves nostr:<bech32>
  // addresses untouched; any other value (including undefined) enables
  // their conversion to link:nostr:...[...] macros.
  enableNostrAddresses?: boolean;
}
||||
|
||||
/** |
||||
* Converts content to AsciiDoc format based on detected format |
||||
* This is the unified entry point - everything becomes AsciiDoc |
||||
*/ |
||||
export function convertToAsciidoc( |
||||
content: string, |
||||
format: ContentFormat, |
||||
linkBaseURL: string, |
||||
options: ConvertOptions = {} |
||||
): string { |
||||
let asciidoc = ''; |
||||
|
||||
switch (format) { |
||||
case ContentFormat.AsciiDoc: |
||||
// For AsciiDoc content, ensure proper formatting
|
||||
asciidoc = content.replace(/\\n/g, '\n'); |
||||
|
||||
// Ensure headers are on their own lines with proper spacing
|
||||
asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, (_match, before, header) => { |
||||
return `${before}\n\n${header}`; |
||||
}); |
||||
break; |
||||
|
||||
case ContentFormat.Wikipedia: |
||||
asciidoc = convertWikipediaToAsciidoc(content); |
||||
break; |
||||
|
||||
case ContentFormat.Markdown: |
||||
asciidoc = convertMarkdownToAsciidoc(content); |
||||
break; |
||||
|
||||
case ContentFormat.Plain: |
||||
default: |
||||
asciidoc = convertPlainTextToAsciidoc(content); |
||||
break; |
||||
} |
||||
|
||||
// Process special elements for all content types
|
||||
// Process wikilinks
|
||||
asciidoc = processWikilinks(asciidoc, linkBaseURL); |
||||
|
||||
// Process nostr: addresses if enabled
|
||||
if (options.enableNostrAddresses !== false) { |
||||
asciidoc = processNostrAddresses(asciidoc, linkBaseURL); |
||||
} |
||||
|
||||
// Process hashtags
|
||||
asciidoc = processHashtags(asciidoc); |
||||
|
||||
return asciidoc; |
||||
} |
||||
|
||||
/** |
||||
* Converts Wikipedia markup to AsciiDoc format |
||||
* Handles Wikipedia-style headings, links, and formatting |
||||
*/ |
||||
function convertWikipediaToAsciidoc(content: string): string { |
||||
let asciidoc = content.replace(/\\n/g, '\n'); |
||||
|
||||
// Convert Wikipedia headings: == Heading == to AsciiDoc == Heading
|
||||
// Wikipedia uses == for level 2, === for level 3, etc.
|
||||
// AsciiDoc uses = for title, == for level 1, === for level 2, etc.
|
||||
// So Wikipedia level 2 (==) maps to AsciiDoc level 1 (==)
|
||||
asciidoc = asciidoc.replace(/^(=+)\s+(.+?)\s+\1$/gm, (match, equals, heading) => { |
||||
const level = equals.length - 1; // Count = signs, subtract 1 for AsciiDoc mapping
|
||||
const asciidocEquals = '='.repeat(level + 1); // AsciiDoc uses one more = for same level
|
||||
return `${asciidocEquals} ${heading.trim()}`; |
||||
}); |
||||
|
||||
// Convert Wikipedia bold: ''text'' to AsciiDoc *text*
|
||||
asciidoc = asciidoc.replace(/''([^']+)''/g, '*$1*'); |
||||
|
||||
// Convert Wikipedia italic: 'text' to AsciiDoc _text_
|
||||
// Be careful not to match apostrophes in words
|
||||
asciidoc = asciidoc.replace(/(^|[^'])'([^']+)'([^']|$)/g, '$1_$2_$3'); |
||||
|
||||
// Convert Wikipedia links: [[Page]] or [[Page|Display]] to wikilinks
|
||||
// These will be processed by processWikilinks later, but we need to ensure
|
||||
// they're in the right format. Wikipedia links are already in [[...]] format
|
||||
// which matches our wikilink format, so they should work as-is.
|
||||
|
||||
// Convert Wikipedia external links: [URL text] to AsciiDoc link:URL[text]
|
||||
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\s+([^\]]+)\]/g, 'link:$1[$2]'); |
||||
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\]/g, 'link:$1[$1]'); |
||||
|
||||
// Convert Wikipedia lists (they use * or # similar to Markdown)
|
||||
// This is handled similarly to Markdown, so we can reuse that logic
|
||||
// But Wikipedia also uses : for definition lists and ; for term lists
|
||||
// For now, we'll handle basic lists and let AsciiDoc handle the rest
|
||||
|
||||
// Convert horizontal rules: ---- to AsciiDoc '''
|
||||
asciidoc = asciidoc.replace(/^----+$/gm, "'''"); |
||||
|
||||
return asciidoc; |
||||
} |
||||
|
||||
/**
 * Converts Markdown to AsciiDoc format.
 * Based on jumble's conversion patterns.
 *
 * NOTE(review): this is an ordered regex pipeline — later passes consume
 * the output of earlier ones (e.g. links must already be in link:URL[text]
 * form before blockquote attributions are parsed), so the statement order
 * is load-bearing.
 */
function convertMarkdownToAsciidoc(content: string): string {
  let asciidoc = content.replace(/\\n/g, '\n');

  // Fix spacing issues: insert missing spaces around inline code spans,
  // closing parens, and '==' runs so later regexes can match cleanly.
  asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
  asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 ==');

  // Note: nostr: addresses are processed later in processNostrAddresses

  // Convert headers, deepest first so '######' is not eaten by '#'.
  // Output uses the symmetric "== title ==" form.
  asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======');
  asciidoc = asciidoc.replace(/^#{5}\s+(.+)$/gm, '===== $1 =====');
  asciidoc = asciidoc.replace(/^#{4}\s+(.+)$/gm, '==== $1 ====');
  asciidoc = asciidoc.replace(/^#{3}\s+(.+)$/gm, '=== $1 ===');
  asciidoc = asciidoc.replace(/^#{2}\s+(.+)$/gm, '== $1 ==');
  asciidoc = asciidoc.replace(/^#{1}\s+(.+)$/gm, '= $1 =');
  // Re-normalize whitespace inside already-converted level-2 headers.
  asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 ==');
  asciidoc = asciidoc.replace(/\s==\s+([^=]+?)\s+==\s/g, ' == $1 == ');

  // Convert emphasis. Order matters: ** (bold) must be handled before *
  // (italic), and ~~ (strikethrough) before ~ (subscript).
  asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold
  asciidoc = asciidoc.replace(/__(.+?)__/g, '*$1*'); // Bold
  asciidoc = asciidoc.replace(/\*(.+?)\*/g, '_$1_'); // Italic
  asciidoc = asciidoc.replace(/_(.+?)_/g, '_$1_'); // Italic (already AsciiDoc form)
  asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough
  asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript
  asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript

  // Convert fenced code blocks (handle both \n and \r\n line endings).
  // Heuristics below skip fences that look like prose or nested Markdown
  // rather than actual code, leaving those fences untouched.
  asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, (_match, lang, code) => {
    const trimmedCode = code.trim();
    if (trimmedCode.length === 0) return '';

    // Signals that the fenced content really is source code.
    const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode);
    // Long, purely alphabetic content is probably prose, not code.
    const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50;
    const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3;
    // Fenced content that itself looks like Markdown (headers, lists,
    // quotes, tables) is left as a fence.
    const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode);

    if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) {
      return _match;
    }

    return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`;
  });
  asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code (same syntax in AsciiDoc)
  asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code

  // Convert images: ![alt](url) -> image::url[alt,width=100%].
  asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]');
  asciidoc = asciidoc.replace(/image::([^\[]+)\[([^\]]+),width=100%\]/g, 'image::$1[$2,width=100%]');

  // Convert links: [text](url) -> link:url[text].
  asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]');

  // Convert horizontal rules: --- -> '''.
  asciidoc = asciidoc.replace(/^---$/gm, '\'\'\'');

  // Convert unordered lists: *, -, + bullets all become AsciiDoc '*',
  // keeping the original leading indentation.
  asciidoc = asciidoc.replace(/^(\s*)\*\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)-\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)\+\s+(.+)$/gm, '$1* $2');

  // Convert ordered lists: "1. item" -> ". item" (AsciiDoc auto-numbers).
  asciidoc = asciidoc.replace(/^(\s*)\d+\.\s+(.+)$/gm, '$1. $2');

  // Convert blockquotes; a trailing line starting with an em-dash or "--"
  // is treated as an attribution and mapped to a [quote] block.
  asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => {
    const lines = match.split('\n').map(line => line.replace(/^>\s*/, ''));

    let quoteBodyLines: string[] = [];
    let attributionLine: string | undefined;

    // Scan bottom-up for the attribution line.
    for (let i = lines.length - 1; i >= 0; i--) {
      const line = lines[i].trim();
      if (line.startsWith('—') || line.startsWith('--')) {
        attributionLine = line;
        quoteBodyLines = lines.slice(0, i);
        break;
      }
    }

    const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim();

    if (attributionLine) {
      let cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim();

      let author = '';
      let source = '';

      // "Author, link:url[title]" form — links were already converted to
      // link:url[text] by the link pass above.
      const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^[\\]]+)\[([^\\]]+)\]$/);

      if (linkMatch) {
        author = linkMatch[1].trim();
        source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`;
      } else {
        // Plain "Author, Source" form.
        const parts = cleanedAttribution.split(',').map(p => p.trim());
        author = parts[0];
        if (parts.length > 1) {
          source = parts.slice(1).join(', ').trim();
        }
      }

      return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`;
    } else {
      return `____\n${quoteContent}\n____`;
    }
  });

  // Convert pipe tables: header row + separator row + data rows become an
  // AsciiDoc |=== table. NOTE(review): the emitted [cols="1,1"] hard-codes
  // two columns regardless of actual column count — confirm intended.
  asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|]*[\r\n]+(\|.*\|[\r\n]+)*)/g, (match) => {
    const lines = match.trim().split('\n').filter(line => line.trim());
    if (lines.length < 2) return match;

    const headerRow = lines[0];
    const separatorRow = lines[1];
    const dataRows = lines.slice(2);

    // Require a genuine Markdown separator row (contains dashes).
    if (!separatorRow.includes('-')) return match;

    let tableAsciidoc = '[cols="1,1"]\n|===\n';
    tableAsciidoc += headerRow + '\n';
    dataRows.forEach(row => {
      tableAsciidoc += row + '\n';
    });
    tableAsciidoc += '|===';

    return tableAsciidoc;
  });

  // Convert footnotes: collect [^id]: definitions, then inline each [^id]
  // reference as footnote:[definition text].
  const footnoteDefinitions: { [id: string]: string } = {};
  let tempAsciidoc = asciidoc;

  // A definition runs until the next definition, a rule, a header, or EOF.
  tempAsciidoc = tempAsciidoc.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_, id, text) => {
    footnoteDefinitions[id] = text.trim();
    return '';
  });

  asciidoc = tempAsciidoc.replace(/\[\^([^\]]+)\]/g, (match, id) => {
    if (footnoteDefinitions[id]) {
      return `footnote:[${footnoteDefinitions[id]}]`;
    }
    // Reference without a definition: leave it untouched.
    return match;
  });

  return asciidoc;
}
||||
|
||||
/** |
||||
* Converts plain text to AsciiDoc format |
||||
* Preserves line breaks by converting single newlines to line continuations |
||||
*/ |
||||
function convertPlainTextToAsciidoc(content: string): string { |
||||
// Preserve double newlines (paragraph breaks)
|
||||
// Convert single newlines to line continuations ( +\n)
|
||||
return content |
||||
.replace(/\r\n/g, '\n') // Normalize line endings
|
||||
.replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double
|
||||
.replace(/([^\n])\n([^\n])/g, '$1 +\n$2'); // Single newlines become line continuations
|
||||
} |
||||
|
||||
/** |
||||
* Normalizes text to d-tag format |
||||
*/ |
||||
function normalizeDtag(text: string): string { |
||||
return text |
||||
.toLowerCase() |
||||
.replace(/[^a-z0-9]+/g, '-') |
||||
.replace(/^-+|-+$/g, ''); |
||||
} |
||||
|
||||
/** |
||||
* Processes wikilinks: [[target]] or [[target|display text]] |
||||
* Converts to WIKILINK: placeholder format to protect from AsciiDoc processing |
||||
*/ |
||||
function processWikilinks(content: string, linkBaseURL: string): string { |
||||
// Process bookstr macro wikilinks: [[book::...]]
|
||||
content = content.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => { |
||||
const cleanContent = bookContent.trim(); |
||||
return `BOOKSTR:${cleanContent}`; |
||||
}); |
||||
|
||||
// Process standard wikilinks: [[Target Page]] or [[target page|see this]]
|
||||
// Use placeholder format to prevent AsciiDoc from processing the brackets
|
||||
content = content.replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, (_match, target, displayText) => { |
||||
const cleanTarget = target.trim(); |
||||
const cleanDisplay = displayText ? displayText.trim() : cleanTarget; |
||||
const dTag = normalizeDtag(cleanTarget); |
||||
|
||||
// Use placeholder format: WIKILINK:dtag|display
|
||||
// This prevents AsciiDoc from interpreting the brackets
|
||||
return `WIKILINK:${dTag}|${cleanDisplay}`; |
||||
}); |
||||
|
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* Processes nostr: addresses |
||||
* Converts to link:nostr:...[...] format |
||||
* Valid bech32 prefixes: npub, nprofile, nevent, naddr, note |
||||
*/ |
||||
function processNostrAddresses(content: string, linkBaseURL: string): string { |
||||
// Match nostr: followed by valid bech32 prefix and identifier
|
||||
// Bech32 format: prefix + separator (1) + data (at least 6 chars for valid identifiers)
|
||||
const nostrPattern = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi; |
||||
return content.replace(nostrPattern, (_match, bech32Id) => { |
||||
return `link:nostr:${bech32Id}[${bech32Id}]`; |
||||
}); |
||||
} |
||||
|
||||
/** |
||||
* Processes hashtags |
||||
* Converts to hashtag:tag[#tag] format |
||||
*/ |
||||
function processHashtags(content: string): string { |
||||
// Match # followed by word characters, avoiding those in URLs, code blocks, etc.
|
||||
return content.replace(/\B#([a-zA-Z0-9_]+)/g, (_match, hashtag) => { |
||||
const normalizedHashtag = hashtag.toLowerCase(); |
||||
return `hashtag:${normalizedHashtag}[#${hashtag}]`; |
||||
}); |
||||
} |
||||
@ -1,274 +0,0 @@
@@ -1,274 +0,0 @@
|
||||
import { NostrLink, Wikilink } from '../types'; |
||||
|
||||
export interface ExtractedMetadata {
  // Deduplicated nostr entities found as nostr:<bech32> references.
  nostrLinks: NostrLink[];
  // Deduplicated [[target|display]] wikilinks (keyed on dtag|display).
  wikilinks: Wikilink[];
  // Lower-cased hashtag names without the leading '#', in first-seen order.
  hashtags: string[];
  // Markdown, AsciiDoc, and bare links; isExternal is computed against
  // the linkBaseURL passed to extractMetadata.
  links: Array<{ url: string; text: string; isExternal: boolean }>;
  // URLs classified as image or video media.
  media: string[];
}
||||
|
||||
/** |
||||
* Extracts metadata from content before processing |
||||
*/ |
||||
export function extractMetadata(content: string, linkBaseURL: string): ExtractedMetadata { |
||||
return { |
||||
nostrLinks: extractNostrLinks(content), |
||||
wikilinks: extractWikilinks(content), |
||||
hashtags: extractHashtags(content), |
||||
links: extractLinks(content, linkBaseURL), |
||||
media: extractMedia(content), |
||||
}; |
||||
} |
||||
|
||||
/** |
||||
* Extract Nostr links from content |
||||
*/ |
||||
function extractNostrLinks(content: string): NostrLink[] { |
||||
const nostrLinks: NostrLink[] = []; |
||||
const seen = new Set<string>(); |
||||
|
||||
// Extract nostr: prefixed links (valid bech32 format)
|
||||
const nostrMatches = content.match(/nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi) || []; |
||||
nostrMatches.forEach(match => { |
||||
const id = match.substring(6); // Remove 'nostr:'
|
||||
const type = getNostrType(id); |
||||
if (type && !seen.has(id)) { |
||||
seen.add(id); |
||||
nostrLinks.push({ |
||||
type, |
||||
id, |
||||
text: match, |
||||
bech32: id, |
||||
}); |
||||
} |
||||
}); |
||||
|
||||
return nostrLinks; |
||||
} |
||||
|
||||
/** |
||||
* Extract wikilinks from content |
||||
*/ |
||||
function extractWikilinks(content: string): Wikilink[] { |
||||
const wikilinks: Wikilink[] = []; |
||||
const seen = new Set<string>(); |
||||
|
||||
// Match [[target]] or [[target|display]]
|
||||
const wikilinkPattern = /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g; |
||||
let match; |
||||
|
||||
while ((match = wikilinkPattern.exec(content)) !== null) { |
||||
const target = match[1].trim(); |
||||
const display = match[2] ? match[2].trim() : target; |
||||
const dtag = normalizeDtag(target); |
||||
const key = `${dtag}|${display}`; |
||||
|
||||
if (!seen.has(key)) { |
||||
seen.add(key); |
||||
wikilinks.push({ |
||||
dtag, |
||||
display, |
||||
original: match[0], |
||||
}); |
||||
} |
||||
} |
||||
|
||||
return wikilinks; |
||||
} |
||||
|
||||
/** |
||||
* Extract hashtags from content |
||||
* Excludes hashtags in URLs, code blocks, and inline code |
||||
*/ |
||||
function extractHashtags(content: string): string[] { |
||||
const hashtags: string[] = []; |
||||
const seen = new Set<string>(); |
||||
|
||||
// Remove code blocks first to avoid matching inside them
|
||||
const codeBlockPattern = /```[\s\S]*?```/g; |
||||
const inlineCodePattern = /`[^`]+`/g; |
||||
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||
|
||||
let processedContent = content |
||||
.replace(codeBlockPattern, '') // Remove code blocks
|
||||
.replace(inlineCodePattern, '') // Remove inline code
|
||||
.replace(urlPattern, ''); // Remove URLs
|
||||
|
||||
// Extract hashtags: #hashtag (word boundary to avoid matching in URLs)
|
||||
const hashtagPattern = /\B#([a-zA-Z0-9_]+)/g; |
||||
let match; |
||||
|
||||
while ((match = hashtagPattern.exec(processedContent)) !== null) { |
||||
const tag = match[1].toLowerCase(); |
||||
if (!seen.has(tag)) { |
||||
hashtags.push(tag); |
||||
seen.add(tag); |
||||
} |
||||
} |
||||
|
||||
return hashtags; |
||||
} |
||||
|
||||
/** |
||||
* Extract regular links from content |
||||
*/ |
||||
function extractLinks(content: string, linkBaseURL: string): Array<{ url: string; text: string; isExternal: boolean }> { |
||||
const links: Array<{ url: string; text: string; isExternal: boolean }> = []; |
||||
const seen = new Set<string>(); |
||||
|
||||
// Extract markdown links: [text](url) - optimized to avoid double matching
|
||||
const markdownLinkPattern = /\[([^\]]+)\]\(([^)]+)\)/g; |
||||
let markdownMatch; |
||||
while ((markdownMatch = markdownLinkPattern.exec(content)) !== null) { |
||||
const [, text, url] = markdownMatch; |
||||
if (!seen.has(url) && !isNostrUrl(url)) { |
||||
seen.add(url); |
||||
links.push({ |
||||
url, |
||||
text, |
||||
isExternal: isExternalUrl(url, linkBaseURL), |
||||
}); |
||||
} |
||||
} |
||||
|
||||
// Extract asciidoc links: link:url[text] - optimized to avoid double matching
|
||||
const asciidocLinkPattern = /link:([^\[]+)\[([^\]]+)\]/g; |
||||
let asciidocMatch; |
||||
while ((asciidocMatch = asciidocLinkPattern.exec(content)) !== null) { |
||||
const [, url, text] = asciidocMatch; |
||||
if (!seen.has(url) && !isNostrUrl(url)) { |
||||
seen.add(url); |
||||
links.push({ |
||||
url, |
||||
text, |
||||
isExternal: isExternalUrl(url, linkBaseURL), |
||||
}); |
||||
} |
||||
} |
||||
|
||||
// Extract raw URLs (basic pattern)
|
||||
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||
const rawUrls = content.match(urlPattern) || []; |
||||
rawUrls.forEach(url => { |
||||
if (!seen.has(url) && !isNostrUrl(url)) { |
||||
seen.add(url); |
||||
links.push({ |
||||
url, |
||||
text: url, |
||||
isExternal: isExternalUrl(url, linkBaseURL), |
||||
}); |
||||
} |
||||
}); |
||||
|
||||
return links; |
||||
} |
||||
|
||||
/** |
||||
* Extract media URLs from content |
||||
*/ |
||||
function extractMedia(content: string): string[] { |
||||
const media: string[] = []; |
||||
const seen = new Set<string>(); |
||||
|
||||
// Extract markdown images:  - optimized to avoid double matching
|
||||
const markdownImagePattern = /!\[[^\]]*\]\(([^)]+)\)/g; |
||||
let markdownImageMatch; |
||||
while ((markdownImageMatch = markdownImagePattern.exec(content)) !== null) { |
||||
const url = markdownImageMatch[1]; |
||||
if (url && !seen.has(url)) { |
||||
if (isImageUrl(url) || isVideoUrl(url)) { |
||||
media.push(url); |
||||
seen.add(url); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Extract asciidoc images: image::url[alt] - optimized to avoid double matching
|
||||
const asciidocImagePattern = /image::([^\[]+)\[/g; |
||||
let asciidocImageMatch; |
||||
while ((asciidocImageMatch = asciidocImagePattern.exec(content)) !== null) { |
||||
const url = asciidocImageMatch[1]; |
||||
if (url && !seen.has(url)) { |
||||
if (isImageUrl(url) || isVideoUrl(url)) { |
||||
media.push(url); |
||||
seen.add(url); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Extract raw image/video URLs
|
||||
const urlPattern = /https?:\/\/[^\s<>"']+/g; |
||||
const rawUrls = content.match(urlPattern) || []; |
||||
rawUrls.forEach(url => { |
||||
if (!seen.has(url) && (isImageUrl(url) || isVideoUrl(url))) { |
||||
media.push(url); |
||||
seen.add(url); |
||||
} |
||||
}); |
||||
|
||||
return media; |
||||
} |
||||
|
||||
/** |
||||
* Get Nostr identifier type |
||||
*/ |
||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||
if (id.startsWith('npub')) return 'npub'; |
||||
if (id.startsWith('nprofile')) return 'nprofile'; |
||||
if (id.startsWith('nevent')) return 'nevent'; |
||||
if (id.startsWith('naddr')) return 'naddr'; |
||||
if (id.startsWith('note')) return 'note'; |
||||
return null; |
||||
} |
||||
|
||||
/** |
||||
* Normalize text to d-tag format |
||||
*/ |
||||
function normalizeDtag(text: string): string { |
||||
return text |
||||
.toLowerCase() |
||||
.replace(/[^a-z0-9]+/g, '-') |
||||
.replace(/^-+|-+$/g, ''); |
||||
} |
||||
|
||||
/** |
||||
* Check if URL is external |
||||
*/ |
||||
function isExternalUrl(url: string, linkBaseURL: string): boolean { |
||||
if (!linkBaseURL) return true; |
||||
try { |
||||
// Use a simple string-based check for Node.js compatibility
|
||||
// Extract hostname from URL string
|
||||
const urlMatch = url.match(/^https?:\/\/([^\/]+)/); |
||||
const baseMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/); |
||||
|
||||
if (urlMatch && baseMatch) { |
||||
return urlMatch[1] !== baseMatch[1]; |
||||
} |
||||
return true; |
||||
} catch { |
||||
return true; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Check if URL is a Nostr URL |
||||
*/ |
||||
function isNostrUrl(url: string): boolean { |
||||
return url.startsWith('nostr:') || getNostrType(url) !== null; |
||||
} |
||||
|
||||
/** |
||||
* Check if URL is an image |
||||
*/ |
||||
function isImageUrl(url: string): boolean { |
||||
return /\.(jpeg|jpg|png|gif|webp|svg)$/i.test(url); |
||||
} |
||||
|
||||
/** |
||||
* Check if URL is a video |
||||
*/ |
||||
function isVideoUrl(url: string): boolean { |
||||
return /\.(mp4|webm|ogg)$/i.test(url); |
||||
} |
||||
@ -1,3 +1,3 @@
@@ -1,3 +1,3 @@
|
||||
export * from './parser'; |
||||
export * from './types'; |
||||
export * from './detector'; |
||||
export * from './detector'; |
||||
|
||||
@ -0,0 +1,562 @@
@@ -0,0 +1,562 @@
|
||||
import { ParserOptions, NostrLink, Wikilink } from './types'; |
||||
|
||||
/**
 * Result of post-processing parsed HTML: the transformed markup plus the
 * wikilinks, hashtags, and nostr: addresses discovered along the way.
 */
export interface PostProcessResult {
  /** Final HTML with wikilinks/hashtags/nostr addresses converted to markup. */
  html: string;
  /** Nostr entities (npub/nprofile/nevent/naddr/note) found in the content. */
  nostrLinks: NostrLink[];
  /** Wikilinks ([[dtag]] / [[dtag|display]]) found in the content. */
  wikilinks: Wikilink[];
  /** Unique hashtag topics (without the leading '#') found in the content. */
  hashtags: string[];
}
||||
|
||||
/**
 * Post-process HTML to convert wikilinks, hashtags, and nostr: addresses
 * into anchor/span markup, and to normalize embedded media (YouTube,
 * Spotify, bare image/video/audio URLs) and table cell alignment.
 *
 * Pass ordering is deliberate and load-bearing; see the numbered comments in
 * the YouTube section before reordering anything.
 *
 * @param html - HTML produced by the markdown/AsciiDoc converter.
 * @param options - Parser options (link base URL, URL builders, feature toggles).
 * @param skipWikilinksAndHashtags - If true, skip processing wikilinks and hashtags (already processed)
 * @returns Transformed HTML plus the nostr links, wikilinks, and hashtags found.
 */
export function postProcess(html: string, options: ParserOptions, skipWikilinksAndHashtags: boolean = false): PostProcessResult {
  let processed = html;
  const nostrLinks: NostrLink[] = [];
  const wikilinks: Wikilink[] = [];
  const hashtags: string[] = [];

  // First, mark code blocks to avoid processing inside them.
  // NOTE(review): markers are computed against the ORIGINAL html, but later
  // passes test indices against the mutated `processed` string — after the
  // first replacement the offsets drift; confirm this is acceptable.
  const codeBlockMarkers: Array<{ start: number; end: number }> = [];
  const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi;
  let match;
  while ((match = codeBlockRegex.exec(html)) !== null) {
    codeBlockMarkers.push({ start: match.index, end: match.index + match[0].length });
  }

  // True when `index` falls inside any recorded <pre>/<code> span.
  function isInCodeBlock(index: number): boolean {
    return codeBlockMarkers.some(marker => index >= marker.start && index < marker.end);
  }

  // Process nostr: addresses (but not in code blocks).
  // Enabled unless explicitly turned off (undefined counts as enabled).
  if (options.enableNostrAddresses !== false) {
    const nostrRegex = /nostr:([np][a-z0-9]+1[a-z0-9]+)/gi;
    const replacements: Array<{ match: string; replacement: string; index: number }> = [];

    while ((match = nostrRegex.exec(processed)) !== null) {
      if (isInCodeBlock(match.index)) continue;

      const bech32 = match[1];
      const type = getNostrType(bech32);
      if (!type) continue; // unrecognized prefix: leave the text untouched

      const link: NostrLink = {
        type,
        id: bech32,
        text: match[0],
        bech32: bech32
      };
      nostrLinks.push(link);

      // Route to the site's /nostr/ page when a base URL is configured,
      // otherwise fall back to an in-page anchor.
      const url = options.linkBaseURL
        ? `${options.linkBaseURL}/nostr/${bech32}`
        : `#nostr-${bech32}`;

      replacements.push({
        match: match[0],
        replacement: `<a href="${escapeHtml(url)}" class="nostr-link" data-nostr-type="${type}" data-nostr-id="${escapeHtml(bech32)}">${escapeHtml(match[0])}</a>`,
        index: match.index
      });
    }

    // Apply replacements in reverse order to preserve indices
    replacements.reverse().forEach(({ match, replacement, index }) => {
      processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
    });
  }

  // Process wikilinks: [[dtag]] or [[dtag|display]] (but not in code blocks)
  // Skip if already processed (for AsciiDoc)
  if (!skipWikilinksAndHashtags) {
    const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
    const wikilinkReplacements: Array<{ match: string; replacement: string; index: number }> = [];

    while ((match = wikilinkRegex.exec(processed)) !== null) {
      if (isInCodeBlock(match.index)) continue;

      // Skip if already inside a link tag
      const beforeMatch = processed.substring(0, match.index);
      const lastOpenTag = beforeMatch.lastIndexOf('<a');
      const lastCloseTag = beforeMatch.lastIndexOf('</a>');
      if (lastOpenTag > lastCloseTag) continue; // Inside a link

      // First '|' splits target from display; extra '|'s stay in the display.
      const content = match[1];
      const parts = content.split('|');
      const dtag = parts[0].trim();
      const display = parts.length > 1 ? parts.slice(1).join('|').trim() : dtag;

      const wikilink: Wikilink = {
        dtag,
        display,
        original: match[0]
      };
      wikilinks.push(wikilink);

      // URL resolution priority: callback > template string > base URL > anchor.
      let url: string;
      if (typeof options.wikilinkUrl === 'function') {
        url = options.wikilinkUrl(dtag);
      } else if (typeof options.wikilinkUrl === 'string') {
        url = options.wikilinkUrl.replace('{dtag}', encodeURIComponent(dtag));
      } else {
        url = options.linkBaseURL
          ? `${options.linkBaseURL}/events?d=${encodeURIComponent(dtag)}`
          : `#${encodeURIComponent(dtag)}`;
      }

      wikilinkReplacements.push({
        match: match[0],
        replacement: `<a href="${escapeHtml(url)}" class="wikilink" data-dtag="${escapeHtml(dtag)}">${escapeHtml(display)}</a>`,
        index: match.index
      });
    }

    // Apply wikilink replacements in reverse order
    wikilinkReplacements.reverse().forEach(({ match, replacement, index }) => {
      processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
    });

    // Process hashtags: #hashtag (but not in code blocks or inside HTML tags)
    // Match hashtag at start of string, after whitespace, after >, or immediately after opening tags
    const hashtagRegex = /(#[\w-]+)/g;
    const hashtagReplacements: Array<{ match: string; replacement: string; index: number }> = [];

    while ((match = hashtagRegex.exec(processed)) !== null) {
      if (isInCodeBlock(match.index)) continue;

      // Check if we're inside an HTML tag
      const beforeMatch = processed.substring(0, match.index);
      const lastOpenTag = beforeMatch.lastIndexOf('<');
      const lastCloseTag = beforeMatch.lastIndexOf('>');
      if (lastOpenTag > lastCloseTag) continue; // Inside a tag

      // Skip if already inside a link or span
      const lastLinkOpen = beforeMatch.lastIndexOf('<a');
      const lastLinkClose = beforeMatch.lastIndexOf('</a>');
      const lastSpanOpen = beforeMatch.lastIndexOf('<span');
      const lastSpanClose = beforeMatch.lastIndexOf('</span>');
      if (lastLinkOpen > lastLinkClose || lastSpanOpen > lastSpanClose) continue;

      // Check what's before the hashtag (100-char lookback window).
      const charBefore = match.index > 0 ? processed[match.index - 1] : '';
      const beforeHashtag = processed.substring(Math.max(0, match.index - 100), match.index);
      const lastTagClose = beforeHashtag.lastIndexOf('>');
      const textAfterTag = beforeHashtag.substring(lastTagClose + 1);

      // Hashtag is valid if:
      // 1. At start of string
      // 2. Preceded by whitespace
      // 3. Preceded by >
      // 4. Immediately after opening tag (like <p>#hashtag)
      const isValidPosition =
        match.index === 0 ||
        /\s/.test(charBefore) ||
        charBefore === '>' ||
        (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag));

      if (!isValidPosition) continue;

      const hashtag = match[1];
      const topic = hashtag.substring(1); // drop the leading '#'
      // NOTE(review): `prefix` is prepended to the replacement, but match[0]
      // does NOT include charBefore, so when prefix === charBefore (the
      // whitespace case) that character appears to be emitted twice — verify
      // against the renderer's expected output.
      const prefix = (match.index === 0 || charBefore === '>' || (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag)))
        ? ''
        : charBefore;

      if (!hashtags.includes(topic)) {
        hashtags.push(topic);
      }

      // URL resolution priority: callback > template string > no link (span).
      let url: string | undefined;
      if (typeof options.hashtagUrl === 'function') {
        url = options.hashtagUrl(topic);
      } else if (typeof options.hashtagUrl === 'string') {
        url = options.hashtagUrl.replace('{topic}', encodeURIComponent(topic));
      }

      const replacement = url
        ? `${prefix}<a href="${escapeHtml(url)}" class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</a>`
        : `${prefix}<span class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</span>`;

      hashtagReplacements.push({
        match: match[0],
        replacement,
        index: match.index
      });
    }

    // Apply hashtag replacements in reverse order
    hashtagReplacements.reverse().forEach(({ match, replacement, index }) => {
      processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
    });
  }

  // Extract wikilinks and hashtags from already-processed HTML (for AsciiDoc)
  if (skipWikilinksAndHashtags) {
    // Extract wikilinks from existing links
    const wikilinkLinkRegex = /<a[^>]+class="wikilink"[^>]+data-dtag="([^"]+)"[^>]*>([^<]+)<\/a>/g;
    while ((match = wikilinkLinkRegex.exec(processed)) !== null) {
      wikilinks.push({
        dtag: match[1],
        display: match[2],
        original: match[0]
      });
    }

    // Extract hashtags from existing spans/links.
    // The \1 backreference requires the rendered text to equal "#" + topic.
    const hashtagRegex = /<(?:a|span)[^>]+class="hashtag"[^>]+data-topic="([^"]+)"[^>]*>#\1<\/\w+>/g;
    while ((match = hashtagRegex.exec(processed)) !== null) {
      const topic = match[1];
      if (!hashtags.includes(topic)) {
        hashtags.push(topic);
      }
    }
  }

  // Remove links inside code blocks (both <code> and <pre> tags)
  // This ensures URLs in code blocks remain as plain text
  const codeBlockLinkRegex = /(<(?:code|pre)[^>]*>)([\s\S]*?)(<\/(?:code|pre)>)/gi;
  processed = processed.replace(codeBlockLinkRegex, (match, openTag, content, closeTag) => {
    // Remove all <a> tags inside code blocks, keeping only the text content
    const cleanedContent = content.replace(/<a[^>]*>(.*?)<\/a>/gi, '$1');
    return openTag + cleanedContent + closeTag;
  });

  // Process YouTube URLs - ORDER IS CRITICAL to avoid double-parsing
  // 1. FIRST: Fix video tags that contain YouTube URLs (before they get processed as bare URLs)
  // AsciiDoc's video:: macro creates <video> tags, but YouTube URLs should be iframes
  const youtubeVideoTagRegex = /<video[^>]+src="(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+))"[^>]*>[\s\S]*?<\/video>/gi;
  processed = processed.replace(youtubeVideoTagRegex, (match, url, videoId) => {
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
  });

  // 2. SECOND: Process YouTube links in <a> tags
  // IMPORTANT: Be very specific with YouTube regex to avoid matching Spotify URLs
  const youtubeLinkRegex = /<a[^>]+href="(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+))"[^>]*>.*?<\/a>/gi;
  processed = processed.replace(youtubeLinkRegex, (match, url, videoId) => {
    // NOTE(review): indexOf(match) finds the FIRST occurrence, which may not
    // be the one being replaced if the same link appears twice — confirm.
    if (isInCodeBlock(processed.indexOf(match))) return match;
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
  });

  // 3. THIRD: Fix malformed YouTube iframes from AsciiDoc video:: macro
  // AsciiDoc sometimes creates iframes with malformed YouTube URLs (watch?v= or shorts/ instead of embed/)
  // Match the entire iframe element including closing tag to avoid duplicates
  const malformedYoutubeIframeRegex = /<iframe[^>]+src="[^"]*youtube[^"]*(?:watch\?v=|shorts\/)([a-zA-Z0-9_-]+)[^"]*"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi;
  processed = processed.replace(malformedYoutubeIframeRegex, (match, videoId) => {
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
  });

  // 3.5: Fix YouTube iframes with embed URLs but wrong parameters or missing required attributes
  // AsciiDoc's video:: macro creates iframes with ?rel=0 or missing allow/referrerpolicy attributes
  // Match iframes with embed URLs that don't have enablejsapi=1 or are missing required attributes
  const incompleteYoutubeIframeRegex = /<iframe[^>]+src="https?:\/\/(?:www\.)?youtube\.com\/embed\/([a-zA-Z0-9_-]+)(\?[^"]*)?"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi;
  // NOTE(review): the `params` capture is unused in the callback below.
  processed = processed.replace(incompleteYoutubeIframeRegex, (match, videoId, params) => {
    // Check if this iframe already has the correct format (has enablejsapi=1 and required attributes)
    if (match.includes('enablejsapi=1') &&
        match.includes('allow=') &&
        match.includes('referrerpolicy=') &&
        match.includes('class="youtube-embed"')) {
      return match; // Already correct, don't modify
    }
    // Fix the iframe with proper attributes
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
  });

  // 4. FOURTH: Fix any existing YouTube iframes that have malformed embed URLs (AsciiDoc sometimes creates broken embed URLs)
  // Match the entire iframe element including closing tag to avoid duplicates
  const brokenYoutubeIframeRegex = /<iframe[^>]+src="[^"]*youtube\.com\/embed\/[^"]*watch\?v=([a-zA-Z0-9_-]+)[^"]*"[^>]*(?:\/>|>[\s\S]*?<\/iframe>)/gi;
  processed = processed.replace(brokenYoutubeIframeRegex, (match, videoId) => {
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
  });

  // 5. LAST: Handle bare YouTube URLs (not in links, video tags, or iframes)
  // IMPORTANT: Match must be specific to youtube.com or youtu.be to avoid matching Spotify
  // This must come AFTER processing video tags and links to avoid double-parsing
  const bareYoutubeRegex = /(https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+)(?:\?[^"\s<>]*)?)/gi;
  const youtubeReplacements: Array<{ match: string; replacement: string; index: number }> = [];
  while ((match = bareYoutubeRegex.exec(processed)) !== null) {
    if (isInCodeBlock(match.index)) continue;

    // Check if it's already in a tag (link, iframe, video, etc.)
    // Simple approach: check if we're inside quotes (attribute value) or between <tag and >
    const before = processed.substring(Math.max(0, match.index - 500), match.index);
    // NOTE(review): `after` is computed but never read.
    const after = processed.substring(match.index, match.index + match[0].length + 100);

    // Check if URL is inside quotes (attribute value like src="..." or href="...")
    const beforeContext = before.substring(Math.max(0, before.length - 100));
    if (beforeContext.match(/<(iframe|video|a|img|audio|source)[^>]*\s+(src|href)="[^"]*$/i)) {
      continue; // Inside an attribute value, skip
    }

    // Check if we're between an opening tag and its closing bracket
    const lastOpenTag = before.lastIndexOf('<');
    const lastCloseBracket = before.lastIndexOf('>');
    if (lastOpenTag > lastCloseBracket) {
      // We're inside a tag, check what kind
      const tagContent = before.substring(lastOpenTag);
      if (/<(iframe|video|a|img|audio|source)[^>]*$/i.test(tagContent)) {
        continue; // Skip URLs inside these tags
      }
    }

    const videoId = match[2];
    const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
    youtubeReplacements.push({
      match: match[0],
      replacement: `<iframe class="youtube-embed" frameborder="0" allowfullscreen allow="accelerometer; autoplay; clipboard-write; encrypted-media; fullscreen; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`,
      index: match.index
    });
  }
  youtubeReplacements.reverse().forEach(({ match, replacement, index }) => {
    processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
  });

  // Fix double-closed iframes (safety net)
  processed = processed.replace(/<\/iframe><\/iframe>/gi, '</iframe>');

  // Spotify: https://open.spotify.com/episode/ID or https://open.spotify.com/track/ID or https://open.spotify.com/album/ID
  const spotifyLinkRegex = /<a[^>]+href="(https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+))[^"]*"[^>]*>.*?<\/a>/gi;
  processed = processed.replace(spotifyLinkRegex, (match, url, type, id) => {
    if (isInCodeBlock(processed.indexOf(match))) return match;
    const embedUrl = `https://open.spotify.com/embed/${type}/${id}`;
    return `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`;
  });

  // Also handle bare Spotify URLs (not in links)
  const bareSpotifyRegex = /(https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+)(?:\?[^"\s<>]*)?)/gi;
  const spotifyReplacements: Array<{ match: string; replacement: string; index: number }> = [];
  while ((match = bareSpotifyRegex.exec(processed)) !== null) {
    if (isInCodeBlock(match.index)) continue;
    // Check if it's already in a tag
    const before = processed.substring(0, match.index);
    const lastOpenTag = before.lastIndexOf('<');
    const lastCloseTag = before.lastIndexOf('>');
    if (lastOpenTag > lastCloseTag) continue; // Inside a tag

    const type = match[2];
    const id = match[3];
    const embedUrl = `https://open.spotify.com/embed/${type}/${id}`;
    spotifyReplacements.push({
      match: match[0],
      replacement: `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`,
      index: match.index
    });
  }
  spotifyReplacements.reverse().forEach(({ match, replacement, index }) => {
    processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
  });

  // Process bare image/media URLs that aren't already in tags
  // First, convert bare links (class="bare") that contain image/video/audio URLs to actual media elements
  // This handles cases where AsciiDoc has already converted URLs to links
  // IMPORTANT: Check YouTube FIRST, then Spotify, BEFORE checking file extensions to avoid conflicts
  const bareLinkRegex = /<a[^>]+href="(https?:\/\/[^"]+)"[^>]*class="[^"]*bare[^"]*"[^>]*>([^<]*)<\/a>/gi;
  processed = processed.replace(bareLinkRegex, (match, url, linkText) => {
    if (isInCodeBlock(processed.indexOf(match))) return match;

    // Check YouTube URLs FIRST (be very specific - must be youtube.com or youtu.be)
    // This prevents accidentally matching Spotify URLs
    const youtubeMatch = url.match(/https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([a-zA-Z0-9_-]+)/);
    if (youtubeMatch) {
      const videoId = youtubeMatch[1];
      const embedUrl = `https://www.youtube.com/embed/${videoId}?enablejsapi=1`;
      return `<iframe class="youtube-embed" frameborder="0" allow="encrypted-media; fullscreen; picture-in-picture; web-share" referrerpolicy='strict-origin-when-cross-origin' width="100%" height="360" src="${escapeHtml(embedUrl)}"></iframe>`;
    }

    // Check Spotify URLs (be very specific - must be open.spotify.com)
    const spotifyMatch = url.match(/https?:\/\/open\.spotify\.com\/(episode|track|album|playlist)\/([a-zA-Z0-9]+)/);
    if (spotifyMatch) {
      const type = spotifyMatch[1];
      const id = spotifyMatch[2];
      const embedUrl = `https://open.spotify.com/embed/${type}/${id}`;
      return `<iframe src="${escapeHtml(embedUrl)}" width="100%" height="352" frameborder="0" allowtransparency="true" allow="encrypted-media" class="spotify-embed"></iframe>`;
    }

    // Check if it's an image URL
    if (/\.(jpg|jpeg|png|gif|webp|svg|bmp)(\?|$)/i.test(url)) {
      return `<img src="${escapeHtml(url)}" alt="${escapeHtml(linkText)}" class="bare-image" />`;
    }
    // Check if it's a video URL (but not YouTube)
    if (/\.(mp4|webm|ogg|mov|avi)(\?|$)/i.test(url)) {
      return `<video src="${escapeHtml(url)}" controls class="bare-video"></video>`;
    }
    // Check if it's an audio URL (but not Spotify)
    if (/\.(mp3|wav|ogg|flac|aac|m4a)(\?|$)/i.test(url)) {
      return `<audio src="${escapeHtml(url)}" controls class="bare-audio"></audio>`;
    }

    // Not a media URL, return as-is
    return match;
  });

  // Now process bare URLs that aren't in any tags at all
  // IMPORTANT: Skip YouTube and Spotify URLs - they're already processed above
  const imageUrlRegex = /(https?:\/\/[^\s<>"']+\.(jpg|jpeg|png|gif|webp|svg|bmp))(?![^<]*>)/gi;
  const videoUrlRegex = /(https?:\/\/[^\s<>"']+\.(mp4|webm|ogg|mov|avi))(?![^<]*>)/gi;
  const audioUrlRegex = /(https?:\/\/[^\s<>"']+\.(mp3|wav|ogg|flac|aac|m4a))(?![^<]*>)/gi;

  // Check if URL is already in a tag
  function isUrlInTag(url: string, index: number): boolean {
    const before = processed.substring(0, index);
    // NOTE(review): `after` is computed but never read.
    const after = processed.substring(index);

    // Check if it's inside an existing tag
    const lastOpenTag = before.lastIndexOf('<');
    const lastCloseTag = before.lastIndexOf('>');
    if (lastOpenTag > lastCloseTag) {
      const tagContent = processed.substring(lastOpenTag, index + url.length);
      if (/<(img|video|audio|a|source|iframe)[^>]*>/i.test(tagContent)) {
        return true;
      }
    }

    return false;
  }

  const mediaReplacements: Array<{ match: string; replacement: string; index: number }> = [];

  // Process images
  while ((match = imageUrlRegex.exec(processed)) !== null) {
    if (isInCodeBlock(match.index)) continue;
    if (isUrlInTag(match[0], match.index)) continue;

    const url = match[0];
    mediaReplacements.push({
      match: url,
      replacement: `<img src="${escapeHtml(url)}" alt="" class="bare-image" />`,
      index: match.index
    });
  }

  // Process videos (but skip YouTube URLs - they're handled above)
  while ((match = videoUrlRegex.exec(processed)) !== null) {
    if (isInCodeBlock(match.index)) continue;
    if (isUrlInTag(match[0], match.index)) continue;
    // Skip YouTube URLs - they should be embeds, not video tags
    if (/youtube\.com|youtu\.be/i.test(match[0])) continue;

    const url = match[0];
    mediaReplacements.push({
      match: url,
      replacement: `<video src="${escapeHtml(url)}" controls class="bare-video"></video>`,
      index: match.index
    });
  }

  // Process audio
  while ((match = audioUrlRegex.exec(processed)) !== null) {
    if (isInCodeBlock(match.index)) continue;
    if (isUrlInTag(match[0], match.index)) continue;

    const url = match[0];
    mediaReplacements.push({
      match: url,
      replacement: `<audio src="${escapeHtml(url)}" controls class="bare-audio"></audio>`,
      index: match.index
    });
  }

  // Apply media replacements in reverse order
  mediaReplacements.reverse().forEach(({ match, replacement, index }) => {
    processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
  });

  // Process markdown table alignment
  // Marked generates tables with align attributes or style attributes, we need to add CSS classes for styling
  // Match tables and process alignment on th/td elements
  const tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/gi;
  processed = processed.replace(tableRegex, (tableMatch: string, tableContent: string) => {
    // Process each row
    let processedTable = tableContent;

    // Find all th and td elements - check for align attribute or style with text-align
    const cellRegex = /<(th|td)([^>]*)>([\s\S]*?)<\/\1>/gi;
    processedTable = processedTable.replace(cellRegex, (cellMatch: string, tag: string, attrs: string, content: string) => {
      let align: string | null = null;
      let newAttrs = attrs;

      // Check for align attribute
      const alignMatch = attrs.match(/align=["'](left|center|right)["']/i);
      if (alignMatch) {
        align = alignMatch[1].toLowerCase();
        newAttrs = newAttrs.replace(/\s*align=["'](left|center|right)["']/i, '');
      } else {
        // Check for style attribute with text-align
        const styleMatch = attrs.match(/style=["']([^"']*text-align:\s*(left|center|right)[^"']*)["']/i);
        if (styleMatch) {
          align = styleMatch[2].toLowerCase();
          // Remove text-align from style
          const styleContent = styleMatch[1].replace(/text-align:\s*(left|center|right);?/gi, '').trim();
          if (styleContent) {
            newAttrs = newAttrs.replace(/style=["'][^"']+["']/, `style="${styleContent}"`);
          } else {
            newAttrs = newAttrs.replace(/\s*style=["'][^"']+["']/, '');
          }
        }
      }

      // If we found alignment, add CSS class
      if (align) {
        const alignClass = align === 'left' ? 'halign-left' :
          align === 'center' ? 'halign-center' : 'halign-right';

        // If there's already a class attribute, merge them
        if (newAttrs.includes('class=')) {
          const classMatch = newAttrs.match(/class=["']([^"']+)["']/);
          if (classMatch) {
            const existingClass = classMatch[1];
            if (!existingClass.includes(alignClass)) {
              newAttrs = newAttrs.replace(/class=["'][^"']+["']/, `class="${existingClass} ${alignClass}"`);
            }
          }
        } else {
          newAttrs = `${newAttrs} class="${alignClass}"`.trim();
        }
      }

      return `<${tag}${newAttrs}>${content}</${tag}>`;
    });

    // NOTE(review): any attributes on the original <table> tag are dropped here.
    return `<table>${processedTable}</table>`;
  });

  return {
    html: processed,
    nostrLinks,
    wikilinks,
    hashtags
  };
}
||||
|
||||
/** |
||||
* Get Nostr identifier type from bech32 string |
||||
*/ |
||||
function getNostrType(bech32: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||
if (bech32.startsWith('npub')) return 'npub'; |
||||
if (bech32.startsWith('nprofile')) return 'nprofile'; |
||||
if (bech32.startsWith('nevent')) return 'nevent'; |
||||
if (bech32.startsWith('naddr')) return 'naddr'; |
||||
if (bech32.startsWith('note')) return 'note'; |
||||
return null; |
||||
} |
||||
|
||||
/** |
||||
* Escape HTML special characters |
||||
*/ |
||||
function escapeHtml(text: string): string { |
||||
const map: Record<string, string> = { |
||||
'&': '&', |
||||
'<': '<', |
||||
'>': '>', |
||||
'"': '"', |
||||
"'": ''' |
||||
}; |
||||
return text.replace(/[&<>"']/g, (m) => map[m]); |
||||
} |
||||
@ -0,0 +1,175 @@
@@ -0,0 +1,175 @@
|
||||
import { ParserOptions, Wikilink } from './types'; |
||||
import * as emoji from 'node-emoji'; |
||||
|
||||
/**
 * Result of pre-processing raw content for wikilinks and hashtags before
 * AsciiDoc conversion. Pre-processing prevents AsciiDoc from converting
 * them to anchors or other formats.
 */
export interface PreProcessResult {
  /** Content with wikilinks replaced by placeholders, ready for conversion. */
  content: string;
  /** Wikilinks ([[dtag]] / [[dtag|display]]) found in the raw content. */
  wikilinks: Wikilink[];
  /** Hashtag topics collected during pre-processing. */
  hashtags: string[];
}
||||
|
||||
/** |
||||
* Pre-process content to convert wikilinks and hashtags to placeholders |
||||
* that will be processed after HTML conversion |
||||
*/ |
||||
export function preProcessAsciiDoc(content: string, options: ParserOptions): PreProcessResult { |
||||
let processed = content; |
||||
const wikilinks: Wikilink[] = []; |
||||
const hashtags: string[] = []; |
||||
|
||||
// Process emojis first
|
||||
processed = emoji.emojify(processed); |
||||
|
||||
// Process wikilinks: [[dtag]] or [[dtag|display]]
|
||||
// Replace with a placeholder that AsciiDoc won't touch
|
||||
const wikilinkRegex = /\[\[([^\]]+)\]\]/g; |
||||
const wikilinkPlaceholders: Map<string, Wikilink> = new Map(); |
||||
let placeholderCounter = 0; |
||||
|
||||
processed = processed.replace(wikilinkRegex, (match, content) => { |
||||
const parts = content.split('|'); |
||||
const dtag = parts[0].trim(); |
||||
const display = parts.length > 1 ? parts.slice(1).join('|').trim() : dtag; |
||||
|
||||
const wikilink: Wikilink = { |
||||
dtag, |
||||
display, |
||||
original: match |
||||
}; |
||||
wikilinks.push(wikilink); |
||||
|
||||
// Use a unique placeholder that won't be processed by AsciiDoc
|
||||
// Use angle brackets to avoid AsciiDoc formatting interpretation
|
||||
const placeholder = `<WIKILINK_PLACEHOLDER_${placeholderCounter}>`; |
||||
wikilinkPlaceholders.set(placeholder, wikilink); |
||||
placeholderCounter++; |
||||
|
||||
return placeholder; |
||||
}); |
||||
|
||||
// Process hashtags: #hashtag (but not in code blocks)
|
||||
// Mark code blocks first
|
||||
const codeBlockMarkers: Array<{ start: number; end: number }> = []; |
||||
const codeBlockRegex = /\[source,[^\]]+\]|\[abc\]|\[plantuml\]|```|`[^`]+`/g; |
||||
let match; |
||||
while ((match = codeBlockRegex.exec(processed)) !== null) { |
||||
// Find the end of the code block
|
||||
const start = match.index; |
||||
let end = start + match[0].length; |
||||
|
||||
// For source blocks, find the closing ----
|
||||
if (match[0].startsWith('[source')) { |
||||
const afterStart = processed.substring(end); |
||||
const closeMatch = afterStart.match(/^[\s\S]*?----/); |
||||
if (closeMatch) { |
||||
end = start + match[0].length + closeMatch[0].length; |
||||
} |
||||
} |
||||
|
||||
codeBlockMarkers.push({ start, end }); |
||||
} |
||||
|
||||
function isInCodeBlock(index: number): boolean { |
||||
return codeBlockMarkers.some(marker => index >= marker.start && index < marker.end); |
||||
} |
||||
|
||||
// Process hashtags
|
||||
const hashtagPlaceholders: Map<string, string> = new Map(); |
||||
let hashtagCounter = 0; |
||||
|
||||
// Match hashtags at start of line, after whitespace, or after > (for blockquotes)
|
||||
const hashtagRegex = /(^|\s|>)(#[\w-]+)/gm; |
||||
|
||||
processed = processed.replace(hashtagRegex, (match, prefix, hashtag, offset) => { |
||||
if (isInCodeBlock(offset)) return match; |
||||
|
||||
const topic = hashtag.substring(1); |
||||
if (!hashtags.includes(topic)) { |
||||
hashtags.push(topic); |
||||
} |
||||
|
||||
// Use angle brackets to avoid AsciiDoc formatting interpretation
|
||||
const placeholder = `<HASHTAG_PLACEHOLDER_${hashtagCounter}>`; |
||||
hashtagPlaceholders.set(placeholder, topic); |
||||
hashtagCounter++; |
||||
|
||||
return `${prefix}${placeholder}`; |
||||
}); |
||||
|
||||
return { |
||||
content: processed, |
||||
wikilinks, |
||||
hashtags |
||||
}; |
||||
} |
||||
|
||||
/** |
||||
* Restore wikilinks and hashtags from placeholders in HTML |
||||
*/ |
||||
export function restorePlaceholders( |
||||
html: string, |
||||
wikilinks: Wikilink[], |
||||
hashtags: string[], |
||||
options: ParserOptions |
||||
): string { |
||||
let processed = html; |
||||
|
||||
// Restore wikilinks (handle both escaped and unescaped placeholders)
|
||||
const wikilinkPlaceholderRegex = /<WIKILINK_PLACEHOLDER_(\d+)>|<WIKILINK_PLACEHOLDER_(\d+)>/g; |
||||
processed = processed.replace(wikilinkPlaceholderRegex, (match, escapedIndex, unescapedIndex) => { |
||||
const index = escapedIndex !== undefined ? parseInt(escapedIndex) : parseInt(unescapedIndex); |
||||
const wikilink = wikilinks[index]; |
||||
if (!wikilink) return match; |
||||
|
||||
let url: string; |
||||
if (typeof options.wikilinkUrl === 'function') { |
||||
url = options.wikilinkUrl(wikilink.dtag); |
||||
} else if (typeof options.wikilinkUrl === 'string') { |
||||
url = options.wikilinkUrl.replace('{dtag}', encodeURIComponent(wikilink.dtag)); |
||||
} else { |
||||
url = options.linkBaseURL
|
||||
? `${options.linkBaseURL}/events?d=${encodeURIComponent(wikilink.dtag)}` |
||||
: `#${encodeURIComponent(wikilink.dtag)}`; |
||||
} |
||||
|
||||
return `<a href="${escapeHtml(url)}" class="wikilink" data-dtag="${escapeHtml(wikilink.dtag)}">${escapeHtml(wikilink.display)}</a>`; |
||||
}); |
||||
|
||||
// Restore hashtags (handle both escaped and unescaped placeholders)
|
||||
const hashtagPlaceholderRegex = /<HASHTAG_PLACEHOLDER_(\d+)>|<HASHTAG_PLACEHOLDER_(\d+)>/g; |
||||
processed = processed.replace(hashtagPlaceholderRegex, (match, escapedIndex, unescapedIndex) => { |
||||
const index = escapedIndex !== undefined ? parseInt(escapedIndex) : parseInt(unescapedIndex); |
||||
const topic = hashtags[index]; |
||||
if (!topic) return match; |
||||
|
||||
let url: string | undefined; |
||||
if (typeof options.hashtagUrl === 'function') { |
||||
url = options.hashtagUrl(topic); |
||||
} else if (typeof options.hashtagUrl === 'string') { |
||||
url = options.hashtagUrl.replace('{topic}', encodeURIComponent(topic)); |
||||
} |
||||
|
||||
const hashtag = `#${topic}`; |
||||
if (url) { |
||||
return `<a href="${escapeHtml(url)}" class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</a>`; |
||||
} else { |
||||
return `<span class="hashtag" data-topic="${escapeHtml(topic)}">${escapeHtml(hashtag)}</span>`; |
||||
} |
||||
}); |
||||
|
||||
return processed; |
||||
} |
||||
|
||||
function escapeHtml(text: string): string { |
||||
const map: Record<string, string> = { |
||||
'&': '&', |
||||
'<': '<', |
||||
'>': '>', |
||||
'"': '"', |
||||
"'": ''' |
||||
}; |
||||
return text.replace(/[&<>"']/g, (m) => map[m]); |
||||
} |
||||
@ -1,174 +1,56 @@
@@ -1,174 +1,56 @@
|
||||
import asciidoctor from '@asciidoctor/core'; |
||||
import { ProcessResult } from '../types'; |
||||
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; |
||||
import { postProcessHtml } from './html-postprocess'; |
||||
|
||||
const asciidoctorInstance = asciidoctor(); |
||||
|
||||
export interface ProcessOptions { |
||||
enableCodeHighlighting?: boolean; |
||||
enableLaTeX?: boolean; |
||||
enableMusicalNotation?: boolean; |
||||
originalContent?: string; // Original content for LaTeX detection
|
||||
linkBaseURL?: string; // Base URL for link processing
|
||||
import { ParserOptions } from '../types'; |
||||
import * as emoji from 'node-emoji'; |
||||
|
||||
export interface AsciiDocResult { |
||||
html: string; |
||||
tableOfContents: string; |
||||
hasLaTeX: boolean; |
||||
hasMusicalNotation: boolean; |
||||
} |
||||
|
||||
/** |
||||
* Processes AsciiDoc content to HTML using AsciiDoctor |
||||
* Uses AsciiDoctor's built-in highlight.js and LaTeX support |
||||
* Process AsciiDoc content to HTML |
||||
*/ |
||||
export async function processAsciidoc( |
||||
content: string, |
||||
options: ProcessOptions = {} |
||||
): Promise<ProcessResult> { |
||||
const { |
||||
enableCodeHighlighting = true, |
||||
enableLaTeX = true, |
||||
enableMusicalNotation = true, |
||||
} = options; |
||||
|
||||
// Check if content starts with level 3+ headers
|
||||
// Asciidoctor article doctype requires level 1 (=) or level 2 (==) before level 3 (===)
|
||||
// If content starts with level 3+, use book doctype
|
||||
const firstHeaderMatch = content.match(/^(={1,6})\s+/m); |
||||
let doctype: 'article' | 'book' = 'article'; |
||||
export function processAsciiDoc(content: string, options: ParserOptions): AsciiDocResult { |
||||
const hasLaTeX = /\[source,latex\]|`\$\[|`\$\\|`\$\$|`\$\{|\$\$|\$\{|\$[^$]/.test(content); |
||||
const hasMusicalNotation = /\[abc\]|\[source,abc\]/i.test(content); |
||||
|
||||
if (firstHeaderMatch) { |
||||
const firstHeaderLevel = firstHeaderMatch[1].length; |
||||
if (firstHeaderLevel >= 3) { |
||||
doctype = 'book'; |
||||
} |
||||
} |
||||
|
||||
try { |
||||
const result = asciidoctorInstance.convert(content, { |
||||
safe: 'safe', |
||||
backend: 'html5', |
||||
doctype: doctype, |
||||
attributes: { |
||||
'showtitle': true, |
||||
'sectanchors': true, |
||||
'sectlinks': true, |
||||
'toc': 'left', |
||||
'toclevels': 6, |
||||
'toc-title': 'Table of Contents', |
||||
'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none', |
||||
'stem': enableLaTeX ? 'latexmath' : 'none', |
||||
'data-uri': true, |
||||
'imagesdir': '', |
||||
'linkcss': false, |
||||
'stylesheet': '', |
||||
'stylesdir': '', |
||||
'prewrap': true, |
||||
'sectnums': false, |
||||
'sectnumlevels': 6, |
||||
'experimental': true, |
||||
'compat-mode': false, |
||||
'attribute-missing': 'warn', |
||||
'attribute-undefined': 'warn', |
||||
'skip-front-matter': true, |
||||
'source-indent': 0, |
||||
'indent': 0, |
||||
'tabsize': 2, |
||||
'tabwidth': 2, |
||||
'hardbreaks': false, |
||||
'paragraph-rewrite': 'normal', |
||||
'sectids': true, |
||||
'idprefix': '', |
||||
'idseparator': '-', |
||||
'sectidprefix': '', |
||||
'sectidseparator': '-' |
||||
} |
||||
}); |
||||
|
||||
const htmlString = typeof result === 'string' ? result : result.toString(); |
||||
|
||||
// Extract table of contents from HTML
|
||||
const { toc, contentWithoutTOC } = extractTOC(htmlString); |
||||
|
||||
// Sanitize HTML to prevent XSS
|
||||
const sanitized = sanitizeHTML(contentWithoutTOC); |
||||
|
||||
// Post-process HTML: convert macros to HTML, add styling, etc.
|
||||
const processed = postProcessHtml(sanitized, { |
||||
enableMusicalNotation, |
||||
linkBaseURL: options.linkBaseURL, |
||||
}); |
||||
|
||||
// Process links: add target="_blank" to external links
|
||||
const processedWithLinks = options.linkBaseURL
|
||||
? processLinks(processed, options.linkBaseURL) |
||||
: processed; |
||||
|
||||
// Also process TOC
|
||||
const tocSanitized = sanitizeHTML(toc); |
||||
const tocProcessed = postProcessHtml(tocSanitized, { |
||||
enableMusicalNotation: false, // Don't process music in TOC
|
||||
linkBaseURL: options.linkBaseURL, |
||||
}); |
||||
|
||||
// Process links in TOC as well
|
||||
const tocProcessedWithLinks = options.linkBaseURL |
||||
? processLinks(tocProcessed, options.linkBaseURL) |
||||
: tocProcessed; |
||||
|
||||
// Check for LaTeX in original content (more reliable than checking HTML)
|
||||
const contentToCheck = options.originalContent || content; |
||||
const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck); |
||||
|
||||
// Check for musical notation in processed HTML
|
||||
const hasMusicalNotation = enableMusicalNotation && ( |
||||
/class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed) |
||||
); |
||||
|
||||
return { |
||||
content: processedWithLinks, |
||||
tableOfContents: tocProcessedWithLinks, |
||||
hasLaTeX, |
||||
hasMusicalNotation, |
||||
nostrLinks: [], // Will be populated by metadata extraction
|
||||
wikilinks: [], |
||||
hashtags: [], |
||||
links: [], |
||||
media: [], |
||||
}; |
||||
} catch (error) { |
||||
// Fallback to plain text with error logging
|
||||
const errorMessage = error instanceof Error ? error.message : String(error); |
||||
// Use process.stderr.write for Node.js compatibility instead of console.error
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const nodeProcess = (globalThis as any).process; |
||||
if (nodeProcess?.stderr) { |
||||
nodeProcess.stderr.write(`Error processing AsciiDoc: ${errorMessage}\n`); |
||||
// Process emojis before AsciiDoc conversion
|
||||
const processedContent = emoji.emojify(content); |
||||
|
||||
const asciidoctorOptions: any = { |
||||
safe: 'unsafe', |
||||
attributes: { |
||||
'showtitle': true, |
||||
'icons': 'font', |
||||
'source-highlighter': options.enableCodeHighlighting !== false ? 'highlight.js' : undefined, |
||||
'highlightjs-theme': 'github', |
||||
'toc': 'left', |
||||
'toclevels': 6, |
||||
'sectanchors': true, |
||||
'sectlinks': true, |
||||
'idprefix': '_', |
||||
'idseparator': '_' |
||||
} |
||||
|
||||
// Escape HTML in content for safe display
|
||||
const escapedContent = sanitizeHTML(content); |
||||
|
||||
return { |
||||
content: `<p>${escapedContent}</p>`, |
||||
tableOfContents: '', |
||||
hasLaTeX: false, |
||||
hasMusicalNotation: false, |
||||
nostrLinks: [], |
||||
wikilinks: [], |
||||
hashtags: [], |
||||
links: [], |
||||
media: [], |
||||
}; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Check if content has LaTeX math |
||||
* Based on jumble's detection pattern |
||||
*/ |
||||
function hasMathContent(content: string): boolean { |
||||
// Check for inline math: $...$ or \(...\)
|
||||
const inlineMath = /\$[^$]+\$|\\\([^)]+\\\)/.test(content); |
||||
|
||||
// Check for block math: $$...$$ or \[...\]
|
||||
const blockMath = /\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]/.test(content); |
||||
|
||||
return inlineMath || blockMath; |
||||
}; |
||||
|
||||
// Convert to HTML
|
||||
const Asciidoctor = asciidoctor(); |
||||
const htmlResult = Asciidoctor.convert(processedContent, asciidoctorOptions); |
||||
const html = typeof htmlResult === 'string' ? htmlResult : htmlResult.toString(); |
||||
|
||||
// Extract table of contents if present
|
||||
const tocMatch = html.match(/<div id="toc"[^>]*>([\s\S]*?)<\/div>/); |
||||
const tableOfContents = tocMatch ? tocMatch[1] : ''; |
||||
|
||||
// Remove TOC from main content if present
|
||||
const contentWithoutToc = html.replace(/<div id="toc"[^>]*>[\s\S]*?<\/div>/, ''); |
||||
|
||||
return { |
||||
html: contentWithoutToc, |
||||
tableOfContents, |
||||
hasLaTeX, |
||||
hasMusicalNotation |
||||
}; |
||||
} |
||||
|
||||
@ -1,212 +0,0 @@
@@ -1,212 +0,0 @@
|
||||
import { processMusicalNotation } from './music'; |
||||
|
||||
export interface PostProcessOptions {
  // When true, postProcessHtml runs processMusicalNotation over the HTML.
  enableMusicalNotation?: boolean;
  // Base URL for link handling; not read inside postProcessHtml itself —
  // NOTE(review): confirm intended consumers at call sites.
  linkBaseURL?: string;
}
||||
|
||||
/**
 * Post-processes HTML output from AsciiDoctor.
 * Converts AsciiDoc macros (BOOKSTR:, hashtag:, WIKILINK:, link:nostr:) to
 * HTML with data attributes and CSS classes, then applies image, optional
 * musical-notation, markdown-cleanup, styling and ToC-text passes, in that
 * order. Order matters: macro expansion must run before the generic passes.
 */
export function postProcessHtml(html: string, options: PostProcessOptions = {}): string {
  let processed = html;

  // Convert bookstr markers to HTML placeholders (to be resolved client-side).
  processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => {
    // Quote characters are escaped so the value is attribute-safe.
    const escaped = bookContent.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
    return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`;
  });

  // Convert hashtag:<topic>[<display>] macros to anchor tags.
  processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
    // URL encode the hashtag to prevent XSS
    const encodedHashtag = encodeURIComponent(normalizedHashtag);
    // HTML escape the display text
    const escapedDisplay = displayText
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');
    return `<a href="/notes?t=${encodedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${escapedDisplay}</a>`;
  });

  // Convert WIKILINK:dtag|display placeholder format to HTML.
  // Match WIKILINK:dtag|display, ensuring we don't match across HTML tags.
  processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => {
    const escapedDtag = dTag.trim().replace(/"/g, '&quot;');
    const escapedDisplay = displayText.trim()
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');

    // Always use relative URL for wikilinks (works on any domain).
    const url = `/events?d=${escapedDtag}`;

    return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${url}" href="${url}">${escapedDisplay}</a>`;
  });

  // Convert link:nostr:<bech32>[<display>] macros; the rendering depends on
  // the identifier type (event-like ids embed, profile ids become handles).
  processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => {
    const nostrType = getNostrType(bech32Id);

    if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') {
      // Render as embedded event placeholder.
      const escaped = bech32Id.replace(/"/g, '&quot;');
      return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`;
    } else if (nostrType === 'npub' || nostrType === 'nprofile') {
      // Render as user handle.
      const escaped = bech32Id.replace(/"/g, '&quot;');
      return `<span class="user-handle" data-pubkey="${escaped}">@${displayText}</span>`;
    } else {
      // Unknown prefix: fall back to a regular nostr: link.
      const escaped = bech32Id.replace(/"/g, '&quot;');
      return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`;
    }
  });

  // Process images: add max-width styling and data attributes.
  processed = processImages(processed);

  // Process musical notation if enabled.
  if (options.enableMusicalNotation) {
    processed = processMusicalNotation(processed);
  }

  // Clean up any leftover markdown syntax.
  processed = cleanupMarkdown(processed);

  // Add styling classes.
  processed = addStylingClasses(processed);

  // Hide raw ToC text artifacts.
  processed = hideRawTocText(processed);

  return processed;
}
||||
|
||||
/** |
||||
* Get Nostr identifier type |
||||
*/ |
||||
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { |
||||
if (id.startsWith('npub')) return 'npub'; |
||||
if (id.startsWith('nprofile')) return 'nprofile'; |
||||
if (id.startsWith('nevent')) return 'nevent'; |
||||
if (id.startsWith('naddr')) return 'naddr'; |
||||
if (id.startsWith('note')) return 'note'; |
||||
return null; |
||||
} |
||||
|
||||
/** |
||||
* Process images: add max-width styling and data attributes |
||||
*/ |
||||
function processImages(html: string): string { |
||||
const imageUrls: string[] = []; |
||||
const imageUrlRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi; |
||||
let match; |
||||
|
||||
while ((match = imageUrlRegex.exec(html)) !== null) { |
||||
const url = match[1]; |
||||
if (url && !imageUrls.includes(url)) { |
||||
imageUrls.push(url); |
||||
} |
||||
} |
||||
|
||||
return html.replace(/<img([^>]+)>/gi, (imgTag, attributes) => { |
||||
const srcMatch = attributes.match(/src=["']([^"']+)["']/i); |
||||
if (!srcMatch) return imgTag; |
||||
|
||||
const src = srcMatch[1]; |
||||
const currentIndex = imageUrls.indexOf(src); |
||||
|
||||
let updatedAttributes = attributes; |
||||
|
||||
if (updatedAttributes.match(/class=["']/i)) { |
||||
updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match: string, classes: string) => { |
||||
const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim(); |
||||
const newClasses = cleanedClasses
|
||||
? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in` |
||||
: 'max-w-[400px] object-contain cursor-zoom-in'; |
||||
return `class="${newClasses}"`; |
||||
}); |
||||
} else { |
||||
updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`; |
||||
} |
||||
|
||||
updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '"')}"`; |
||||
|
||||
return `<img${updatedAttributes}>`; |
||||
}); |
||||
} |
||||
|
||||
/** |
||||
* Clean up leftover markdown syntax |
||||
*/ |
||||
function cleanupMarkdown(html: string): string { |
||||
let cleaned = html; |
||||
|
||||
// Clean up markdown image syntax
|
||||
cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => { |
||||
const altText = alt || ''; |
||||
return `<img src="${url}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`; |
||||
}); |
||||
|
||||
// Clean up markdown link syntax
|
||||
cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { |
||||
if (cleaned.includes(`href="${url}"`)) { |
||||
return _match; |
||||
} |
||||
return `<a href="${url}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${text} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; |
||||
}); |
||||
|
||||
return cleaned; |
||||
} |
||||
|
||||
/** |
||||
* Add proper CSS classes for styling |
||||
*/ |
||||
function addStylingClasses(html: string): string { |
||||
let styled = html; |
||||
|
||||
// Add strikethrough styling
|
||||
styled = styled.replace(/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'); |
||||
|
||||
// Add subscript styling
|
||||
styled = styled.replace(/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'); |
||||
|
||||
// Add superscript styling
|
||||
styled = styled.replace(/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'); |
||||
|
||||
// Add code highlighting classes
|
||||
styled = styled.replace(/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'); |
||||
styled = styled.replace(/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'); |
||||
|
||||
return styled; |
||||
} |
||||
|
||||
/** |
||||
* Hide raw AsciiDoc ToC text |
||||
*/ |
||||
function hideRawTocText(html: string): string { |
||||
let cleaned = html; |
||||
|
||||
cleaned = cleaned.replace( |
||||
/<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, |
||||
'' |
||||
); |
||||
|
||||
cleaned = cleaned.replace( |
||||
/<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, |
||||
'' |
||||
); |
||||
|
||||
cleaned = cleaned.replace( |
||||
/<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, |
||||
'' |
||||
); |
||||
|
||||
return cleaned; |
||||
} |
||||
@ -1,211 +0,0 @@
@@ -1,211 +0,0 @@
|
||||
/** |
||||
* Extracts the table of contents from AsciiDoc HTML output |
||||
* Returns the TOC HTML and the content HTML without the TOC |
||||
*/ |
||||
export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } { |
||||
// AsciiDoc with toc: 'left' generates a TOC in a div with id="toc" or class="toc"
|
||||
let tocContent = ''; |
||||
let contentWithoutTOC = html; |
||||
|
||||
// Find the start of the TOC div - try multiple patterns
|
||||
const tocStartPatterns = [ |
||||
/<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i, |
||||
/<div\s+id=["']toc["'][^>]*>/i, |
||||
/<div\s+class=["']toc["'][^>]*>/i, |
||||
/<nav\s+id=["']toc["'][^>]*>/i, |
||||
]; |
||||
|
||||
let tocStartIdx = -1; |
||||
let tocStartTag = ''; |
||||
|
||||
for (const pattern of tocStartPatterns) { |
||||
const match = html.match(pattern); |
||||
if (match && match.index !== undefined) { |
||||
tocStartIdx = match.index; |
||||
tocStartTag = match[0]; |
||||
break; |
||||
} |
||||
} |
||||
|
||||
if (tocStartIdx === -1) { |
||||
// No TOC found
|
||||
return { toc: '', contentWithoutTOC: html }; |
||||
} |
||||
|
||||
// Find the matching closing tag by counting div/nav tags
|
||||
const searchStart = tocStartIdx + tocStartTag.length; |
||||
let depth = 1; |
||||
let i = searchStart; |
||||
|
||||
while (i < html.length && depth > 0) { |
||||
// Look for opening or closing div/nav tags
|
||||
if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<div') { |
||||
// Check if it's a closing tag
|
||||
if (i + 5 < html.length && html[i + 4] === '/') { |
||||
depth--; |
||||
const closeIdx = html.indexOf('>', i); |
||||
if (closeIdx === -1) break; |
||||
i = closeIdx + 1; |
||||
} else { |
||||
// Opening tag - find the end (handle attributes and self-closing)
|
||||
const closeIdx = html.indexOf('>', i); |
||||
if (closeIdx === -1) break; |
||||
// Check if it's self-closing (look for /> before the >)
|
||||
const tagContent = html.substring(i, closeIdx); |
||||
if (!tagContent.endsWith('/')) { |
||||
depth++; |
||||
} |
||||
i = closeIdx + 1; |
||||
} |
||||
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</div') { |
||||
depth--; |
||||
const closeIdx = html.indexOf('>', i); |
||||
if (closeIdx === -1) break; |
||||
i = closeIdx + 1; |
||||
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</nav') { |
||||
depth--; |
||||
const closeIdx = html.indexOf('>', i); |
||||
if (closeIdx === -1) break; |
||||
i = closeIdx + 1; |
||||
} else if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<nav') { |
||||
// Handle opening nav tags
|
||||
const closeIdx = html.indexOf('>', i); |
||||
if (closeIdx === -1) break; |
||||
const tagContent = html.substring(i, closeIdx); |
||||
if (!tagContent.endsWith('/')) { |
||||
depth++; |
||||
} |
||||
i = closeIdx + 1; |
||||
} else { |
||||
i++; |
||||
} |
||||
} |
||||
|
||||
if (depth === 0) { |
||||
// Found the matching closing tag
|
||||
const tocEndIdx = i; |
||||
// Extract the TOC content (inner HTML)
|
||||
const tocFullHTML = html.substring(tocStartIdx, tocEndIdx); |
||||
// Extract just the inner content (without the outer div tags)
|
||||
let innerStart = tocStartTag.length; |
||||
let innerEnd = tocFullHTML.length; |
||||
// Find the last </div> or </nav>
|
||||
if (tocFullHTML.endsWith('</div>')) { |
||||
innerEnd -= 6; |
||||
} else if (tocFullHTML.endsWith('</nav>')) { |
||||
innerEnd -= 7; |
||||
} |
||||
tocContent = tocFullHTML.substring(innerStart, innerEnd).trim(); |
||||
|
||||
// Remove the toctitle div if present (AsciiDoc adds "Table of Contents" title)
|
||||
tocContent = tocContent.replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, ''); |
||||
tocContent = tocContent.trim(); |
||||
|
||||
// Remove the TOC from the content
|
||||
contentWithoutTOC = html.substring(0, tocStartIdx) + html.substring(tocEndIdx); |
||||
} |
||||
|
||||
return { toc: tocContent, contentWithoutTOC }; |
||||
} |
||||
|
||||
/** |
||||
* Performs basic HTML sanitization to prevent XSS |
||||
*/ |
||||
export function sanitizeHTML(html: string): string { |
||||
// Remove script tags and their content
|
||||
html = html.replace(/<script[^>]*>.*?<\/script>/gis, ''); |
||||
|
||||
// Remove event handlers (onclick, onerror, etc.)
|
||||
html = html.replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, ''); |
||||
|
||||
// Remove javascript: protocol in links
|
||||
html = html.replace(/javascript:/gi, ''); |
||||
|
||||
// Remove data: URLs that could be dangerous
|
||||
html = html.replace(/data:\s*text\/html/gi, ''); |
||||
|
||||
return html; |
||||
} |
||||
|
||||
/**
 * Processes HTML links to add target="_blank" to external links.
 * Links whose host matches `linkBaseURL`'s host (and all relative links)
 * are forced to open in the same tab by stripping any target attribute;
 * all other http(s) links get target="_blank" plus rel="noopener noreferrer".
 * This function is available for use but not currently called automatically.
 * It can be used in post-processing if needed.
 */
export function processLinks(html: string, linkBaseURL: string): string {
  // Extract the host from linkBaseURL so "external" links pointing back at
  // our own site can be treated as internal.
  let linkBaseDomain = '';
  if (linkBaseURL) {
    try {
      // Use the URL constructor if the runtime provides one (Node.js 10+).
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const URLConstructor = (globalThis as any).URL;
      if (URLConstructor) {
        const url = new URLConstructor(linkBaseURL);
        linkBaseDomain = url.hostname;
      } else {
        throw new Error('URL not available');
      }
    } catch {
      // Fallback to simple string parsing if the URL constructor fails:
      // strip the scheme and take everything before the first '/'.
      const url = linkBaseURL.replace(/^https?:\/\//, '');
      const parts = url.split('/');
      if (parts.length > 0) {
        linkBaseDomain = parts[0];
      }
    }
  }

  // Regex to match <a> tags with href attributes.
  const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g;

  return html.replace(linkRegex, (match, before, href, after) => {
    // External means an absolute http(s) URL; everything else is local/relative.
    const isExternal = href.startsWith('http://') || href.startsWith('https://');

    if (isExternal) {
      // An absolute URL pointing at our own domain is treated as internal.
      if (linkBaseDomain) {
        try {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const URLConstructor = (globalThis as any).URL;
          if (URLConstructor) {
            const hrefUrl = new URLConstructor(href);
            if (hrefUrl.hostname === linkBaseDomain) {
              // Same domain — open in same tab (remove any existing target).
              return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
            }
          } else {
            throw new Error('URL not available');
          }
        } catch {
          // If URL parsing fails, use a simple substring check instead.
          if (href.includes(linkBaseDomain)) {
            return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
          }
        }
      }

      // Truly external link — add target="_blank" and rel="noopener noreferrer"
      // unless the tag already declares a target.
      if (!match.includes('target=')) {
        if (!match.includes('rel=')) {
          return match.replace('>', ' target="_blank" rel="noopener noreferrer">');
        } else {
          // A rel attribute exists: extend it with noopener/noreferrer if absent.
          const updatedMatch = match.replace(/rel\s*=\s*["']([^"']*)["']/gi, (relMatch, relValue) => {
            if (!relValue.includes('noopener')) {
              return `rel="${relValue} noopener noreferrer"`;
            }
            return relMatch;
          });
          return updatedMatch.replace('>', ' target="_blank">');
        }
      }
    } else {
      // Local/relative link — ensure it opens in the same tab.
      return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
    }

    // External link that already had a target attribute: leave unchanged.
    return match;
  });
}
||||
@ -0,0 +1,244 @@
@@ -0,0 +1,244 @@
|
||||
import { marked } from 'marked'; |
||||
// @ts-ignore - marked is ESM but we need it to work in Jest
|
||||
import { ParserOptions } from '../types'; |
||||
import * as emoji from 'node-emoji'; |
||||
|
||||
export interface MarkdownResult {
  // Rendered HTML output.
  html: string;
  // Parsed YAML frontmatter key/value pairs, when present.
  frontmatter?: Record<string, any>;
  // True when LaTeX-style math markers were detected in the source.
  hasLaTeX: boolean;
  // True when abc/music code fences were detected in the source.
  hasMusicalNotation: boolean;
}
||||
|
||||
/** |
||||
* Extract YAML frontmatter from markdown content |
||||
*/ |
||||
function extractFrontmatter(content: string): { frontmatter?: Record<string, any>; content: string } { |
||||
const frontmatterRegex = /^---\s*\n([\s\S]*?)\n---\s*\n/; |
||||
const match = content.match(frontmatterRegex); |
||||
|
||||
if (!match) { |
||||
return { content }; |
||||
} |
||||
|
||||
try { |
||||
// Simple YAML parser for basic key-value pairs
|
||||
const yamlContent = match[1]; |
||||
const frontmatter: Record<string, any> = {}; |
||||
const lines = yamlContent.split('\n'); |
||||
|
||||
for (const line of lines) { |
||||
const trimmed = line.trim(); |
||||
if (!trimmed || trimmed.startsWith('#')) continue; |
||||
|
||||
const colonIndex = trimmed.indexOf(':'); |
||||
if (colonIndex === -1) continue; |
||||
|
||||
const key = trimmed.substring(0, colonIndex).trim(); |
||||
let value = trimmed.substring(colonIndex + 1).trim(); |
||||
|
||||
// Remove quotes if present
|
||||
if ((value.startsWith('"') && value.endsWith('"')) ||
|
||||
(value.startsWith("'") && value.endsWith("'"))) { |
||||
value = value.slice(1, -1); |
||||
} |
||||
|
||||
// Handle arrays (simple case)
|
||||
if (value.startsWith('[') && value.endsWith(']')) { |
||||
const arrayContent = value.slice(1, -1); |
||||
frontmatter[key] = arrayContent.split(',').map(v => v.trim().replace(/^["']|["']$/g, '')); |
||||
} else { |
||||
frontmatter[key] = value; |
||||
} |
||||
} |
||||
|
||||
return { |
||||
frontmatter: Object.keys(frontmatter).length > 0 ? frontmatter : undefined, |
||||
content: content.substring(match[0].length) |
||||
}; |
||||
} catch (e) { |
||||
return { content }; |
||||
} |
||||
} |
||||
|
||||
/**
 * Process Markdown content to HTML (minimal markdown support).
 *
 * Pipeline: strip YAML frontmatter -> expand emoji shortcodes -> lift
 * footnote definitions out of the text and replace references with inert
 * placeholders -> run marked -> post-process the HTML (superscripts,
 * footnote links and footnotes section, header anchor IDs).
 *
 * @param content Raw markdown source.
 * @param options Parser options (accepted but not read by this function).
 * @returns Rendered HTML plus any frontmatter and feature-detection flags.
 */
export function processMarkdown(content: string, options: ParserOptions): MarkdownResult {
  // Extract frontmatter. NOTE: the detection flags below are computed on the
  // ORIGINAL content, frontmatter included.
  const { frontmatter, content: contentWithoutFrontmatter } = extractFrontmatter(content);

  // Detect LaTeX and musical notation. The LaTeX pattern is deliberately
  // loose: fenced ```latex blocks, backtick-$ combinations, $$, ${, or any
  // bare "$" followed by a non-"$" character sets the flag.
  const hasLaTeX = /```latex|`\$\[|`\$\\|`\$\$|`\$\{|\$\$|\$\{|\$[^$]/.test(content);
  const hasMusicalNotation = /```abc|```music/i.test(content);

  // Configure marked for minimal markdown: GitHub-flavored syntax on, but
  // single newlines do NOT become <br> (breaks: false).
  marked.setOptions({
    gfm: true,
    breaks: false
  });

  // Expand :shortcode: emoji before markdown processing.
  let processedContent = emoji.emojify(contentWithoutFrontmatter);

  // Extract and process footnotes before markdown parsing.
  // Footnote syntax: [^1] as a reference in text, [^1]: definition at end.
  const footnoteDefinitions: Map<string, string> = new Map();
  let placeholderCounter = 0;

  // Pass 1: pull footnote DEFINITIONS out of the text, collecting multi-line
  // bodies, and keep every other line.
  const lines = processedContent.split('\n');
  const processedLines: string[] = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    const footnoteDefMatch = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/);
    if (footnoteDefMatch) {
      const id = footnoteDefMatch[1];
      let definition = footnoteDefMatch[2];

      // Collect the multi-line definition body. Stop at the next definition
      // line, or at a blank line that is followed by ordinary (non-blank,
      // non-definition) text.
      i++;
      while (i < lines.length) {
        const nextLine = lines[i];
        if (nextLine.match(/^\[\^[^\]]+\]:/) || (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].trim() !== '' && !lines[i + 1].match(/^\[\^[^\]]+\]:/))) {
          break;
        }
        // Also stop at a blank line that directly precedes another definition.
        if (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].match(/^\[\^[^\]]+\]:/)) {
          break;
        }
        definition += '\n' + nextLine;
        i++;
      }

      footnoteDefinitions.set(id, definition.trim());
      // Skip adding this line to processedLines (the definition is removed
      // from the body and re-emitted later in the footnotes section).
      continue;
    }

    processedLines.push(line);
    i++;
  }

  processedContent = processedLines.join('\n');

  // Pass 2: replace footnote REFERENCES with placeholders before markdown
  // parsing. An HTML-like <span> placeholder is used because markdown passes
  // raw HTML through unchanged. References without a matching definition are
  // left as-is.
  const footnoteRefRegex = /\[\^([^\]]+)\]/g;
  let refMatch;
  while ((refMatch = footnoteRefRegex.exec(processedContent)) !== null) {
    const id = refMatch[1];
    if (footnoteDefinitions.has(id)) {
      const placeholder = `<span data-footnote-placeholder="${placeholderCounter++}" data-footnote-id="${id}"></span>`;
      processedContent = processedContent.substring(0, refMatch.index) +
        placeholder +
        processedContent.substring(refMatch.index + refMatch[0].length);
      // Splicing changed the string offsets, so restart the scan; the
      // inserted placeholder cannot itself match [^...], so this terminates.
      footnoteRefRegex.lastIndex = 0;
    }
  }

  // Convert markdown to HTML.
  let html = marked.parse(processedContent) as string;

  // Process superscripts (X^2^ syntax) AFTER markdown parsing to avoid
  // conflicts — but skip anything inside <pre>/<code> blocks.
  // First index all code-block spans in the HTML.
  const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi;
  const codeBlocks: Array<{ start: number; end: number; content: string }> = [];
  let codeMatch;
  while ((codeMatch = codeBlockRegex.exec(html)) !== null) {
    codeBlocks.push({
      start: codeMatch.index,
      end: codeMatch.index + codeMatch[0].length,
      content: codeMatch[0]   // captured for completeness; only start/end are read below
    });
  }

  // True when the given HTML offset falls inside any indexed code block.
  function isInCodeBlock(index: number): boolean {
    return codeBlocks.some(block => index >= block.start && index < block.end);
  }

  // Collect superscript matches (outside code blocks) without mutating yet.
  const superscriptRegex = /\^([^\^<>\n]+)\^/g;
  const superscriptReplacements: Array<{ match: string; replacement: string; index: number }> = [];
  let supMatch;
  while ((supMatch = superscriptRegex.exec(html)) !== null) {
    if (isInCodeBlock(supMatch.index)) continue;
    superscriptReplacements.push({
      match: supMatch[0],
      replacement: `<sup>${supMatch[1]}</sup>`,
      index: supMatch.index
    });
  }

  // Apply superscript replacements in reverse order so earlier indices stay
  // valid while the string is being edited.
  superscriptReplacements.reverse().forEach(({ match, replacement, index }) => {
    html = html.substring(0, index) + replacement + html.substring(index + match.length);
  });

  // Replace footnote placeholders with the actual footnote link HTML.
  let footnoteCounter = 1;
  const footnoteRefs: Array<{ id: string; num: number; definition: string }> = [];
  const footnoteRefMap: Map<string, number> = new Map();

  // Assign numbers to all footnote definitions, in definition order.
  footnoteDefinitions.forEach((definition, id) => {
    const num = footnoteCounter++;
    footnoteRefMap.set(id, num);
    footnoteRefs.push({ id, num, definition });
  });

  // Swap each <span data-footnote-placeholder=...> for a numbered superscript
  // link. (placeholderNum is unused; lookup is by footnote id.)
  const placeholderRegex = /<span data-footnote-placeholder="(\d+)" data-footnote-id="([^"]+)"><\/span>/g;
  html = html.replace(placeholderRegex, (match, placeholderNum, id) => {
    const num = footnoteRefMap.get(id);
    if (num !== undefined) {
      return `<sup class="footnote"><a id="footnoteref_${num}" class="footnote" href="#footnotedef_${num}" title="View footnote.">${num}</a></sup>`;
    }
    return match; // Return original if no definition found
  });

  // Append a footnotes section at the end if there are any definitions.
  if (footnoteRefs.length > 0) {
    let footnotesHtml = '<div id="footnotes"><hr>';
    footnoteRefs.forEach(({ id, num, definition }) => {
      // Run the definition through markdown again so inline formatting works.
      const defHtml = marked.parse(definition) as string;
      footnotesHtml += `<div class="footnote" id="footnotedef_${num}"><a href="#footnoteref_${num}">${num}</a>. ${defHtml}</div>`;
    });
    footnotesHtml += '</div>';
    html += footnotesHtml;
  }

  // Give headers IDs so intra-document anchor links work, generating a
  // GitHub-style slug from the header text when no id is already present.
  // NOTE(review): the pattern only matches headers whose inner text contains
  // no child elements ([^<]+); headers with inline markup are left untouched.
  html = html.replace(/<h([1-6])([^>]*)>([^<]+)<\/h[1-6]>/gi, (match: string, level: string, attrs: string, text: string) => {
    // Skip if header already has an id attribute.
    if (attrs && /id=["'][^"']+["']/i.test(attrs)) {
      return match;
    }

    // Generate ID from header text (similar to GitHub markdown slugs).
    const id = text
      .toLowerCase()
      .trim()
      .replace(/[^\w\s-]/g, '')  // Remove special chars
      .replace(/\s+/g, '-')      // Replace spaces with hyphens
      .replace(/-+/g, '-')       // Collapse multiple hyphens
      .replace(/^-|-$/g, '');    // Remove leading/trailing hyphens

    // Add the id attribute, preserving any existing attributes.
    const newAttrs = attrs ? `${attrs} id="${id}"` : `id="${id}"`;
    return `<h${level} ${newAttrs}>${text}</h${level}>`;
  });

  return {
    html,
    frontmatter,
    hasLaTeX,
    hasMusicalNotation
  };
}
||||
@ -1,47 +0,0 @@
@@ -1,47 +0,0 @@
|
||||
/** |
||||
* Processes musical notation in HTML content |
||||
* Wraps musical notation in appropriate HTML for rendering |
||||
*/ |
||||
export function processMusicalNotation(html: string): string { |
||||
// Process ABC notation blocks
|
||||
const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs; |
||||
html = html.replace(abcBlockPattern, (match) => { |
||||
const abcContent = match.trim(); |
||||
return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`; |
||||
}); |
||||
|
||||
// Process LilyPond notation blocks
|
||||
const lilypondPattern = /(\\relative[^}]+})/gs; |
||||
html = html.replace(lilypondPattern, (match) => { |
||||
const lilypondContent = match.trim(); |
||||
return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`; |
||||
}); |
||||
|
||||
// Process inline chord notation: [C], [Am], [F#m7], etc.
|
||||
const chordPattern = /\[([A-G][#b]?m?[0-9]?[^\[\]]*)\]/g; |
||||
html = html.replace(chordPattern, (match, chord) => { |
||||
return `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`; |
||||
}); |
||||
|
||||
// Process MusicXML-like notation
|
||||
const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs; |
||||
html = html.replace(musicxmlPattern, (match) => { |
||||
const musicxmlContent = match.trim(); |
||||
return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`; |
||||
}); |
||||
|
||||
return html; |
||||
} |
||||
|
||||
/** |
||||
* Escapes a string for use in HTML attributes |
||||
*/ |
||||
function escapeForAttr(text: string): string { |
||||
return text |
||||
.replace(/"/g, '"') |
||||
.replace(/'/g, ''') |
||||
.replace(/</g, '<') |
||||
.replace(/>/g, '>') |
||||
.replace(/\n/g, ' ') |
||||
.replace(/\r/g, ''); |
||||
} |
||||
@ -1,20 +0,0 @@
@@ -1,20 +0,0 @@
|
||||
/**
 * Type declarations for @asciidoctor/core
 * These are minimal types - the actual types should come from the package
 */
declare module '@asciidoctor/core' {
  /** Subset of the options accepted by Asciidoctor's convert(). */
  interface ConvertOptions {
    // Safe mode name (e.g. 'safe', 'server') — see Asciidoctor docs for semantics.
    safe?: string;
    // Output backend (e.g. 'html5'). NOTE(review): values not constrained here.
    backend?: string;
    // Document type (e.g. 'article', 'book').
    doctype?: string;
    // Document attributes passed through to the converter.
    attributes?: Record<string, any>;
    // Extension registry instance; left as `any` in these minimal types.
    extension_registry?: any;
  }

  /** Minimal surface of an Asciidoctor processor instance. */
  interface Asciidoctor {
    // Converts AsciiDoc source; returns a string or a Document object
    // depending on options (hence `string | any`).
    convert(content: string, options?: ConvertOptions): string | any;
  }

  // Factory: the package's default export creates a processor instance.
  function asciidoctor(): Asciidoctor;
  export default asciidoctor;
}
||||
Loading…
Reference in new issue