Browse Source

feat: display readme from more sources

instead of just github. unfortunately most servers do not send
permissive CORS headers, so a proxy is needed
master
DanConwayDev 2 years ago
parent
commit
19471e7c19
No known key found for this signature in database
GPG Key ID: 68E15486D73F75E1
  1. 100
      src/lib/components/repo/utils.spec.ts
  2. 66
      src/lib/components/repo/utils.ts
  3. 47
      src/lib/stores/repo.ts
  4. 28
      src/routes/git_proxy/readme/[clone]/+server.ts
  5. 39
      src/routes/git_proxy/readme/[readme_url]/+server.ts

100
src/lib/components/repo/utils.spec.ts

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
import { describe, expect, test } from 'vitest'
import { extractGithubDetails, selectRepoFromCollection } from './utils'
import { cloneArrayToReadMeUrls, selectRepoFromCollection } from './utils'
import {
collection_defaults,
event_defaults,
@ -160,18 +160,92 @@ describe('getSelectedRepo', () => { @@ -160,18 +160,92 @@ describe('getSelectedRepo', () => {
})
})
describe('extractGithubDetails', () => {
;[
'https://github.com/orgname/reponame.git',
'https://github.com/orgname/reponame',
'git@github.com:orgname/reponame',
].forEach((clone) => {
describe(clone, () => {
test('returns correct org and repo', () => {
const res = extractGithubDetails(clone)
expect(res?.org).toEqual('orgname')
expect(res?.repo_name).toEqual('reponame')
})
// Specification for cloneArrayToReadMeUrls: every clone address should expand
// to two candidate urls (README.md then readme.md) using the raw-file path
// layout of the git server it points at.
describe('cloneArrayToReadMeUrls', () => {
  test('for each clone url returns url to /raw/HEAD/README.md and /raw/HEAD/readme.md', () => {
    expect(
      cloneArrayToReadMeUrls([
        'https://gitea.com/orgname/reponame',
        'https://gitlab.com/orgname/reponame',
      ])
    ).toEqual([
      'https://gitea.com/orgname/reponame/raw/HEAD/README.md',
      'https://gitea.com/orgname/reponame/raw/HEAD/readme.md',
      'https://gitlab.com/orgname/reponame/raw/HEAD/README.md',
      'https://gitlab.com/orgname/reponame/raw/HEAD/readme.md',
    ])
  })
  test('for github link use raw.githubusercontent.com/HEAD', () => {
    // only the host is swapped; the org and repo segments must be preserved
    expect(
      cloneArrayToReadMeUrls(['https://github.com/orgname/reponame'])
    ).toEqual([
      'https://raw.githubusercontent.com/orgname/reponame/HEAD/README.md',
      'https://raw.githubusercontent.com/orgname/reponame/HEAD/readme.md',
    ])
  })
  test('for sr.ht link to /blob/HEAD', () => {
    expect(cloneArrayToReadMeUrls(['https://sr.ht/~orgname/reponame'])).toEqual(
      [
        'https://sr.ht/~orgname/reponame/blob/HEAD/README.md',
        'https://sr.ht/~orgname/reponame/blob/HEAD/readme.md',
      ]
    )
  })
  test('for git.launchpad.net link to /plain', () => {
    expect(
      cloneArrayToReadMeUrls(['https://git.launchpad.net/orgname/reponame'])
    ).toEqual([
      'https://git.launchpad.net/orgname/reponame/plain/README.md',
      'https://git.launchpad.net/orgname/reponame/plain/readme.md',
    ])
  })
  test('for git.savannah.gnu.org link to /plain', () => {
    expect(
      cloneArrayToReadMeUrls(['https://git.savannah.gnu.org/orgname/reponame'])
    ).toEqual([
      'https://git.savannah.gnu.org/orgname/reponame/plain/README.md',
      'https://git.savannah.gnu.org/orgname/reponame/plain/readme.md',
    ])
  })
  // the remaining cases cover normalisation of the clone address itself
  describe('transform clone address to url', () => {
    test('strips trailing / from address', () => {
      expect(
        cloneArrayToReadMeUrls(['https://codeberg.org/orgname/reponame/'])
      ).toEqual([
        'https://codeberg.org/orgname/reponame/raw/HEAD/README.md',
        'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md',
      ])
    })
    test('strips .git from address', () => {
      expect(
        cloneArrayToReadMeUrls(['https://codeberg.org/orgname/reponame.git'])
      ).toEqual([
        'https://codeberg.org/orgname/reponame/raw/HEAD/README.md',
        'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md',
      ])
    })
    test('git@codeberg.org:orgname/reponame.git to address', () => {
      expect(
        cloneArrayToReadMeUrls(['git@codeberg.org:orgname/reponame.git'])
      ).toEqual([
        'https://codeberg.org/orgname/reponame/raw/HEAD/README.md',
        'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md',
      ])
    })
    test('ssh://codeberg.org/orgname/reponame to address', () => {
      expect(
        cloneArrayToReadMeUrls(['ssh://codeberg.org/orgname/reponame'])
      ).toEqual([
        'https://codeberg.org/orgname/reponame/raw/HEAD/README.md',
        'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md',
      ])
    })
    test('https://custom.com/deep/deeper/deeper to address', () => {
      expect(
        cloneArrayToReadMeUrls(['https://custom.com/deep/deeper/deeper'])
      ).toEqual([
        'https://custom.com/deep/deeper/deeper/raw/HEAD/README.md',
        'https://custom.com/deep/deeper/deeper/raw/HEAD/readme.md',
      ])
    })
  })
})

66
src/lib/components/repo/utils.ts

@ -16,20 +16,56 @@ export const selectRepoFromCollection = ( @@ -16,20 +16,56 @@ export const selectRepoFromCollection = (
})[0]
}
export const extractGithubDetails = (
clone: string
): { org: string; repo_name: string } | undefined => {
if (clone.indexOf('github.') > -1) {
const g_split = clone.split('github.')
if (g_split.length > 0) {
const final = g_split[1].replace(':', '/').split('/')
if (final.length > 2) {
return {
org: final[1],
repo_name: final[2].split('.')[0],
}
/**
 * Expands git clone addresses into candidate urls for the repository README.
 *
 * Each address yields two urls (`README.md` then `readme.md`), in the same
 * order as the input. Most servers will produce a CORS error when these urls
 * are fetched from a browser, so a proxy should be used.
 *
 * at the time of this commit these urls work for:
 *  self-hosted gitea (or forgejo), gitlab
 *  github.com
 *  bitbucket.org
 *  gitlab.com
 *  gitea.com
 *  codeberg.org (forgejo instance)
 *  sourcehut (git.sr.ht)
 *  launchpad.net
 * It doesn't work for:
 *  self-hosted gogs (requires branch name repo/raw/master/README.md)
 *  sourceforge.net (https://sourceforge.net/p/mingw/catgets/ci/master/tree/README?format=raw)
 *  notabug.org (requires branch name notabug.org/org/repo/raw/master/README.md)
 *
 * @param clone - clone addresses (https, ssh:// or scp-like `git@host:path`)
 * @returns candidate https urls; empty when `clone` is empty
 */
export const cloneArrayToReadMeUrls = (clone: string[]): string[] =>
  clone
    .map(extractRepoAddress)
    // an empty address cannot form a usable url
    .filter((address) => address.length > 0)
    .flatMap((address) => {
      // decide on the host only, so a path segment such as
      // 'github.com-mirrors' cannot be mistaken for the server
      const slash = address.indexOf('/')
      const authority = slash === -1 ? address : address.slice(0, slash)
      const path = slash === -1 ? '' : address.slice(slash)
      const host = authority.replace(/:\d+$/, '').toLowerCase()

      let base = address
      let prefix = 'raw/HEAD'
      if (host === 'sr.ht' || host.endsWith('.sr.ht')) prefix = 'blob/HEAD'
      if (host === 'git.launchpad.net' || host === 'git.savannah.gnu.org')
        prefix = 'plain'
      if (host === 'github.com' || host === 'www.github.com') {
        // raw.githubusercontent.com can be used without CORS error
        base = `raw.githubusercontent.com${path}`
        prefix = 'HEAD'
      }
      return ['README.md', 'readme.md'].map(
        (filename) => `https://${base}/${prefix}/${filename}`
      )
    })

/**
 * Reduces a clone address to `host[:port]/path` with no scheme, credentials,
 * trailing slash or `.git` suffix.
 *
 * - `https://host/org/repo.git`  -> `host/org/repo`
 * - `git@host:org/repo.git`      -> `host/org/repo` (scp-like syntax)
 * - `https://host:8443/org/repo` -> `host:8443/org/repo`
 * - `ssh://git@host:2222/org/r`  -> `host/org/r`
 */
const extractRepoAddress = (clone_string: string): string => {
  let s = clone_string
  // remove trailing slash
  if (s.endsWith('/')) s = s.slice(0, -1)
  // remove trailing .git
  if (s.endsWith('.git')) s = s.slice(0, -4)

  // remove :// and anything before, remembering which scheme it was
  const scheme_end = s.indexOf('://')
  const scheme =
    scheme_end === -1 ? undefined : s.slice(0, scheme_end).toLowerCase()
  if (scheme !== undefined) s = s.slice(scheme_end + 3)

  // with a scheme the host ends at the first '/'; in scp-like syntax
  // (no scheme) the first ':' separates host and path instead
  const separator = s.search(scheme === undefined ? /[:/]/ : /\//)
  let authority = separator === -1 ? s : s.slice(0, separator)
  const path = separator === -1 ? '' : s.slice(separator + 1)

  // remove credentials; an '@' further along in the path is left untouched
  authority = authority.slice(authority.lastIndexOf('@') + 1)
  // a port on a non-http(s) scheme (e.g. ssh) is not where the web interface
  // is served, whereas an explicit http(s) port must be kept
  if (scheme !== undefined && scheme !== 'http' && scheme !== 'https')
    authority = authority.replace(/:\d+$/, '')

  return path === '' ? authority : `${authority}/${path}`
}

47
src/lib/stores/repo.ts

@ -11,7 +11,7 @@ import { @@ -11,7 +11,7 @@ import {
} from '$lib/components/repo/type'
import { ensureRepoCollection } from './repos'
import {
extractGithubDetails,
cloneArrayToReadMeUrls,
selectRepoFromCollection,
} from '$lib/components/repo/utils'
import { get } from 'svelte/store'
@ -104,33 +104,34 @@ const ensureRepoReadme = async ( @@ -104,33 +104,34 @@ const ensureRepoReadme = async (
})
}
}
let text: string | undefined
try {
const github_details = clone
.map(extractGithubDetails)
.find((details) => !!details)
let res: Response
if (github_details) {
let readme_urls = cloneArrayToReadMeUrls(clone)
// prioritise using github as it doesn't require a proxy
readme_urls = [
...readme_urls.filter((url) => url.includes('raw.githubusercontent.com')),
...readme_urls.filter(
(url) => !url.includes('raw.githubusercontent.com')
),
]
for (let i = 0; i < readme_urls.length; i++) {
try {
res = await fetch(
`https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/README.md`
const res = await fetch(
readme_urls[i].includes('raw.githubusercontent.com')
? readme_urls[i]
: // use proxy as most servers produce a CORS error
`/git_proxy/readme/${encodeURIComponent(readme_urls[i])}`
)
if (!res.ok) {
throw 'api request error'
if (res.ok) {
text = await res.text()
break
} else {
continue
}
} catch {
res = await fetch(
`https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/readme.md`
)
continue
}
} else
res = await fetch(`/git_proxy/readme/${encodeURIComponent(clone[0])}`)
if (!res.ok) {
throw 'api request error'
}
let text = ''
text = await res.text()
update(text)
} catch (e) {
update()
}
} catch {}
update(text)
}

28
src/routes/git_proxy/readme/[clone]/+server.ts

@ -1,28 +0,0 @@ @@ -1,28 +0,0 @@
import { extractGithubDetails } from '$lib/components/repo/utils'
/**
 * Proxies the README of a GitHub-hosted repository.
 *
 * `params.clone` is a URI-encoded clone address. When it points at GitHub the
 * README is fetched from raw.githubusercontent.com, trying `README.md` and
 * then `readme.md`; the first successful response body is returned. A 404 is
 * returned when neither exists, rather than relaying the upstream error page
 * as if it were the README. Other hosts are not supported and produce an
 * empty response.
 */
export const GET = async ({ params }: { params: { clone: string } }) => {
  const github_details = extractGithubDetails(decodeURIComponent(params.clone))
  if (!github_details) {
    // options:
    //  * add support for different git server implementations that serve raw
    //    files and cycle through the urls until we find the readme
    //  * add a worker that can use 'git archive' to get specific files
    //     * unfortunately the two options that can easily be embedded within
    //       this sveltekit backend (wasm-git and isomorphicgit) don't support
    //       the 'archive' command
    //         https://github.com/petersalomonsen/wasm-git/
    //         https://github.com/isomorphic-git
    //  * 'git clone' is too expensive for retrieving single files. even when
    //    done using treeless or blobless flags. see:
    //    https://noise.getoto.net/2020/12/21/get-up-to-speed-with-partial-clone-and-shallow-clone/
    return new Response(null)
  }
  for (const filename of ['README.md', 'readme.md']) {
    const res = await fetch(
      `https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/${filename}`
    )
    // only a successful response carries the readme; an error body must not
    // be passed on as content
    if (res.ok) return new Response(await res.text())
  }
  return new Response(null, { status: 404 })
}

39
src/routes/git_proxy/readme/[readme_url]/+server.ts

@ -0,0 +1,39 @@ @@ -0,0 +1,39 @@
import { error } from '@sveltejs/kit'
/**
 * Decides whether the proxy may fetch `readme_url`.
 *
 * Only absolute https urls whose path ends in `/README.md` or `/readme.md`
 * are accepted. Checking the parsed path (not the raw string) stops the
 * filename being smuggled in through a query string or fragment.
 */
const isProxyableReadmeUrl = (readme_url: string): boolean => {
  let url: URL
  try {
    url = new URL(readme_url)
  } catch {
    return false
  }
  return (
    url.protocol === 'https:' &&
    (url.pathname.endsWith('/README.md') || url.pathname.endsWith('/readme.md'))
  )
}

/** true when `e` is the error raised by an expired AbortSignal.timeout() */
const isTimeoutError = (e: unknown): boolean =>
  typeof e === 'object' &&
  e !== null &&
  (e as { name?: unknown }).name === 'TimeoutError'

/**
 * Server-side proxy used to fetch a README from git servers that do not
 * allow cross-origin requests from the browser.
 *
 * Responses:
 *  - 200 with the README text when the upstream request succeeds
 *  - 400 when `readme_url` is not an https url to a README.md / readme.md
 *  - the upstream status when the upstream responds with an error
 *  - 504 when the upstream does not answer within 5 seconds
 *  - 502 when the upstream cannot be reached for any other reason
 *
 * Plain `Response` objects are returned instead of going through SvelteKit's
 * `error()` helper, so the result does not depend on whether that helper
 * returns or throws in the installed SvelteKit version.
 *
 * NOTE(review): the url check limits what can be requested but does not stop
 * requests to internal hosts that serve https; add a host allow/deny list if
 * this is deployed inside a private network.
 *
 * alternative approaches:
 *  * add a worker that can use 'git archive' to get specific files
 *     * unfortunately the two options that can easily be embedded within this
 *       sveltekit backend (wasm-git and isomorphicgit) don't support the
 *       'archive' command
 *         https://github.com/petersalomonsen/wasm-git/
 *         https://github.com/isomorphic-git
 *  * 'git clone' is too expensive for retrieving single files. even when
 *    done using treeless or blobless flags. see:
 *    https://noise.getoto.net/2020/12/21/get-up-to-speed-with-partial-clone-and-shallow-clone/
 */
export const GET = async ({
  params,
}: {
  params: { readme_url: string }
}): Promise<Response> => {
  // prevent abuse of the proxy by only fetching urls that point at a readme
  if (!isProxyableReadmeUrl(params.readme_url))
    return new Response(null, { status: 400 })

  try {
    const res = await fetch(params.readme_url, {
      signal: AbortSignal.timeout(5000),
    })
    if (!res.ok) return new Response(null, { status: res.status })
    // reading the body stays inside the try: the timeout can also fire here
    const text = await res.text()
    return new Response(text || null)
  } catch (e: unknown) {
    return new Response(null, { status: isTimeoutError(e) ? 504 : 502 })
  }
}
Loading…
Cancel
Save