From 19471e7c190cab4f2f823fb4e279066c5e211c5f Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 2 Apr 2024 16:35:16 +0100 Subject: [PATCH] feat: display readme from more sources instead of just github. unfortunately most servers do not send CORS headers so a proxy is needed --- src/lib/components/repo/utils.spec.ts | 100 +++++++++++++++--- src/lib/components/repo/utils.ts | 66 +++++++++--- src/lib/stores/repo.ts | 47 ++++---- .../git_proxy/readme/[clone]/+server.ts | 28 ----- .../git_proxy/readme/[readme_url]/+server.ts | 39 +++++++ 5 files changed, 201 insertions(+), 79 deletions(-) delete mode 100644 src/routes/git_proxy/readme/[clone]/+server.ts create mode 100644 src/routes/git_proxy/readme/[readme_url]/+server.ts diff --git a/src/lib/components/repo/utils.spec.ts b/src/lib/components/repo/utils.spec.ts index fd16572..f7d9270 100644 --- a/src/lib/components/repo/utils.spec.ts +++ b/src/lib/components/repo/utils.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from 'vitest' -import { extractGithubDetails, selectRepoFromCollection } from './utils' +import { cloneArrayToReadMeUrls, selectRepoFromCollection } from './utils' import { collection_defaults, event_defaults, @@ -160,18 +160,92 @@ describe('getSelectedRepo', () => { }) }) -describe('extractGithubDetails', () => { - ;[ - 'https://github.com/orgname/reponame.git', - 'https://github.com/orgname/reponame', - 'git@github.com:orgname/reponame', - ].forEach((clone) => { - describe(clone, () => { - test('returns correct org and repo', () => { - const res = extractGithubDetails(clone) - expect(res?.org).toEqual('orgname') - expect(res?.repo_name).toEqual('reponame') - }) +describe('cloneArrayToReadMeUrls', () => { + test('for each clone url returns url to /raw/HEAD/README.md and /raw/HEAD/readme.md', () => { + expect( + cloneArrayToReadMeUrls([ + 'https://gitea.com/orgname/reponame', + 'https://gitlab.com/orgname/reponame', + ]) + ).toEqual([ + 'https://gitea.com/orgname/reponame/raw/HEAD/README.md', + 
'https://gitea.com/orgname/reponame/raw/HEAD/readme.md', + 'https://gitlab.com/orgname/reponame/raw/HEAD/README.md', + 'https://gitlab.com/orgname/reponame/raw/HEAD/readme.md', + ]) + }) + test('for github link use raw.githubusercontent.com/HEAD', () => { + expect( + cloneArrayToReadMeUrls(['https://github.com/orgname/reponame']) + ).toEqual([ + 'https://raw.githubusercontent.com/HEAD/README.md', + 'https://raw.githubusercontent.com/HEAD/readme.md', + ]) + }) + test('for sr.hr link to /blob/HEAD', () => { + expect(cloneArrayToReadMeUrls(['https://sr.ht/~orgname/reponame'])).toEqual( + [ + 'https://sr.ht/~orgname/reponame/blob/HEAD/README.md', + 'https://sr.ht/~orgname/reponame/blob/HEAD/readme.md', + ] + ) + }) + test('for git.launchpad.net link to /plain', () => { + expect( + cloneArrayToReadMeUrls(['https://git.launchpad.net/orgname/reponame']) + ).toEqual([ + 'https://git.launchpad.net/orgname/reponame/plain/README.md', + 'https://git.launchpad.net/orgname/reponame/plain/readme.md', + ]) + }) + test('for git.savannah.gnu.org link to /plain', () => { + expect( + cloneArrayToReadMeUrls(['https://git.savannah.gnu.org/orgname/reponame']) + ).toEqual([ + 'https://git.savannah.gnu.org/orgname/reponame/plain/README.md', + 'https://git.savannah.gnu.org/orgname/reponame/plain/readme.md', + ]) + }) + describe('transform clone address to url', () => { + test('strips trailing / from address', () => { + expect( + cloneArrayToReadMeUrls(['https://codeberg.org/orgname/reponame/']) + ).toEqual([ + 'https://codeberg.org/orgname/reponame/raw/HEAD/README.md', + 'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md', + ]) + }) + test('strips .git from address', () => { + expect( + cloneArrayToReadMeUrls(['https://codeberg.org/orgname/reponame.git']) + ).toEqual([ + 'https://codeberg.org/orgname/reponame/raw/HEAD/README.md', + 'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md', + ]) + }) + test('git@codeberg.org:orgname/reponame.git to address', () => { + expect( + 
cloneArrayToReadMeUrls(['git@codeberg.org:orgname/reponame.git']) + ).toEqual([ + 'https://codeberg.org/orgname/reponame/raw/HEAD/README.md', + 'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md', + ]) + }) + test('ssh://codeberg.org/orgname/reponame to address', () => { + expect( + cloneArrayToReadMeUrls(['ssh://codeberg.org/orgname/reponame']) + ).toEqual([ + 'https://codeberg.org/orgname/reponame/raw/HEAD/README.md', + 'https://codeberg.org/orgname/reponame/raw/HEAD/readme.md', + ]) + }) + test('https://custom.com/deep/deeper/deeper to address', () => { + expect( + cloneArrayToReadMeUrls(['https://custom.com/deep/deeper/deeper']) + ).toEqual([ + 'https://custom.com/deep/deeper/deeper/raw/HEAD/README.md', + 'https://custom.com/deep/deeper/deeper/raw/HEAD/readme.md', + ]) }) }) }) diff --git a/src/lib/components/repo/utils.ts b/src/lib/components/repo/utils.ts index e8bf1ee..d9f8cce 100644 --- a/src/lib/components/repo/utils.ts +++ b/src/lib/components/repo/utils.ts @@ -16,20 +16,56 @@ export const selectRepoFromCollection = ( })[0] } -export const extractGithubDetails = ( - clone: string -): { org: string; repo_name: string } | undefined => { - if (clone.indexOf('github.') > -1) { - const g_split = clone.split('github.') - if (g_split.length > 0) { - const final = g_split[1].replace(':', '/').split('/') - if (final.length > 2) { - return { - org: final[1], - repo_name: final[2].split('.')[0], - } +/** most servers will produce a CORS error so a proxy should be used */ +export const cloneArrayToReadMeUrls = (clone: string[]): string[] => { + const addresses = clone.map(extractRepoAddress) + /** + * at the time of this commit these urls work for: + * self-hosted gitea (or forgejo), gitlab + * github.com + * bitbucket.org + * gitlab.com + * gitea.com + * codeberg.org (forgejo instance) + * sourcehut (git.sr.ht) + * launchpad.net + * It doesn't work for: + * self-hosted gogs (requires branch name repo/raw/master/README.md) + * sourceforge.net 
(https://sourceforge.net/p/mingw/catgets/ci/master/tree/README?format=raw) + * notabug.org (requires branch name notabug.org/org/repo/raw/master/README.md) + */ + return [ + ...addresses.flatMap((address) => { + let prefix = 'raw/HEAD' + if (address.includes('sr.ht')) prefix = 'blob/HEAD' + if ( + address.includes('git.launchpad.net') || + address.includes('git.savannah.gnu.org') + ) + prefix = 'plain' + if (address.includes('github.com')) { + // raw.githubusercontent.com can be used without CORS error + address = address.replace('github.com', 'raw.githubusercontent.com') + prefix = 'HEAD' } - } - } - return undefined + return ['README.md', 'readme.md'].map( + (filename) => `https://${address}/${prefix}/${filename}` + ) + }), + ] +} + +const extractRepoAddress = (clone_string: string): string => { + let s = clone_string + // remove trailing slash + if (s.endsWith('/')) s = s.substring(0, s.length - 1) + // remove trailing .git + if (s.endsWith('.git')) s = s.substring(0, s.length - 4) + // remove :// and anything before + if (s.includes('://')) s = s.split('://')[1] + // remove @ and anything before + if (s.includes('@')) s = s.split('@')[1] + // replace : with / + s = s.replace(':', '/') + return s } diff --git a/src/lib/stores/repo.ts b/src/lib/stores/repo.ts index 8056d01..9ecb58b 100644 --- a/src/lib/stores/repo.ts +++ b/src/lib/stores/repo.ts @@ -11,7 +11,7 @@ import { } from '$lib/components/repo/type' import { ensureRepoCollection } from './repos' import { - extractGithubDetails, + cloneArrayToReadMeUrls, selectRepoFromCollection, } from '$lib/components/repo/utils' import { get } from 'svelte/store' @@ -104,33 +104,34 @@ const ensureRepoReadme = async ( }) } } + let text: string | undefined try { - const github_details = clone - .map(extractGithubDetails) - .find((details) => !!details) - let res: Response - if (github_details) { + let readme_urls = cloneArrayToReadMeUrls(clone) + // prioritise using github as it doesn't require a proxy + readme_urls = [ + 
...readme_urls.filter((url) => url.includes('raw.githubusercontent.com')), + ...readme_urls.filter( + (url) => !url.includes('raw.githubusercontent.com') + ), + ] + for (let i = 0; i < readme_urls.length; i++) { try { - res = await fetch( - `https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/README.md` + const res = await fetch( + readme_urls[i].includes('raw.githubusercontent.com') + ? readme_urls[i] + : // use proxy as most servers produce a CORS error + `/git_proxy/readme/${encodeURIComponent(readme_urls[i])}` ) - if (!res.ok) { - throw 'api request error' + if (res.ok) { + text = await res.text() + break + } else { + continue } } catch { - res = await fetch( - `https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/readme.md` - ) + continue } - } else - res = await fetch(`/git_proxy/readme/${encodeURIComponent(clone[0])}`) - if (!res.ok) { - throw 'api request error' } - let text = '' - text = await res.text() - update(text) - } catch (e) { - update() - } + } catch {} + update(text) } diff --git a/src/routes/git_proxy/readme/[clone]/+server.ts b/src/routes/git_proxy/readme/[clone]/+server.ts deleted file mode 100644 index e126df4..0000000 --- a/src/routes/git_proxy/readme/[clone]/+server.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { extractGithubDetails } from '$lib/components/repo/utils' - -export const GET = async ({ params }: { params: { clone: string } }) => { - const github_details = extractGithubDetails(decodeURIComponent(params.clone)) - if (github_details) { - const res = await fetch( - `https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/README.md` - ) - const text = await res.text() - - return new Response(text) - } else { - // options: - // * add support for different git server implementations that serve raw - // files and cycle through the urls until we find the readme - // * add a worker that can use 'git archive' to get specific files - // * 
unfortunately the two options that can easily embeded within this - // sveltekit backend (wasm-git and isomorphicgit) don't support the - // 'archive' command - // https://github.com/petersalomonsen/wasm-git/ - // https://github.com/isomorphic-git - // * 'git clone' is too expensive for retrieving single files. even when - // done using treeless or blobless flags. see: - // https://noise.getoto.net/2020/12/21/get-up-to-speed-with-partial-clone-and-shallow-clone/ - - return new Response(null) - } -} diff --git a/src/routes/git_proxy/readme/[readme_url]/+server.ts b/src/routes/git_proxy/readme/[readme_url]/+server.ts new file mode 100644 index 0000000..d0e3b77 --- /dev/null +++ b/src/routes/git_proxy/readme/[readme_url]/+server.ts @@ -0,0 +1,39 @@ +import { error } from '@sveltejs/kit' + +export const GET = async ({ params }: { params: { readme_url: string } }) => { + // prevent abuse of the proxy by ensuring the url contains 'readme.md' + if ( + !( + params.readme_url.includes('readme.md') || + params.readme_url.includes('README.md') + ) + ) + return new Response(null) + + let text: string | undefined + try { + const res = await fetch(params.readme_url, { + signal: AbortSignal.timeout(5000), + }) + if (res.ok) { + text = await res.text() + } else { + return error(res.status, res.statusText) + } + } catch { + return error(408, 'timeout') + } + return new Response(text || null) + + // `https://raw.githubusercontent.com/${github_details.org}/${github_details.repo_name}/HEAD/README.md` + // alternative approaches: + // * add a worker that can use 'git archive' to get specific files + // * unfortunately the two options that can easily be embedded within this + // sveltekit backend (wasm-git and isomorphicgit) don't support the + // 'archive' command + // https://github.com/petersalomonsen/wasm-git/ + // https://github.com/isomorphic-git + // * 'git clone' is too expensive for retrieving single files. even when + // done using treeless or blobless flags. 
see: + // https://noise.getoto.net/2020/12/21/get-up-to-speed-with-partial-clone-and-shallow-clone/ +}