Search results

Algo

markdown.download

Handy microservice/library to convert various data sources into markdown. Intended to make it easier to consume the web in ereaders

Introductory blog post: https://taras.glek.net/post/markdown.download/

Package: https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
import { YouTube } from "npm:youtube-sr@4.3.11";
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// init async loading of modules
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
/**
* converts HTML to markdown
* @returns markdown in string
*/
export async function html2markdown(html: string): Promise<string> {
if (AgentMarkdownImport) {
// TurndownService doesn't work on cf
// Dynamically import AgentMarkdown when running in Cloudflare Worker
const { AgentMarkdown } = await AgentMarkdownImport;
return await AgentMarkdown.produce(html);
} else {
// Dynamically import TurndownService otherwise
return new (await TurndownService)().turndown(html);
}
}
/**
* extracts article from html
* then converts it to md
* @returns markdown in string
*/
export async function readability2markdown(html: string): Promise<{ title: string; markdown: string }> {
const doc = await (new DOMParser().parseFromString(html, "text/html"));
const reader = new Readability(doc);
const article = reader.parse();
const markdown = await html2markdown(article?.content || "");
return { title: doc.title.textContent, markdown };
}
function getYoutubeVideoID(url: URL): string | null {
const regExp = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
const match = url.href.match(regExp);
return match ? match[1] : null;
}
function response(message: string, contentType = "text/markdown"): Response {
const headers = new Headers();
headers.set("Access-Control-Allow-Origin", "*");
headers.set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS");
headers.set("Access-Control-Allow-Headers", "Content-Type, Authorization");
headers.set("Access-Control-Max-Age", "86400");
headers.set("Content-Type", contentType);
return new Response(message, {
status: 200,
headers: headers,
});
}
function err(msg: string): Response {
const errorMessage = JSON.stringify({
error: {
message: msg,
code: 400,
},
});
return response(errorMessage, "application/json");
}
function fudgeURL(url: string) {
try {
return new URL(url);
} catch (e) {
// console.log("Url parsing failed", e.stack);
return new URL("https://" + url);
}
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/curtcox/markdown_download",
"markdown.download",
),
"text/html",

markdown.download

Handy microservice/library to convert various data sources into markdown. Intended to make it easier to consume the web in ereaders

Introductory blog post: https://taras.glek.net/post/markdown.download/

Package: https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
import { YouTube } from "npm:youtube-sr@4.3.11";
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// init async loading of modules
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
/**
* converts HTML to markdown
* @returns markdown in string
*/
export async function html2markdown(html: string): Promise<string> {
if (AgentMarkdownImport) {
// TurndownService doesn't work on cf
// Dynamically import AgentMarkdown when running in Cloudflare Worker
const { AgentMarkdown } = await AgentMarkdownImport;
return await AgentMarkdown.produce(html);
} else {
// Dynamically import TurndownService otherwise
return new (await TurndownService)().turndown(html);
}
}
/**
* extracts article from html
* then converts it to md
* @returns markdown in string
*/
export async function readability2markdown(html: string): Promise<{ title: string; markdown: string }> {
const doc = await (new DOMParser().parseFromString(html, "text/html"));
const reader = new Readability(doc);
const article = reader.parse();
const markdown = await html2markdown(article?.content || "");
return { title: doc.title.textContent, markdown };
}
function getYoutubeVideoID(url: URL): string | null {
const regExp = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
const match = url.href.match(regExp);
return match ? match[1] : null;
}
function response(message: string, contentType = "text/markdown"): Response {
const headers = new Headers();
headers.set("Access-Control-Allow-Origin", "*");
headers.set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS");
headers.set("Access-Control-Allow-Headers", "Content-Type, Authorization");
headers.set("Access-Control-Max-Age", "86400");
headers.set("Content-Type", contentType);
return new Response(message, {
status: 200,
headers: headers,
});
}
function err(msg: string): Response {
const errorMessage = JSON.stringify({
error: {
message: msg,
code: 400,
},
});
return response(errorMessage, "application/json");
}
function fudgeURL(url: string) {
try {
return new URL(url);
} catch (e) {
// console.log("Url parsing failed", e.stack);
return new URL("https://" + url);
}
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/taras/markdown_download",
"markdown.download",
),
"text/html",

Becker’s Barley trellis

SSR chart with Observable Plot

This chart is rendered server-side by val.town, using Observable Plot, from data loaded from the GitHub API. For a more complete example, see https://www.val.town/v/fil.earthquakes. For information on this chart, see https://observablehq.com/@observablehq/plot-barley-trellis.

chart

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
export async function beckerBarley() {
const Plot = await import("https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6.14/+esm");
const d3 = await import("https://cdn.jsdelivr.net/npm/d3@7/+esm");
const { document } = await import("https://cdn.jsdelivr.net/npm/linkedom@0.15/+esm").then((
{ parseHTML: p },
) => p(`<a>`));
const barley = await d3.csv(
"https://raw.githubusercontent.com/observablehq/plot/main/test/data/barley.csv",
d3.autoType,
);
const chart = Plot.plot({
document,
marginLeft: 110,
height: 800,
grid: true,
x: { nice: true },
y: { inset: 5 },
color: { type: "categorical" },
facet: { marginRight: 90 },
marks: [
Plot.frame(),
Plot.dot(barley, {
x: "yield",
y: "variety",
fy: "site",
stroke: "year",
sort: { fy: "x", y: "x", reduce: "median", reverse: true },
}),
],
});
return new Response(
`${chart}`.replace(
/^<svg /,
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" ",
),
{ headers: { "Content-Type": "image/svg+xml" } },
);
}

Earthquake map 🌏

This val loads earthquake data from USGS, a topojson file for the land shape, and supporting libraries. It then creates a map and save it as a SVG string. The result is cached for a day. Note that we must strive to keep it under val.town’s limit of 100kB, hence the heavy simplification of the land shape. (For a simpler example, see becker barley.)

Web pagehttps://fil-earthquakes.web.val.run/
Observable Plot https://observablehq.com/plot/
linkedomhttps://github.com/WebReflection/linkedom
topojsonhttps://github.com/topojson/topojson
earthquakeshttps://earthquake.usgs.gov
worldhttps://observablehq.com/@visionscarto/world-atlas-topojson
csshttps://milligram.io/
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import { fetch } from "https://esm.town/v/std/fetch";
import { set } from "https://esm.town/v/std/set?v=11";
let { earthquakes_storage } = await import("https://esm.town/v/fil/earthquakes_storage");
export async function earthquakes(req?) {
const yesterday = new Date(-24 * 3600 * 1000 + +new Date()).toISOString();
if (!(earthquakes_storage?.date > yesterday)) {
const dataUrl = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson";
const worldUrl = "https://cdn.jsdelivr.net/npm/visionscarto-world-atlas@1/world/110m.json";
let [Plot, { document }, topojson, quakes, world] = await Promise.all([
import("https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6.14/+esm"),
import("https://cdn.jsdelivr.net/npm/linkedom@0.15/+esm").then((l) => l.parseHTML("<a>")),
import("https://cdn.jsdelivr.net/npm/topojson@3/+esm"),
fetch(dataUrl).then((r) => r.json()),
fetch(worldUrl).then((r) => r.json()),
]);
world = topojson.presimplify(world, topojson.sphericalTriangleArea);
world = topojson.simplify(world, 0.0001);
const chart = Plot.plot({
document,
projection: { type: "equal-earth", rotate: [-10, 0] },
r: { type: "linear", domain: [0, 5], range: [0, 10] },
marks: [
Plot.geo(topojson.feature(world, world.objects.land)),
Plot.dot(
quakes.features,
Plot.centroid({
r: (d) => d.properties.mag,
fill: "red",
fillOpacity: 0.3,
}),
),
Plot.graticule(),
Plot.sphere(),
],
});
earthquakes_storage = {
date: new Date().toISOString(),
svg: `${chart}`.replaceAll(/(\.\d)\d+/g, "$1"),
};
await set(
"earthquakes_storage",
earthquakes_storage,
);
}
// If invoked through the web endpoint, return a web page.
return req instanceof Request
? new URL(req.url).searchParams.get("svg")
? new Response(
earthquakes_storage.svg.replace(
/^<svg /,
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" ",
),
{ headers: { "Content-Type": "image/svg+xml" } },
)
: new Response(
`<body style="padding: 3em;"><h1>Earthquake map</h1><p>Data updated <abbr title="${earthquakes_storage.date}">daily</abbr>; source: USGS.</p>
${earthquakes_storage.svg}
<small><a href="https://www.val.town/v/fil.earthquakes"><b><i>vt</i></b> source code</a>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300italic,700,700italic">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/milligram/1.4.1/milligram.css">`,
{ headers: { "Content-Type": "text/html" } },
)
: earthquakes_storage;
}

markdown.download

Handy microservice/library to convert various data sources into markdown. Intended to make it easier to consume the web in ereaders

https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// init async loading of modules
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
async function markdown2html(html: string): Promise<string> {
if (AgentMarkdownImport) {
// TurndownService doesn't work on cf
// Dynamically import AgentMarkdown when running in Cloudflare Worker
const { AgentMarkdown } = await AgentMarkdownImport;
return await AgentMarkdown.produce(html);
} else {
// Dynamically import TurndownService otherwise
return new (await TurndownService)().turndown(html);
}
}
function getYoutubeVideoID(url: URL): string | null {
const regExp = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
const match = url.href.match(regExp);
return match ? match[1] : null;
}
function response(message: string, contentType = "text/markdown"): Response {
const headers = new Headers();
headers.set("Access-Control-Allow-Origin", "*");
headers.set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS");
headers.set("Access-Control-Allow-Headers", "Content-Type, Authorization");
headers.set("Access-Control-Max-Age", "86400");
headers.set("Content-Type", contentType);
return new Response(message, {
status: 200,
headers: headers,
});
}
function err(msg: string): Response {
const errorMessage = JSON.stringify({
error: {
message: msg,
code: 400,
},
});
return response(errorMessage, "application/json");
}
function fudgeURL(url: string) {
try {
return new URL(url);
} catch (e) {
// console.log("Url parsing failed", e.stack);
return new URL("https://" + url);
}
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/taras/markdown_download",
"markdown.download",
),
"text/html",
);
return ret;
}
}
ret.url = fudgeURL(pathname);
return ret;
}
export default async function(req: Request): Promise<Response> {
const action = processInput(req);
const url = action.url;
if (!url) {
return action.response!;
}
const youtubeVideoID = getYoutubeVideoID(url);
if (youtubeVideoID) {
const arr = (await getSubtitles({
videoID: youtubeVideoID,
})) as { text: string }[];
const description = "## Generated Transcription\n\n"

markdown.download

Handy microservice/library to convert various data sources into markdown. Intended to make it easier to consume the web in ereaders

https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// init async loading of modules
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
async function markdown2html(html: string): Promise<string> {
if (AgentMarkdownImport) {
// TurndownService doesn't work on cf
// Dynamically import AgentMarkdown when running in Cloudflare Worker
const { AgentMarkdown } = await AgentMarkdownImport;
return await AgentMarkdown.produce(html);
} else {
// Dynamically import TurndownService otherwise
return new (await TurndownService)().turndown(html);
}
}
function getYoutubeVideoID(url: URL): string | null {
const regExp = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
const match = url.href.match(regExp);
return match ? match[1] : null;
}
function response(message: string, contentType = "text/markdown"): Response {
const headers = new Headers();
headers.set("Access-Control-Allow-Origin", "*");
headers.set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS");
headers.set("Access-Control-Allow-Headers", "Content-Type, Authorization");
headers.set("Access-Control-Max-Age", "86400");
headers.set("Content-Type", contentType);
return new Response(message, {
status: 200,
headers: headers,
});
}
function err(msg: string): Response {
const errorMessage = JSON.stringify({
error: {
message: msg,
code: 400,
},
});
return response(errorMessage, "application/json");
}
function fudgeURL(url: string) {
try {
return new URL(url);
} catch (e) {
// console.log("Url parsing failed", e.stack);
return new URL("https://" + url);
}
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/taras/markdown_download",
"markdown.download",
),
"text/html",
);
return ret;
}
}
ret.url = fudgeURL(pathname);
return ret;
}
export default async function(req: Request): Promise<Response> {
const action = processInput(req);
const url = action.url;
if (!url) {
return action.response!;
}
const youtubeVideoID = getYoutubeVideoID(url);
if (youtubeVideoID) {
const arr = (await getSubtitles({
videoID: youtubeVideoID,
})) as { text: string }[];
const description = "## Generated Transcription\n\n"

This is a deno/valtown port in progress of https://github.com/tarasglek/scrape2md

License: MIT

Handy script to scrape various data sources into markdown. Intended to feed llms in https://chatcraft.org

Usage: https://taras-scrape2md.web.val.run/ + URL_TO_SCRAPE

Or just visit in browser and paste your url

TODO

https://chatcraft.org/api/share/tarasglek/IDYChVAilfePgVZb_T5pH POST from browser https://www.val.town/v/nbbaier/valToGH sync to github

Metadata for use with https://github.com/tarasglek/valtown2js:

{
  "typeCheck": false,
  "mappings": {
    "https://esm.sh/linkedom": {
      "name": "linkedom",
      "version": "^0.16.8"
    }
  },
  "package": {
    "name": "scrape2md",
    "version": "1.0.0",
    "devDependencies": {
      "@types/turndown": "^5.0.4"
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import TurndownService from "npm:turndown@^7.1.3";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
function getYoutubeVideoID(url: URL): string | null {
const regExp = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
const match = url.href.match(regExp);
return match ? match[1] : null;
}
function response(message: string, contentType = "text/markdown"): Response {
const headers = new Headers();
headers.set("Access-Control-Allow-Origin", "*");
headers.set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS");
headers.set("Access-Control-Allow-Headers", "Content-Type, Authorization");
headers.set("Access-Control-Max-Age", "86400");
headers.set("Content-Type", contentType);
return new Response(message, {
status: 200,
headers: headers,
});
}
function err(msg: string): Response {
const errorMessage = JSON.stringify({
error: {
message: msg,
code: 400,
},
});
return response(errorMessage, "application/json");
}
export default async function(req: Request): Promise<Response> {
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else {
return response(html, "text/html");
}
}
const url = new URL(pathname);
const youtubeVideoID = getYoutubeVideoID(url);
if (youtubeVideoID) {
const arr = (await getSubtitles({
videoID: youtubeVideoID,
})) as { text: string }[];
const description = "## Generated Transcription\n\n"
+ arr.map(({ text }) => text).join("\n");
return response(description);
}
const dom_promise = fetch(url.toString(), {
method: req.method,
headers: new Headers({
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Sec-Fetch-Site": "cross-site",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-User": "?1",
"Sec-Fetch-Dest": "document",
"Referer": "https://www.google.com/",
"sec-ch-ua": `"Not A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"`,
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": `"macOS"`,
"Upgrade-Insecure-Requests": "1",
// Add any other headers you need here
}),
})
.then(r => r.text())
.then(async html => new DOMParser().parseFromString(html, "text/html"));
const doc = await dom_promise;
const reader = new Readability(doc);
const article = reader.parse();
console.log("content", typeof article?.content, article?.content);
const markdown = new TurndownService().turndown(article?.content || "") + "\n\n" + url;
if (req.headers.get("Accept")?.includes("text/html")) {
return response(await marked.parse(markdown), "text/html");
} else {
return response(markdown);
}
}
const html = `
<!DOCTYPE html>
<html>
<head>
<title>scrape2md ui</title>
<!-- Tailwind CSS -->

Check Bare Bones Tiki to see when their sea light swizzles become available.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import { DOMParser, Node } from "https://esm.sh/linkedom@0.16.1";
import { email } from "https://esm.town/v/std/email?v=9";
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=5";
function isHTMLElement(node: Node): node is HTMLElement {
return node.nodeType === Node.ELEMENT_NODE;
}
const URL = "https://www.barebonestiki.com/shop/swizzles";
// Toggle where an email should be sent ONLY when swizzles are available,
// or sent every time this is run, regardless of status.
const SEND_ONLY_WHEN_AVAILABLE = false;
export const webscrapeBareBonesTiki = async () => {
const html = await fetchText(URL);
const document = new DOMParser().parseFromString(html, "text/html");
// Grab every .grid-title, because the products listed on the page have
// identical classes, they're only differentiated by the text inside
const nodeList: NodeListOf<HTMLElement> = document.querySelectorAll(".grid-title");
// Find the title that matches the product we're interested in
const titleNodes = Array.from(nodeList).filter((node) => {
if (isHTMLElement(node)) {
return node.textContent?.trim() === "Sea Light Swizzles";
}
return false;
});
// Traversal time: go up to find the ancestor that contains both this title
// and the "sold out" status
const parent = titleNodes[0]?.closest(".grid-meta-wrapper");
// Then go back down to find the status element
const status = parent.querySelector(".grid-meta-status").textContent.trim();
const isAvailable = status !== "SOLD OUT";
if (!SEND_ONLY_WHEN_AVAILABLE || isAvailable) {
await email({
subject: `Sea Light Swizzles: ${isAvailable ? "Available! 🐡" : "Not Available 🚫"}`,
html: `<a href="${URL}">Sea Light Swizzles</a> are ${isAvailable ? "available!" : "currently sold out."}`,
});
}
};

D3 Chord diagram

Example taken from the D3 Gallery, and rendered (server-side) as a static SVG served through the web end point.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { svgServer } from "https://esm.town/v/fil/svgServer";
export async function d3ChordDiagram(req) {
// Import D3 and create a DOM document for server-side-rendering.
const d3 = await import("npm:d3");
const document = await import("https://esm.sh/linkedom@0.15").then((l) =>
l.parseHTML("<a>").document
);
// Data
const data = Object.assign([
[11975, 5871, 8916, 2868],
[1951, 10048, 2060, 6171],
[8010, 16145, 8090, 8045],
[1013, 990, 940, 6907],
], {
names: ["black", "blond", "brown", "red"],
colors: ["#000000", "#ffdd89", "#957244", "#f26223"],
});
// Compute the SVG and return it through the web endpoint
return svgServer(req, chart(data).outerHTML);
//
// ======================================================
//
function chart(data) {
const width = 640;
const height = width;
const outerRadius = Math.min(width, height) * 0.5 - 30;
const innerRadius = outerRadius - 20;
const { names, colors } = data;
const sum = d3.sum(data.flat());
const tickStep = d3.tickStep(0, sum, 100);
const tickStepMajor = d3.tickStep(0, sum, 20);
const formatValue = d3.formatPrefix(",.0", tickStep);
const chord = d3.chord()
.padAngle(20 / innerRadius)
.sortSubgroups(d3.descending);
const arc = d3.arc()
.innerRadius(innerRadius)
.outerRadius(outerRadius);
const ribbon = d3.ribbon()
.radius(innerRadius);
const svg = d3.select(document.body).append("svg")
.attr("width", width)
.attr("height", height)
.attr("viewBox", [-width / 2, -height / 2, width, height])
.attr("style", "max-width: 100%; height: auto; font: 10px sans-serif;");
const chords = chord(data);
const group = svg.append("g")
.selectAll()
.data(chords.groups)
.join("g");
group.append("path")
.attr("fill", (d) => colors[d.index])
.attr("d", arc)
.append("title")
.text((d) => `${d.value.toLocaleString("en-US")} ${names[d.index]}`);
const groupTick = group.append("g")
.selectAll()
.data((d) => groupTicks(d, tickStep))
.join("g")
.attr(
"transform",
(d) =>
`rotate(${d.angle * 180 / Math.PI - 90}) translate(${outerRadius},0)`,
);
groupTick.append("line")
.attr("stroke", "currentColor")
.attr("x2", 6);
groupTick
.filter((d) => d.value % tickStepMajor === 0)
.append("text")
.attr("x", 8)
.attr("dy", ".35em")
.attr(
"transform",
(d) => d.angle > Math.PI ? "rotate(180) translate(-16)" : null,
)
.attr("text-anchor", (d) => d.angle > Math.PI ? "end" : null)
.text((d) => formatValue(d.value));
svg.append("g")
.attr("fill-opacity", 0.7)
.selectAll()
.data(chords)
.join("path")
.attr("d", ribbon)
.attr("fill", (d) => colors[d.target.index])
.attr("stroke", "white")
.append("title")
.text((d) =>
`${d.source.value.toLocaleString("en-US")} ${names[d.source.index]}${
names[d.target.index]
}${
d.source.index !== d.target.index
? `\n${d.target.value.toLocaleString("en-US")} ${
names[d.target.index]
}${names[d.source.index]}`
: ``
}`
);
return svg.node();
1
Next