Avatar

curtcox

16 public vals
Joined January 27, 2024

markdown.download

Handy microservice/library to convert various data sources into markdown. Intended to make it easier to consume the web in ereaders.

Introductory blog post: https://taras.glek.net/post/markdown.download/

Package: https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
import { YouTube } from "npm:youtube-sr@4.3.11";
// Runtime detection: true when the Fetch API globals `Request` and `Response` both exist.
// NOTE(review): these globals presumably also exist in Deno and recent Node, so the flag
// may be true outside Cloudflare Workers as well — confirm that is intended.
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// init async loading of modules
// When the flag is set, the agentmarkdown import is started but not awaited here
// (html2markdown awaits the promise); otherwise turndown is loaded with a top-level await.
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
/**
 * Converts HTML to markdown.
 *
 * Uses AgentMarkdown when its import was started at module load, otherwise
 * falls back to Turndown.
 *
 * @param html HTML source to convert
 * @returns markdown in string
 * @throws Error when neither converter module was loaded
 */
export async function html2markdown(html: string): Promise<string> {
  if (AgentMarkdownImport) {
    // TurndownService doesn't work on cf
    const { AgentMarkdown } = await AgentMarkdownImport;
    return await AgentMarkdown.produce(html);
  }
  // `TurndownService` holds the module namespace object returned by `import()`.
  // A namespace object cannot be called with `new`; the constructor is its default export.
  const turndownModule = await TurndownService;
  if (!turndownModule) {
    throw new Error("no HTML-to-markdown converter is loaded");
  }
  const Turndown = turndownModule.default ?? turndownModule;
  return new Turndown().turndown(html);
}
/**
 * Runs Readability over an HTML page and converts the extracted article to markdown.
 *
 * When Readability yields no article content, an empty string is converted instead.
 *
 * @param html full HTML of the page
 * @returns the parsed document's title text together with the article markdown
 */
export async function readability2markdown(html: string): Promise<{ title: string; markdown: string }> {
  const parser = new DOMParser();
  const parsed = await parser.parseFromString(html, "text/html");
  const extracted = new Readability(parsed).parse();
  const articleHtml = extracted?.content || "";
  const markdown = await html2markdown(articleHtml);
  const title = parsed.title.textContent;
  return { title, markdown };
}
/**
 * Pulls the 11-character video ID out of a YouTube URL.
 *
 * @param url URL to inspect; only its `href` is matched
 * @returns the captured ID, or null when the href does not match the pattern
 */
function getYoutubeVideoID(url: URL): string | null {
  const videoIdPattern = /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i;
  const found = videoIdPattern.exec(url.href);
  if (found === null) {
    return null;
  }
  return found[1];
}
/**
 * Builds a 200 response carrying permissive CORS headers.
 *
 * @param message response body
 * @param contentType value for the Content-Type header (defaults to markdown)
 * @returns the constructed Response
 */
function response(message: string, contentType = "text/markdown"): Response {
  const headerPairs: Array<[string, string]> = [
    ["Access-Control-Allow-Origin", "*"],
    ["Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS"],
    ["Access-Control-Allow-Headers", "Content-Type, Authorization"],
    ["Access-Control-Max-Age", "86400"],
    ["Content-Type", contentType],
  ];
  const headers = new Headers();
  for (const [name, value] of headerPairs) {
    headers.set(name, value);
  }
  return new Response(message, { status: 200, headers });
}
/**
 * Wraps an error message in a JSON body of the form
 * `{ error: { message, code: 400 } }` and sends it through `response`.
 *
 * @param msg human-readable error text
 * @returns a JSON response produced by `response`
 */
function err(msg: string): Response {
  const payload = { error: { message: msg, code: 400 } };
  return response(JSON.stringify(payload), "application/json");
}
/**
 * Parses a string as a URL, retrying with an "https://" prefix when the
 * first attempt throws.
 *
 * @param url text that may or may not include a scheme
 * @returns the parsed URL (the second attempt may itself throw)
 */
function fudgeURL(url: string) {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable as given; assume the scheme was left off.
    parsed = new URL("https://" + url);
  }
  return parsed;
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/curtcox/markdown_download",
"markdown.download",
),
"text/html",
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import {
CommandContext,
CommandDefinition,
} from "https://raw.githubusercontent.com/curtcox/CommandInterpreter/main/command/CommandDefinition.ts";
import { def_from_simple } from "https://raw.githubusercontent.com/curtcox/CommandInterpreter/main/command/ToolsForCommandWriters.ts";
/**
 * Fetches the markdown rendition of a URL from markdown.download.
 *
 * @param url address of the page to convert; it is URI-encoded into the `url` query parameter
 * @returns the response body as text
 */
async function markdown(url: string) {
  const target = `https://markdown.download/?url=${encodeURIComponent(url)}`;
  const reply = await fetch(target);
  return await reply.text();
}
/** Command definition for "markdown": ignores the context and passes the URL to `markdown`. */
export const command: CommandDefinition = def_from_simple({
  name: "markdown",
  doc: "return the contents of a specified URL as markdown.",
  source: import.meta.url,
  func: (_context: CommandContext, url: string) => {
    return markdown(url);
  },
});
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import { email } from "https://esm.town/v/std/email";
import {
CommandContext,
CommandData,
CommandDefinition,
} from "https://raw.githubusercontent.com/curtcox/CommandInterpreter/main/CommandDefinition.ts";
/**
 * Metadata for the "email" command: its name, a one-line description, the URL of
 * this module, and the declared input and output format names.
 */
const meta = {
name: "email",
doc: "send an email",
source: import.meta.url,
input_formats: ["EmailOptions"],
output_formats: ["text"],
};
/**
 * Sends an email built from `options.content.subject` and `options.content.text`,
 * then returns the unchanged command set together with the `email` result.
 *
 * NOTE(review): the output is tagged with format "JSON" while `meta.output_formats`
 * lists "text" — confirm which one the interpreter expects.
 */
const func = async (context: CommandContext, options: CommandData) => {
  const { subject, text } = options.content;
  const result = await email({ subject, text });
  const output = { format: "JSON", content: result };
  return { commands: context.commands, output };
};
/** Fields the email command reads from its input content. */
export interface EmailOptions {
// Subject line of the message.
subject: string;
// Plain-text body of the message.
text: string;
}
/** The exported "email" command: the metadata above paired with its implementation. */
export const command: CommandDefinition = {
meta,
func,
};

This is public so you can see what's in it. If it had real secrets, it should be private. See https://www.val.town/v/curtcox/reply_to_slack_message

1
2
3
4
5
6
7
8
9
10
11
12
import { processor } from "https://esm.town/v/curtcox/message_processor?v=17";
import { reply_to_slack_message, SlackConfig } from "https://esm.town/v/curtcox/reply_to_slack_message";
/**
 * Example Slack handler: passes the request, the message processor and a
 * Slack configuration to `reply_to_slack_message`.
 *
 * This is for informational purposes only.
 * It needs real tokens to work.
 */
export const chatio = (req: Request) => {
  const placeholder = "See Val Town docs for how to get this token";
  const config: SlackConfig = {
    slackToken: placeholder,
    slackVerificationToken: placeholder,
  };
  return reply_to_slack_message(req, processor, config);
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/** Wraps `value` in an `<h2>` element; the value is inserted without escaping. */
export function h2(value) {
  const inner = `${value}`;
  return "<h2>" + inner + "</h2>";
}
/** Wraps `value` in an `<li>` element; the value is inserted without escaping. */
export function li(value) {
  const inner = `${value}`;
  return "<li>" + inner + "</li>";
}
/** Wraps `value` in a `<td>` cell; the value is inserted without escaping. */
export function td(value) {
  const inner = `${value}`;
  return "<td>" + inner + "</td>";
}
/** Wraps `value` in a `<th>` header cell; the value is inserted without escaping. */
export function th(value) {
  const inner = `${value}`;
  return "<th>" + inner + "</th>";
}
/** Concatenates the given cells and wraps them in a `<tr>` row. */
export function tr(...cells) {
  const joined = cells.join('');
  return "<tr>" + joined + "</tr>";
}
/** Concatenates the given items and wraps them in a `<ul>` list. */
export function ul(...items) {
  const joined = items.join('');
  return "<ul>" + joined + "</ul>";
}
/** Concatenates the given items and wraps them in a `<table>` element. */
export function table(...items) {
  const joined = items.join('');
  return "<table>" + joined + "</table>";
}
/** Like a table wrapper, but the opening tag carries the `border` attribute. */
export function bordered(...items) {
  const joined = items.join('');
  return "<table border>" + joined + "</table>";
}
/** Builds an anchor pointing at `href` with `label` as its content; nothing is escaped. */
export function a(href, label) {
  const target = `${href}`;
  const text = `${label}`;
  return '<a href="' + target + '">' + text + "</a>";
}
/** Builds an anchor with a `download` attribute naming the file to save; nothing is escaped. */
export function download(label, href, filename) {
  const target = `${href}`;
  const saveAs = `${filename}`;
  const text = `${label}`;
  return '<a href="' + target + '" download="' + saveAs + '">' + text + "</a>";
}
/** Builds a 100-wide `<img>` whose alt text is "photo for " followed by the source. */
export function img(src) {
  const srcAttr = `src="${src}"`;
  const altAttr = `alt="photo for ${src}"`;
  return "<img " + srcAttr + " " + altAttr + ' width="100">';
}
/** Wraps `value` in a `<p>` paragraph; the value is inserted without escaping. */
export function p(value) {
  const inner = `${value}`;
  return "<p>" + inner + "</p>";
}
/** Wraps `value` in a `<pre>` block; the value is inserted without escaping. */
export function pre(value) {
  const inner = `${value}`;
  return "<pre>" + inner + "</pre>";
}
/** Builds a `<details>` disclosure element with the given summary and content. */
export function details(summary, details) {
  const heading = `${summary}`;
  const content = `${details}`;
  return "<details><summary>" + heading + "</summary>" + content + "</details>";
}
/** Concatenates the items inside `<html>` and prefixes the HTML5 doctype. */
export function html(...items) {
  const joined = items.join('');
  return "<!DOCTYPE html><html>" + joined + "</html>";
}
/** Concatenates the given items and wraps them in a `<head>` element. */
export function head(...items) {
  const joined = items.join('');
  return "<head>" + joined + "</head>";
}
/**
 * Concatenates the given items and wraps them in a `<body>` element.
 * Items are inserted without escaping.
 */
export function body(...items) {
  // The opening tag previously contained a stray quote (`<body">`), which is malformed markup.
  return `<body>${items.join('')}</body>`;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import { a, table, td, tr } from "https://esm.town/v/curtcox/Html";
import { asParts, follow } from "https://esm.town/v/curtcox/Object";
/**
 * Renders the parts of an object as an HTML table, one row per part:
 * a link to the part (relative href "<key>/"), its type, its value and its part count.
 *
 * NOTE(review): part values are inserted into the markup without HTML escaping.
 *
 * @param obj value whose parts (as reported by `asParts`) are listed
 * @returns the table markup
 */
function objectable(obj: any): string {
  // Iterate the array's elements directly rather than its keys with `for..in`.
  const rows = asParts(obj).map((part) =>
    tr(td(a(part.key + "/", part.key)), td(part.type), td(part.value), td(part.parts))
  );
  return table(rows.join(""));
}
/**
 * Caps a string at 512 characters, appending a note with the number of
 * characters that were cut off.
 *
 * @param input text to shorten
 * @returns the input itself when short enough, otherwise the truncated text plus the note
 */
function trimmed(input: string): string {
  const limit = 512;
  const overflow = input.length - limit;
  if (overflow <= 0) {
    return input;
  }
  return `${input.slice(0, limit)}... ${overflow} remaining not shown`;
}
/**
 * One-line description of the value reached by a path: the last path segment
 * (or "Roots" for an empty path), the value's `typeof`, and its trimmed
 * `Deno.inspect` output.
 */
function summary(chain, at) {
  let label = "Roots";
  if (chain.length > 0) {
    label = chain.at(-1);
  }
  const kind = typeof at;
  const inspected = trimmed(Deno.inspect(at));
  return `${label} ${kind} ${inspected} `;
}
/**
 * Builds the page content for a request: follows the request's path segments
 * starting from `{ Deno, request, globalThis }` and renders a summary line
 * followed by a table of the reached value's parts.
 */
function body(request: Request) {
  const startingPoints = { Deno, request, globalThis };
  const path = pathSegments(request);
  const target = follow(startingPoints, path);
  const heading = summary(path, target);
  return heading + objectable(target);
}
/**
 * Splits the path of the request URL on "/" and drops empty segments.
 *
 * @param request request whose `url` is parsed
 * @returns the non-empty path segments in order
 */
function pathSegments(request: Request) {
  const { pathname } = new URL(request.url);
  return pathname.split("/").filter((part) => part !== "");
}
/** HTTP handler: responds with the markup from `body(req)` served as text/html. */
export const htmlExample = (req: Request) => {
  const markup = body(req);
  const headers = { "Content-Type": "text/html" };
  return new Response(markup, { headers });
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Key under which asMap stores an object's own summary ({ count, obj, type }) in the map it returns.
const self = "this";
/**
 * Walks a chain of property keys starting at `obj`.
 *
 * If reading a property throws, the thrown value becomes the current value
 * and the walk continues from it with the remaining keys.
 *
 * @param obj starting value
 * @param chain keys to read in order
 * @returns the value reached after the last key (or `obj` for an empty chain)
 */
export function follow(obj: any, chain: (string | number)[]): any {
  const step = (from: any, key: string | number): any => {
    try {
      return from[key];
    } catch (error) {
      return error;
    }
  };
  let cursor = obj;
  for (const key of chain) {
    cursor = step(cursor, key);
  }
  return cursor;
}
/**
 * Caps a string at 500 characters, appending a note with the number of
 * characters that were cut off.
 *
 * @param input text to shorten
 * @returns the input itself when short enough, otherwise the truncated text plus the note
 */
function trimmed(input: string): string {
  const limit = 500;
  const overflow = input.length - limit;
  if (overflow <= 0) {
    return input;
  }
  return `${input.slice(0, limit)}... ${overflow} remaining not shown`;
}
/** Summary of one property of an object, as produced by asParts. */
export interface ObjectPart {
// Property name.
key: string;
// `typeof` of the property value.
type: string;
// String form of the property value, trimmed to a maximum length.
value: string;
// Number of property names counted for the value by asMap.
parts: number;
}
/**
 * Lists a summary for each property of `obj` found by `asMap`.
 *
 * Entries of the map that carry no summary with a numeric `count`
 * (for example errors captured while reading a property) are skipped.
 *
 * @param obj value to describe
 * @returns one ObjectPart per summarized property
 */
export function asParts(obj: any): ObjectPart[] {
  const summaries: ObjectPart[] = [];
  const byName = asMap(obj);
  for (const key in byName) {
    const info = byName[key][self];
    if (!info || typeof info.count !== "number") {
      continue;
    }
    const text = info.obj !== undefined ? String(info.obj) : "";
    summaries.push({
      key,
      type: info.type,
      value: trimmed(text),
      parts: info.count,
    });
  }
  return summaries;
}
/**
 * Maps every property name found on `obj` and along its prototype chain to a
 * nested map of that property's value, and records a summary of `obj` itself
 * under the `self` key.
 *
 * Names are counted once per prototype level on which they appear. Property
 * values are always read from the (boxed) object itself; a value thrown while
 * reading or mapping a property is stored in place of its map.
 *
 * @param obj value to describe; primitives are boxed with `Object(obj)`
 * @param deep remaining levels to descend; when not above 0 only the summary is recorded
 * @returns the property map including the `{ count, obj, type }` summary
 */
export function asMap(obj: any, deep: number = 1): Record<string, any> {
  const result: Record<string, any> = {};
  const boxed = globalThis.Object(obj);
  let count = 0;
  // Visit the object first, then each prototype up to the end of the chain.
  for (let level = boxed; level !== null; level = Object.getPrototypeOf(level)) {
    // Own names include non-enumerable properties.
    for (const property of Object.getOwnPropertyNames(level)) {
      count += 1;
      if (!(deep > 0)) {
        continue;
      }
      try {
        result[property] = asMap(boxed[property], deep - 1);
      } catch (error) {
        result[property] = error;
      }
    }
  }
  result[self] = { count, obj, type: typeof obj };
  return result;
}

Blob Admin

This is a lightweight Blob Admin interface to view and debug your Blob data.

b7321ca2cd80899250589b9aa08bc3cae9c7cea276282561194e7fc537259b46.png

Fork this val to install:

Install

It uses basic authentication with your Val Town API Token as the password (leave the username field blank).

TODO

  • handle non-textual blobs properly
  • upload a blob by dragging it in (ondrop dropzone on the whole homepage)
  • add upload/download buttons
  • merge edit and view pages
  • add client side navigation using htmx
  • use codemirror instead of a textarea for editing text blobs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/** @jsxImportSource https://esm.sh/hono@4.0.8/jsx **/
import { modifyFetchHandler } from "https://esm.town/v/andreterron/codeOnValTown?v=50";
import view_route from "https://esm.town/v/pomdtr/blob_admin_blob";
import create_route from "https://esm.town/v/pomdtr/blob_admin_create";
import delete_route from "https://esm.town/v/pomdtr/blob_admin_delete";
import edit_route from "https://esm.town/v/pomdtr/blob_admin_edit";
import { passwordAuth } from "https://esm.town/v/pomdtr/password_auth?v=74";
import { blob } from "https://esm.town/v/std/blob?v=11";
import { Hono } from "npm:hono@4.0.8";
import { jsxRenderer } from "npm:hono@4.0.8/jsx-renderer";
// Hono application serving the Blob Admin pages.
const app = new Hono();
// Install the JSX renderer middleware with a shared page layout: Pico CSS stylesheet,
// "Blob Admin" title, and the rendered children inside a container <main>.
app.use(
jsxRenderer(({ children }) => {
return (
<html>
<head>
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
/>
<title>Blob Admin</title>
</head>
<body>
<main class="container">
{children}
</main>
</body>
</html>
);
}),
);
// Index page: lists every blob returned by blob.list() in a table showing its key,
// size divided by 1000, last-modified time, and links to the view, edit, delete and
// download routes. Keys are URI-encoded when placed in the link paths.
app.get("/", async (c) => {
let blobs = await blob.list();
return c.render(
<div class="overflow-auto">
<h1>Blob Admin</h1>
<a href="/create" style={{ marginBottom: "1em", display: "inline-block" }}>New Blob</a>
<div>
<table>
<thead>
<tr>
<th>Name</th>
<th>Size (kb)</th>
<th>Last Modified</th>
<th>Edit</th>
<th>Delete</th>
<th>Download</th>
</tr>
</thead>
{blobs.map(b => (
<tr>
<td>
<a href={`/view/${encodeURIComponent(b.key)}`}>
{b.key}
</a>
</td>
<td>{b.size / 1000}</td>
<td>{new Date(b.lastModified).toLocaleString()}</td>
<td>
<a href={`/edit/${encodeURIComponent(b.key)}`}>✍️</a>
</td>
<td>
<a href={`/delete/${encodeURIComponent(b.key)}`}>🗑️</a>
</td>
<td>
<a href={`/download/${encodeURIComponent(b.key)}`}>💾</a>
</td>
</tr>
))}
</table>
</div>
</div>,
);
});
// Mount the imported sub-applications for creating, viewing, editing and deleting blobs.
app.route("/create", create_route);
app.route("/view", view_route);
app.route("/edit", edit_route);
app.route("/delete", delete_route);
// Download: respond with whatever blob.get returns for the key taken from the path.
app.get("/download/:key", (c) => {
return blob.get(c.req.param("key"));
});
// Export the app's fetch handler wrapped in passwordAuth and then modifyFetchHandler.
export default modifyFetchHandler(passwordAuth(app.fetch));
1
2
3
4
import { chatio } from "https://esm.town/v/curtcox/_cve";
/** HTTP entry point that forwards the request unchanged to the imported `chatio` handler. */
export const Chatio = (req: Request) => chatio(req);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import { command_processor } from "https://esm.town/v/curtcox/command_processor?v=14";
/**
 * Drops the first word of a message.
 *
 * Leading whitespace is ignored; everything after the first space is
 * returned, or an empty string when there is no space.
 *
 * @param text message text
 * @returns the text following the first space
 */
function tail(text: string): string {
  const message = text.trimStart();
  const firstSpace = message.indexOf(" ");
  return firstSpace === -1 ? "" : message.substring(firstSpace + 1);
}
/**
 * Handles an incoming message payload: logs it, strips the first word of
 * `body.event.text`, and passes the remainder to `command_processor`.
 *
 * @param body payload with the message text at `body.event.text`
 * @returns the result of `command_processor` for the extracted command
 */
export const processor = async (body) => {
  // Serialize the payload; concatenating the object itself would log "[object Object]".
  console.log("command processor " + JSON.stringify(body));
  const command = tail(body.event.text); // discard user
  return command_processor(command);
};