taras-markdown_download.web.val.run
Readme

markdown.download

A handy microservice/library that converts various data sources into Markdown. It is intended to make it easier to read the web on e-readers.

Introductory blog post: https://taras.glek.net/post/markdown.download/

Package: https://jsr.io/@tarasglek/markdown-download

Features

  • Apply readability
  • Further convert article into markdown to simplify it
  • Allow webpages to be viewable as markdown via curl
  • Serve markdown converted to html to browsers
  • Extract youtube subtitles
  • Accepts pasted HTML through the htmlContent form field, for example:
  • curl 'https://val.markdown.download/' -H 'content-type: application/x-www-form-urlencoded' --data-urlencode "htmlContent@$HOME/Downloads/oai-reference.html"

Source

https://github.com/tarasglek/markdown-download

https://www.val.town/v/taras/markdown_download

License: MIT

Usage: https://markdown.download/ + URL

Dev: https://val.markdown.download/ + URL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { isProbablyReaderable, Readability } from "npm:@mozilla/readability@^0.5.0";
import { DOMParser } from "npm:linkedom@0.16.10";
import { marked } from "npm:marked@12.0.1";
import { getSubtitles } from "npm:youtube-captions-scraper@^2.0.1";
import { YouTube } from "npm:youtube-sr@4.3.11";
// Runtime probe: true when the global `Request` and `Response` constructors both exist.
// NOTE(review): despite the name, this only checks for those two globals, so any
// runtime that exposes them presumably takes the "Cloudflare" path too — confirm.
const isCloudflareWorker = typeof Request !== "undefined" && typeof Response !== "undefined";
// Start loading the HTML-to-markdown converter at module load; exactly one of the two is loaded.
// AgentMarkdownImport is the un-awaited import() promise (or null); html2markdown awaits it.
const AgentMarkdownImport = isCloudflareWorker ? import("npm:agentmarkdown@6.0.0") : null;
// TurndownService is the already-awaited result of import() (or null when the probe above is true).
const TurndownService = isCloudflareWorker ? null : await import("npm:turndown@^7.1.3");
/**
 * Converts an HTML string to markdown.
 *
 * Uses AgentMarkdown when `AgentMarkdownImport` was started at module load,
 * otherwise falls back to Turndown.
 *
 * @param html HTML markup to convert
 * @returns the markdown text
 * @throws TypeError when the Turndown fallback is selected but no constructor could be resolved
 */
export async function html2markdown(html: string): Promise<string> {
  if (AgentMarkdownImport) {
    // TurndownService doesn't work on cf, so AgentMarkdown is used there.
    const { AgentMarkdown } = await AgentMarkdownImport;
    return await AgentMarkdown.produce(html);
  }

  // `TurndownService` holds whatever `import()` resolved to. A dynamic import
  // yields a module namespace object, which is not itself constructable; the
  // class is exposed as its `default` export. Accept either shape so the
  // fallback works regardless of how the module was loaded.
  const loaded: unknown = await TurndownService;
  const candidate = typeof loaded === "function"
    ? loaded
    : (loaded as { default?: unknown } | null)?.default;
  if (typeof candidate !== "function") {
    throw new TypeError("turndown module did not expose a constructor");
  }
  const Turndown = candidate as new () => { turndown(input: string): string };
  return new Turndown().turndown(html);
}
/**
 * Parses `html` into a document, runs Readability over it and converts the
 * extracted article content to markdown.
 *
 * When Readability yields no article content, an empty string is converted instead.
 *
 * @param html full HTML of the page
 * @returns the document title's text content and the article as markdown
 */
export async function readability2markdown(html: string): Promise<{ title: string; markdown: string }> {
  const parser = new DOMParser();
  const doc = await parser.parseFromString(html, "text/html");

  const article = new Readability(doc).parse();
  const articleHtml = article?.content || "";

  // Convert first, then read the title, matching the original evaluation order.
  const markdown = await html2markdown(articleHtml);
  const title = doc.title.textContent;
  return { title, markdown };
}
/**
 * Pulls the 11-character video id out of a YouTube URL.
 *
 * @param url URL to inspect; its full `href` is matched
 * @returns the captured id, or `null` when the URL does not match the pattern
 */
function getYoutubeVideoID(url: URL): string | null {
  const found =
    /(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/i.exec(url.href);
  if (found === null) {
    return null;
  }
  // Group 1 is the id; it always participates when the pattern matches.
  return found[1];
}
/**
 * Wraps `message` in an HTTP 200 response carrying permissive CORS headers.
 *
 * @param message response body
 * @param contentType value for the Content-Type header (defaults to "text/markdown")
 * @returns the constructed Response
 */
function response(message: string, contentType = "text/markdown"): Response {
  return new Response(message, {
    status: 200,
    headers: new Headers({
      "Access-Control-Allow-Origin": "*",
      "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
      "Access-Control-Allow-Headers": "Content-Type, Authorization",
      "Access-Control-Max-Age": "86400",
      "Content-Type": contentType,
    }),
  });
}
/**
 * Builds a JSON error response of the form `{ error: { message, code: 400 } }`.
 *
 * The 400 appears only in the JSON body; the HTTP status is whatever
 * `response()` sets.
 *
 * @param msg human-readable error message
 * @returns a Response with content type "application/json"
 */
function err(msg: string): Response {
  const payload = { error: { message: msg, code: 400 } };
  return response(JSON.stringify(payload), "application/json");
}
/**
 * Parses `url` leniently: if it is not a valid absolute URL as given,
 * retries with "https://" prepended.
 *
 * Throws whatever `new URL` throws when the prefixed form is invalid too.
 */
function fudgeURL(url: string) {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable as-is (e.g. scheme missing); assume https.
    parsed = new URL("https://" + url);
  }
  return parsed;
}
function processInput(req: Request) {
let ret = {
url: undefined as undefined | URL,
response: undefined as undefined | Response,
};
const myurl = new URL(req.url);
let pathname = myurl.pathname.substring(1) + myurl.search;
if (!pathname.startsWith("http")) {
const urlAsFormParam = myurl.searchParams.get("url");
if (urlAsFormParam) {
pathname = urlAsFormParam;
} else if (pathname.length < 2) {
ret.response = response(
generate_ui(
"URL to convert to markdown:",
"https://www.val.town/v/taras/markdown_download",
"markdown.download",
),
"text/html",
Only the latest version can be previewed
Val Town is a social website to write and deploy JavaScript.
Build APIs and schedule functions from your browser.
Comments
Nobody has commented on this val yet: be the first!
v107
July 25, 2024