Back to packages list

Vals using cheerio

Description from the NPM package:
Tiny, fast, and elegant implementation of core jQuery designed specifically for the server
1
2
3
4
5
6
7
8
9
10
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { html, load } from "npm:cheerio";
// Download an archived page, parse it with Cheerio, and log the re-serialized markup.
const archivedPageUrl = "https://archive.is/pPFRB";
const pageSource = await fetchText(archivedPageUrl);
const $ = load(pageSource);
// Called without a selector, $.html() serializes the whole parsed document.
const serialized = $.html();
console.log(serialized);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/** @jsxImportSource https://esm.sh/preact */
import cheerio from "npm:cheerio";
import { render } from "npm:preact-render-to-string";
/**
 * Fetches the FlightStats tracker page for a flight and collects the text of
 * every `<h5>` element that mentions "Boeing".
 *
 * @param airline Airline code, used as the first path segment of the tracker URL.
 * @param number Flight number, used as the second path segment.
 * @returns The text of each matching heading, in document order; empty when none match.
 */
export let isBoeing = async (airline, number) => {
  // Encode both segments so characters such as "/", "?", "#" or spaces in
  // caller-supplied values cannot change which URL is requested.
  const trackerUrl = "https://www.flightstats.com/v2/flight-tracker/"
    + encodeURIComponent(airline) + "/" + encodeURIComponent(number);
  const response = await fetch(trackerUrl);
  const $ = cheerio.load(await response.text());
  return $("h5")
    .map((_, heading) => $(heading).text())
    .get()
    .filter((headingText) => headingText.includes("Boeing"));
};
/**
 * Reads the `aircraft` field from a submitted form and looks up whether the
 * flight is operated with a Boeing aircraft.
 *
 * The value is split into a two-character airline code and the remaining
 * flight number before being passed to `isBoeing`.
 *
 * @param req Request whose body is form-encoded and carries an `aircraft` field.
 * @returns An object whose `boeingAircraft` array lists the matches (empty when none).
 */
async function getFormData(req: Request) {
  const data = await req.formData();
  const rawValue = data.get("aircraft");
  // A string pattern in String.replace only removes the FIRST match, so strip
  // every whitespace run with a global regex instead. A missing or non-text
  // field is treated as empty rather than being stringified to "null".
  const flight = typeof rawValue === "string" ? rawValue.replace(/\s+/g, "") : "";
  if (flight === "") {
    // Nothing to look up; skip the network request entirely.
    return { boeingAircraft: [] };
  }
  if (flight.toLowerCase().includes("swa")) {
    return {
      boeingAircraft: ["Southwest only flies Boeing 737s"],
    };
  }
  // The first two characters are taken as the airline code, the rest as the flight number.
  const airlineCodeLength = 2;
  const boeingAircraft = await isBoeing(
    flight.slice(0, airlineCodeLength),
    flight.slice(airlineCodeLength),
  );
  return { boeingAircraft };
}
/**
 * Builds the result line shown to the user.
 *
 * @param formData Lookup result; only its `boeingAircraft` array is read.
 * @returns The first Boeing match wrapped in emoji, or a "Not a Boeing"
 *   message when `formData` is missing or has no matches.
 */
function displayPlaneName(formData?: { boeingAircraft?: string[] } | null) {
  // The emoji below were previously stored as mojibake (UTF-8 bytes decoded
  // with the wrong charset); they are restored to the intended characters.
  if (formData && formData.boeingAircraft && formData.boeingAircraft.length > 0) {
    return "💀 " + formData.boeingAircraft[0] + " 📉";
  }
  return "🥳 Not a Boeing 📈";
}
/**
 * HTTP handler for the "Is My Plane a Boeing?" page.
 *
 * Always renders the flight-number form. When the request method is POST it
 * also reads the submitted form via `getFormData(req)` and shows the string
 * produced by `displayPlaneName` underneath the form.
 *
 * @param req Incoming request; its method and (for POST) its form body are read.
 * @returns A Response carrying the rendered markup with a
 *   "text/html; charset=utf-8" Content-Type header.
 */
export default async function(req: Request) {
return new Response(
render(
<html>
<head>
<title>Is My Plane a Boeing?</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="floating.js"></script>
</head>
<body class="bg-gray-50 h-screen flex justify-center items-center">
<div class="container max-w-md mx-auto p-8 md:p-12 bg-white rounded-lg shadow">
<form method="POST" class="space-y-4">
<div>
<label for="aircraft" class="block text-sm font-medium text-gray-700">Flight Number</label>
<input
name="aircraft"
id="aircraft"
class="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
/>
</div>
<button
type="submit"
class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
>
Is My Plane a Boeing?
</button>
</form>
<div class="mt-4">
{req.method === "POST"
? (
<>
<span class="block text-sm font-medium text-gray-700">Aircraft:</span>
<pre class="mt-1 block w-full px-3 py-2 bg-gray-100 text-gray-800 border border-gray-300 rounded-md shadow-sm sm:text-sm">{displayPlaneName(await getFormData(req))}</pre>
</>
)
: null}
</div>
</div>
</body>
</html>,
),
{
headers: {
"Content-Type": "text/html; charset=utf-8",
},
},
);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import cheerio from "npm:cheerio";
/**
 * Scrapes the Nutritionix Taco Bell "premium" menu page and returns, as JSON,
 * one `{ name, calories }` entry per `tr.odd` / `tr.even` table row.
 *
 * `name` is the inner HTML of the row's `.nmItem` element and `calories` the
 * inner HTML of the element whose aria-label contains "Calories"; either is
 * null when the row has no such element.
 *
 * @param req Incoming request; only `req.params` is read, for logging.
 * @returns A JSON response of the shape `{ items: [...] }`.
 */
export default async function tacoBellNutritionScrapper(req) {
  console.log(req.params);
  const sourceUrl = `https://www.nutritionix.com/taco-bell/menu/premium`;
  const page = await fetch(sourceUrl);
  const $ = cheerio.load(await page.text());
  const items = $("tr.odd, tr.even")
    .toArray()
    .map((row) => {
      const $row = $(row);
      return {
        name: $row.find(".nmItem").html(),
        calories: $row.find("[aria-label*='Calories']").html(),
      };
    });
  return Response.json({ items });
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/** @jsxImportSource npm:react **/
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
import { renderToString } from "npm:react-dom@18/server";
/**
 * Scrapes today's moon phase from moongiant.com and renders it as a
 * Slack-style emoji shortcode (e.g. ":waxing_gibbous_moon:") in a small HTML page.
 *
 * @param req Incoming request (not inspected).
 * @returns An HTML response with the shortcode, or a 502 plain-text response
 *   when the phase text cannot be found on the source page.
 */
export default async (req: Request) => {
  const html = await fetchText(
    "https://www.moongiant.com/phase/today/",
  );
  const $ = load(html);
  // Keep only the non-blank text nodes that are direct children of #today_.
  const todayContents = [...$("#today_").contents()]
    .filter(e => e.type === "text" && $(e).text().trim())
    .map(e => $(e).text().trim());
  const phaseName = todayContents[0];
  if (phaseName === undefined) {
    // The page layout changed or the fetch returned something unexpected;
    // report it explicitly instead of crashing on `undefined.toLowerCase()`.
    return new Response("Could not find today's moon phase on the source page.", {
      status: 502,
      headers: { "Content-Type": "text/plain; charset=utf-8" },
    });
  }
  // Replace every whitespace run: a string pattern would only replace the first space.
  const moonIcon = `:${phaseName.toLowerCase().replace(/\s+/g, "_")}_moon:`;
  console.log(moonIcon);
  /**
   * This is the payload shape Slack wants for a status update:
   * {
   *   "status_text": "riding a train",
   *   "status_emoji": ":mountain_railway:",
   *   "status_expiration": 0
   * }
   */
  return new Response(
    renderToString(
      <html>
        <link rel="stylesheet" href="https://unpkg.com/missing.css@1.1.1" />
        <main>
          <h1>{moonIcon}</h1>
        </main>
      </html>,
    ),
    { headers: { "Content-Type": "text/html" } },
  );
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import cheerio from "npm:cheerio";
const DIAMOND_LEAGUE_EVENT_URL = "https://shanghai.diamondleague.com/programme-results/programme-results-shanghai/";
/**
 * Ensures a URL carries an HTTP(S) scheme.
 *
 * @param url Address that may or may not start with "http://" or "https://".
 * @returns `url` unchanged when it already has one of those prefixes
 *   (case-sensitive); otherwise `url` prefixed with "http://".
 */
function normalizeURL(url: string) {
  const hasHttpScheme = /^https?:\/\//.test(url);
  if (hasHttpScheme) {
    return url;
  }
  return `http://${url}`;
}
/**
 * Fetches a URL and resolves with the response body as text.
 *
 * @param url Address to fetch; passed through `normalizeURL` first.
 * @param options Optional fetch options. They are merged after the default
 *   `redirect: "follow"`, so a caller-supplied `redirect` wins.
 * @returns The response body decoded as text.
 */
async function fetchText(url: string, options?: any) {
  const target = normalizeURL(url);
  const init = { redirect: "follow", ...(options || {}) };
  const reply = await fetch(target, init);
  return reply.text();
}
/**
 * Downloads the Diamond League programme/results page, selects every element
 * with class "row", logs the selection and returns it.
 *
 * @returns An object whose `$rows` property is the Cheerio selection of `.row` elements.
 */
export const webScrapeBytesNewsletter = async () => {
  const pageMarkup = await fetchText(DIAMOND_LEAGUE_EVENT_URL);
  const $rows = cheerio.load(pageMarkup)(".row");
  console.log($rows);
  return { $rows };
};
// Run the scrape once when the module is evaluated.
webScrapeBytesNewsletter();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import cheerio from "npm:cheerio";
const FARGO_GOLF_URL = "https://foreupsoftware.com/index.php/booking/a/19956/18#/teetimes";
/**
 * Ensures a URL carries an HTTP(S) scheme.
 *
 * @param url Address that may or may not start with "http://" or "https://".
 * @returns `url` unchanged when it already has one of those prefixes
 *   (case-sensitive); otherwise `url` prefixed with "http://".
 */
function normalizeURL(url: string) {
  const hasHttpScheme = /^https?:\/\//.test(url);
  if (hasHttpScheme) {
    return url;
  }
  return `http://${url}`;
}
/**
 * Fetches a URL and resolves with the response body as text.
 *
 * @param url Address to fetch; passed through `normalizeURL` first.
 * @param options Optional fetch options. They are merged after the default
 *   `redirect: "follow"`, so a caller-supplied `redirect` wins.
 * @returns The response body decoded as text.
 */
async function fetchText(url: string, options?: any) {
  const target = normalizeURL(url);
  const init = { redirect: "follow", ...(options || {}) };
  const reply = await fetch(target, init);
  return reply.text();
}
// Calendar cells for days that are not disabled.
const daysSelector = "td.day:not(.disabled), td.active.day:not(.disabled)";
// Container selector for the list of tee times (not used yet).
const teeTimesListSelector = ".times-inner";
/**
 * Downloads the booking page and logs how many tee-time tiles are present in
 * the returned markup.
 *
 * Cheerio only parses static HTML: there is no `window` or `document`, and
 * nothing can be clicked. The earlier browser-style calls
 * (`window.querySelector(...).click()`, `document.querySelectorAll(...)`)
 * threw at runtime, so all lookups now go through the parsed document.
 *
 * NOTE(review): the URL ends in a "#/teetimes" fragment, which suggests the
 * tee sheet may be rendered client-side; if so these selectors will match
 * nothing in the fetched HTML — confirm against the live page.
 */
export const webScrapeTeeTimes = async () => {
  const html = await fetchText(FARGO_GOLF_URL);
  const $ = cheerio.load(html);
  const availableDays = $(daysSelector);
  // TODO: code to select the desired day here (using `availableDays`).
  const availableTeeTimes = $(".time-tile-ob-no-details");
  console.log(availableTeeTimes.length);
  // TODO: extract details per tile, along the lines of:
  // availableTeeTimes.each((_, teeTime) => {
  //   const time = $(teeTime).find(".times-booking-start-time-label").text();
  //   const course = $(teeTime).find(".times-booking-teesheet-name").text();
  //   const playerCount = $(teeTime).find(".time-summary-ob-player-count").text();
  // });
};
// Run the scrape once when the module is evaluated.
webScrapeTeeTimes();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import cheerio from "npm:cheerio";
/**
 * Fetches a University of Alabama class-schedule detail page and reports
 * whether the number found in one specific table cell is greater than zero.
 *
 * NOTE(review): the cell is presumably the remaining-seat count (the original
 * local was named `numberOfSeats`) — confirm against the page.
 *
 * @param req Incoming request; only `req.params` is read, for logging.
 * @returns A JSON response `{ open: boolean }`; `open` is false when the cell
 *   text does not parse to a positive integer.
 */
export default async function mylittlescraper(req) {
  console.log(req.params);
  const sourceUrl = `https://ssb.ua.edu/pls/PROD/ua_bwckschd.p_disp_detail_sched?term_in=202340&crn_in=43971`;
  const page = await fetch(sourceUrl);
  const $ = cheerio.load(await page.text());
  const seatsCell = $(
    "table.datadisplaytable:nth-child(16) > tbody:nth-child(2) > tr:nth-child(2) > td:nth-child(3)",
  );
  // NaN > 0 is false, so an unparsable or missing cell reports "not open".
  const open = Number.parseInt(seatsCell.text()) > 0;
  return Response.json({ open });
}
1
2
3
4
5
6
7
8
9
10
11
12
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
/**
 * Looks up a city's Wikipedia article and returns the text of the first
 * `span.geo-default > span` element on the page.
 *
 * @param args.cityName Article title appended to "https://en.wikipedia.org/wiki/".
 * @returns The matched element's text, or an empty string when nothing matches.
 */
export async function latLngOfCity(args: { cityName: string }) {
  const articleUrl = `https://en.wikipedia.org/wiki/${args.cityName}`;
  const $ = load(await fetchText(articleUrl));
  return $("span.geo-default > span").first().text();
}
1
2
3
4
5
6
7
8
9
10
11
12
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
/**
 * Looks up a city's Wikipedia article and returns the text of the first
 * `span.geo-default > span` element on the page.
 *
 * @param args.cityName Article title appended to "https://en.wikipedia.org/wiki/".
 * @returns The matched element's text, or an empty string when nothing matches.
 */
export async function latLngOfCity(args: { cityName: string }) {
  const articleUrl = `https://en.wikipedia.org/wiki/${args.cityName}`;
  const $ = load(await fetchText(articleUrl));
  return $("span.geo-default > span").first().text();
}

Extract all text content from a URL using Cheerio

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import { fetch } from "https://esm.town/v/std/fetch";
import * as cheerio from "npm:cheerio";
/**
 * Extracts all text content from the page at the URL given in the request body.
 *
 * Expects a JSON body of the form `{ "url": "<address>" }`.
 *
 * @param request Incoming request carrying the JSON body.
 * @returns A JSON response containing the page text on success;
 *   400 when the body is not JSON or has no usable `url` string;
 *   502 when fetching or parsing the target page fails.
 */
export async function extractAllContent(request: Request) {
  let body: unknown;
  try {
    body = await request.json();
  } catch (error) {
    console.error("Error:", error);
    return Response.json({ message: "The body of this request was not JSON-encoded." }, {
      status: 400,
    });
  }

  // The body may be any JSON value (null, number, array...), so read `url` defensively.
  const url = typeof body === "object" && body !== null
    ? (body as { url?: unknown }).url
    : undefined;
  if (typeof url !== "string" || url === "") {
    return Response.json({ message: "URL not found" }, {
      status: 400,
    });
  }

  try {
    // Use std/fetch for proxied request
    const response = await fetch(url);
    const html = await response.text();
    const $ = cheerio.load(html);
    return Response.json($.text());
  } catch (error) {
    // Upstream failures are not the caller's encoding mistake; report them as such.
    console.error("Error:", error);
    return Response.json({ message: "Failed to fetch or parse the requested URL." }, {
      status: 502,
    });
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import cheerio from "npm:cheerio";
const NEWSLETTER_URL = "https://bytes.dev/archives";
/**
 * Ensures a URL carries an HTTP(S) scheme.
 *
 * @param url Address that may or may not start with "http://" or "https://".
 * @returns `url` unchanged when it already has one of those prefixes
 *   (case-sensitive); otherwise `url` prefixed with "http://".
 */
function normalizeURL(url: string) {
  const hasHttpScheme = /^https?:\/\//.test(url);
  if (hasHttpScheme) {
    return url;
  }
  return `http://${url}`;
}
/**
 * Fetches a URL and resolves with the response body as text.
 *
 * @param url Address to fetch; passed through `normalizeURL` first.
 * @param options Optional fetch options. They are merged after the default
 *   `redirect: "follow"`, so a caller-supplied `redirect` wins.
 * @returns The response body decoded as text.
 */
async function fetchText(url: string, options?: any) {
  const target = normalizeURL(url);
  const init = { redirect: "follow", ...(options || {}) };
  const reply = await fetch(target, init);
  return reply.text();
}
/**
 * Scrapes the bytes.dev archive page for the entry in the second child of `<main>`.
 *
 * @returns `{ id, title, date }`, where `id` is the number parsed from the
 *   second space-separated word of the heading's first child (NaN when that
 *   word is absent or not numeric), `title` is the text of the heading's
 *   second child, and `date` is the text of the `div > div > span` element(s).
 */
export const webScrapeBytesNewsletter = async () => {
  const $ = cheerio.load(await fetchText(NEWSLETTER_URL));
  // All three fields hang off the same node: the second child among the
  // children of the section's links.
  const issueBody = $("main > :nth-child(2)").find("a").children().eq(1);
  const headingParts = issueBody.find("h3").children();
  const articleNumber = headingParts.eq(0).text();
  return {
    id: Number(articleNumber.split(" ")[1]),
    title: headingParts.eq(1).text(),
    date: issueBody.find("div > div > span").text(),
  };
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/** @jsxImportSource https://esm.sh/preact */
import cheerio from "npm:cheerio";
import { render } from "npm:preact-render-to-string";
/**
 * Fetches the FlightStats tracker page for a flight and collects the text of
 * every `<h5>` element that mentions "Boeing".
 *
 * @param airline Airline code, used as the first path segment of the tracker URL.
 * @param number Flight number, used as the second path segment.
 * @returns The text of each matching heading, in document order; empty when none match.
 */
export let isBoeing = async (airline, number) => {
  // Encode both segments so characters such as "/", "?", "#" or spaces in
  // caller-supplied values cannot change which URL is requested.
  const trackerUrl = "https://www.flightstats.com/v2/flight-tracker/"
    + encodeURIComponent(airline) + "/" + encodeURIComponent(number);
  const response = await fetch(trackerUrl);
  const $ = cheerio.load(await response.text());
  return $("h5")
    .map((_, heading) => $(heading).text())
    .get()
    .filter((headingText) => headingText.includes("Boeing"));
};
/**
 * Reads the `aircraft` field from a submitted form and looks up whether the
 * flight is operated with a Boeing aircraft.
 *
 * The value is split into a two-character airline code and the remaining
 * flight number before being passed to `isBoeing`.
 *
 * @param req Request whose body is form-encoded and carries an `aircraft` field.
 * @returns An object whose `boeingAircraft` array lists the matches (empty when none).
 */
async function getFormData(req: Request) {
  const data = await req.formData();
  const rawValue = data.get("aircraft");
  // A string pattern in String.replace only removes the FIRST match, so strip
  // every whitespace run with a global regex instead. A missing or non-text
  // field is treated as empty rather than being stringified to "null".
  const flight = typeof rawValue === "string" ? rawValue.replace(/\s+/g, "") : "";
  if (flight === "") {
    // Nothing to look up; skip the network request entirely.
    return { boeingAircraft: [] };
  }
  if (flight.toLowerCase().includes("swa")) {
    return {
      boeingAircraft: ["Southwest only flies Boeing 737s"],
    };
  }
  // The first two characters are taken as the airline code, the rest as the flight number.
  const airlineCodeLength = 2;
  const boeingAircraft = await isBoeing(
    flight.slice(0, airlineCodeLength),
    flight.slice(airlineCodeLength),
  );
  return { boeingAircraft };
}
/**
 * Builds the result line shown to the user.
 *
 * @param formData Lookup result; only its `boeingAircraft` array is read.
 * @returns The first Boeing match wrapped in emoji, or a "Not a Boeing"
 *   message when `formData` is missing or has no matches.
 */
function displayPlaneName(formData?: { boeingAircraft?: string[] } | null) {
  // The emoji below were previously stored as mojibake (UTF-8 bytes decoded
  // with the wrong charset); they are restored to the intended characters.
  if (formData && formData.boeingAircraft && formData.boeingAircraft.length > 0) {
    return "💀 " + formData.boeingAircraft[0] + " 📉";
  }
  return "🥳 Not a Boeing 📈";
}
/**
 * HTTP handler for the "Is My Plane a Boeing?" page.
 *
 * Always renders the flight-number form. When the request method is POST it
 * also reads the submitted form via `getFormData(req)` and shows the string
 * produced by `displayPlaneName` underneath the form.
 *
 * @param req Incoming request; its method and (for POST) its form body are read.
 * @returns A Response carrying the rendered markup with a
 *   "text/html; charset=utf-8" Content-Type header.
 */
export default async function(req: Request) {
return new Response(
render(
<html>
<head>
<title>Is My Plane a Boeing?</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="floating.js"></script>
</head>
<body class="bg-gray-50 h-screen flex justify-center items-center">
<div class="container max-w-md mx-auto p-8 md:p-12 bg-white rounded-lg shadow">
<form method="POST" class="space-y-4">
<div>
<label for="aircraft" class="block text-sm font-medium text-gray-700">Flight Number</label>
<input
name="aircraft"
id="aircraft"
class="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
/>
</div>
<button
type="submit"
class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
>
Is My Plane a Boeing?
</button>
</form>
<div class="mt-4">
{req.method === "POST"
? (
<>
<span class="block text-sm font-medium text-gray-700">Aircraft:</span>
<pre class="mt-1 block w-full px-3 py-2 bg-gray-100 text-gray-800 border border-gray-300 rounded-md shadow-sm sm:text-sm">{displayPlaneName(await getFormData(req))}</pre>
</>
)
: null}
</div>
</div>
</body>
</html>,
),
{
headers: {
"Content-Type": "text/html; charset=utf-8",
},
},
);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import cheerio from "npm:cheerio";
import { Feed, FeedOptions, Item as FeedItem } from "npm:feed";
/**
 * Builds an RSS 2.0 feed from the Fly.io blog index page.
 *
 * Every `<h1>` on the page becomes one feed item: the heading text is the
 * title, the text of the `<p>` elements inside the heading's parent is the
 * description, and the first `<a>` inside that parent supplies the link.
 *
 * @param req Incoming request (not inspected).
 * @returns An "application/xml" response containing the rendered feed.
 */
export async function flydotioRSS(req: Request): Promise<Response> {
  const blogUrl = "https://fly.io/blog/";
  const response = await fetch(blogUrl);
  const body = await response.text();
  const $ = cheerio.load(body);
  const feed = new Feed({
    title: "Fly.io Blog",
    description: "This is my personal feed!",
    id: "https://fly.io/blog/",
    link: "https://fly.io/blog/",
    image: "https://fly.io/static/images/favicon/favicon-32x32.png",
    favicon: "https://fly.io/static/images/favicon/favicon-32x32.png",
  } as FeedOptions);
  // Query through the loaded `$` instance rather than the deprecated static
  // `cheerio(...)` / `cheerio.text(...)` helpers.
  for (const heading of $("h1").toArray()) {
    const $heading = $(heading);
    const $container = $heading.parent();
    const href = $container.find("a").attr("href");
    feed.addItem({
      title: $heading.text().trim(),
      description: $container.find("p").text().trim(),
      // Resolve against the blog URL so root-relative, relative and absolute
      // hrefs all work; fall back to the blog index when no link exists
      // instead of emitting "https://fly.ioundefined".
      link: href ? new URL(href, blogUrl).toString() : blogUrl,
    } as FeedItem);
  }
  // Render once and reuse for both the log line and the response body.
  const rss = feed.rss2();
  console.log(rss);
  return new Response(rss, { headers: { "Content-Type": "application/xml" } });
}

Fetch the contents of the Wikipedia "On this day in history" page. Defaults to HTML output, but specify ?format=json or ?format=text for other outputs, e.g.

#wikipedia

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
/**
 * Downloads Wikipedia's "On this day / Today" page and returns its main
 * content as plain text.
 *
 * The text of `#mw-content-text` is cut at the first literal
 * ".mw-parser-output" occurrence, then its first and last lines are dropped
 * and the remainder is trimmed.
 */
const fetchToday = async () => {
  const $ = load(
    await fetchText("https://en.wikipedia.org/wiki/Wikipedia:On_this_day/Today"),
  );
  const contentText = $("#mw-content-text").first().text();
  // TODO trim body a bit; what is all this other stuff?
  const [beforeMarker] = contentText.split(".mw-parser-output");
  const lines = beforeMarker.split("\n");
  return lines.slice(1, -1).join("\n").trim();
};
/**
 * HTTP handler returning today's "On this day" text from Wikipedia.
 *
 * The `format` query parameter selects the output: "json" wraps the text as
 * `{ data }`, "text" serves it as plain text, and "html" (the default when the
 * parameter is absent) serves it with an HTML content type.
 *
 * @param req Incoming request; only its URL query string is read.
 * @returns The response in the requested format.
 * @throws Error("unsupported format") for any other `format` value.
 */
export const wikipediaToday = async (req: Request) => {
  const format = new URL(req.url).searchParams.get("format") ?? "html";
  // Reject bad input before doing any network work.
  if (format !== "json" && format !== "html" && format !== "text") {
    throw new Error("unsupported format");
  }
  const data = await fetchToday();
  if (format === "json") {
    return Response.json({ data });
  }
  // "text" must not be labelled as HTML; string bodies are sent as UTF-8.
  const contentType = format === "text"
    ? "text/plain; charset=utf-8"
    : "text/html; charset=utf-8";
  return new Response(data, { headers: { "Content-Type": contentType } });
};

RSS feed of stevekrouse.com essays

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import cheerio from "npm:cheerio";
import { Feed } from "npm:feed";
/**
 * Builds an RSS 2.0 feed from the essay list on stevekrouse.com.
 *
 * Each list item is expected to look like:
 *   <li><span class="date">2022 Nov 08 - </span><a href="https://...">Title</a></li>
 *
 * @returns An "application/rss+xml" response containing the rendered feed.
 */
export const stevekrouseRSS = async () => {
  const homepage = await fetch("https://stevekrouse.com/");
  const $ = cheerio.load(await homepage.text());

  // Turn every <li> into { date, link, title }.
  const parsedItems = $("#page > div:nth-child(4) > ul > li")
    .toArray()
    .map((li) => {
      const $li = $(li);
      const $anchor = $li.find("a");
      return {
        // "2022 Nov 08 - " -> "2022 Nov 08"
        date: $li.find(".date").text().trim().replace(" -", ""),
        link: $anchor.attr("href"),
        title: $anchor.text(),
      };
    });
  console.log(JSON.stringify(parsedItems));

  // The first list entry's date is used as the feed's "updated" timestamp.
  const feed = new Feed({
    title: "Steve Krouse's Blog",
    description: "RSS Feed for Steve Krouse's Blog",
    id: "https://stevekrouse.com/",
    link: "https://stevekrouse.com/",
    language: "en",
    updated: new Date(parsedItems[0]?.date),
    generator: "Cheerio & Feed for TypeScript",
  });
  for (const { date, link, title } of parsedItems) {
    feed.addItem({
      title,
      id: link,
      link,
      description: title,
      content: title,
      date: new Date(date),
    });
  }

  return new Response(feed.rss2(), {
    headers: {
      "Content-Type": "application/rss+xml",
    },
  });
};