Search results

Algo
1
2
3
4
5
6
7
8
9
10
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { html, load } from "npm:cheerio";
// Download the archived page and parse it into a Cheerio document.
const archivedMarkup = await fetchText("https://archive.is/pPFRB");
const $ = load(archivedMarkup);
// Calling $.html() without a selector serializes the whole parsed document;
// no individual element is picked out here.
const serializedPage = $.html();
console.log(serializedPage);

Gets a list with all the films a user has added to their public diary in Letterboxd. The result is a JSON file with the following structure:

{ "updated_at": "2023-08-13", "count": 470, "films": [ { "watched_on": "2022-10-24", "title": "Aftersun (2022)", "rating": 4.5, "rewatched": false }, { "watched_on": "2021-03-20", "title": "Le Trou (1960)", "rating": 5, "rewatched": true }, ... { "watched_on": "2020-09-13", "title": "Tampopo (1985)", "rating": 5, "rewatched": false } ] }

If you want to use it in the browser, just visit this URL:

https://javier-letterboxdscrapper.web.val.run/?username={username}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=5";
/**
 * Scrapes every entry of a user's public Letterboxd diary.
 *
 * @param username Letterboxd account name; it is URL-encoded before being
 *   placed in the diary path.
 * @returns An object with `updated_at` (today's date as YYYY-MM-DD), `count`
 *   (number of entries) and `films` (one record per diary entry).
 */
export const letterboxd = async (username: string) => {
  // Use the module namespace instead of its default export: `load` is a named
  // export, whereas cheerio 1.x no longer ships a default export.
  const cheerio = await import("npm:cheerio");
  const diaryUrl = `https://letterboxd.com/${encodeURIComponent(username)}/films/diary/`;

  /** Reads the highest page number from the pagination links. */
  const getTotalPages = async () => {
    const html = await fetchText(diaryUrl);
    const $ = cheerio.load(html);
    const lastPage = Number.parseInt($(".paginate-page a").last().text(), 10);
    // When no pagination link is found the text is empty; the diary still has
    // its first page, so never report fewer than one page.
    return Number.isNaN(lastPage) || lastPage < 1 ? 1 : lastPage;
  };

  /** Extracts the diary entries listed on one page as plain objects. */
  const getFilmsForPage = async (page: number) => {
    const html = await fetchText(`${diaryUrl}page/${page}`);
    const $ = cheerio.load(html);
    return $(".diary-entry-edit a").map((_index, metadata) => {
      const entry = $(metadata);
      const title = `${entry.data("film-name")} (${entry.data("film-year")})`;
      // The rating attribute is divided by two, as before, to get the star value.
      const doubledRating = Number.parseInt(String(entry.data("rating")), 10);
      return {
        title,
        watched_on: entry.data("viewing-date"),
        // An unparsable rating becomes null (NaN already serialized to null in JSON).
        rating: Number.isNaN(doubledRating) ? null : doubledRating / 2,
        rewatched: entry.data("rewatch"),
      };
    }).get(); // .get() unwraps the Cheerio collection into a real array
  };

  const totalPages = await getTotalPages();
  const films = [];
  // Pages are fetched one after another, matching the original request pattern.
  for (let page = 1; page <= totalPages; page++) {
    films.push(...(await getFilmsForPage(page)));
  }
  return {
    updated_at: new Date().toISOString().split("T")[0],
    count: films.length,
    films,
  };
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/** @jsxImportSource https://esm.sh/preact */
import cheerio from "npm:cheerio";
import { render } from "npm:preact-render-to-string";
/**
 * Fetches the FlightStats tracker page for a flight and collects the text of
 * every <h5> element that contains "Boeing".
 *
 * @param airline Airline code, appended to the tracker URL as-is.
 * @param number Flight number, appended to the tracker URL as-is.
 * @returns The matching heading texts; empty when no heading mentions "Boeing".
 */
export let isBoeing = async (airline, number) => {
  const trackerUrl = "https://www.flightstats.com/v2/flight-tracker/" + airline + "/" + number;
  const page = await (await fetch(trackerUrl)).text();
  const $ = cheerio.load(page);
  const matches = [];
  $("h5").each((_index, heading) => {
    const label = $(heading).text();
    if (label.includes("Boeing")) {
      matches.push(label);
    }
  });
  return matches;
};
/**
 * Reads the `aircraft` field of a submitted form and looks up Boeing aircraft
 * for that flight.
 *
 * The first two characters of the whitespace-stripped value are passed to
 * `isBoeing` as the airline code and the remainder as the flight number.
 *
 * @param req Request whose body is form data.
 * @returns `{ boeingAircraft }`, a list of strings that is empty when nothing matched.
 */
async function getFormData(req: Request) {
  const data = await req.formData();
  const submitted = data.get("aircraft");
  // A missing field (or a file upload) has no flight code. Previously it was
  // stringified to "null" and looked up as airline "nu", flight "ll".
  if (typeof submitted !== "string") {
    return { boeingAircraft: [] };
  }
  // Strip all whitespace; replace(" ", "") only removed the first space.
  const aircraft = submitted.replace(/\s+/g, "");
  if (aircraft.toLowerCase().includes("swa")) {
    return {
      boeingAircraft: ["Southwest only flies Boeing 737s"],
    };
  }
  const airlineCodeLength = 2;
  const letters = aircraft.slice(0, airlineCodeLength);
  const numbers = aircraft.slice(airlineCodeLength);
  // Without both parts there is no flight to look up, so skip the request.
  if (letters.length < airlineCodeLength || numbers.length === 0) {
    return { boeingAircraft: [] };
  }
  const boeingAircraft = await isBoeing(letters, numbers);
  return { boeingAircraft };
}
/**
 * Builds the verdict line shown to the user.
 *
 * @param formData Object that may carry a `boeingAircraft` list.
 * @returns The first listed aircraft wrapped in emoji, or the "Not a Boeing"
 *   message when the list is absent or empty.
 */
function displayPlaneName(formData) {
  const matches = formData ? formData.boeingAircraft : undefined;
  const hasMatch = Boolean(matches) && matches.length > 0;
  return hasMatch ? "💀 " + matches[0] + " 📉" : "🥳 Not a Boeing 📈";
}
/**
 * HTTP entry point: renders the flight-number form and, for POST requests,
 * the verdict produced from the submitted form data.
 *
 * @param req Incoming request; only POST requests trigger a lookup.
 * @returns An HTML response.
 */
export default async function(req: Request) {
  // Resolve the verdict up front so the markup below contains no awaits.
  const verdict = req.method === "POST" ? displayPlaneName(await getFormData(req)) : null;
  const page = (
    <html>
      <head>
        <title>Is My Plane a Boeing?</title>
        <script src="https://cdn.tailwindcss.com"></script>
        <script src="floating.js"></script>
      </head>
      <body class="bg-gray-50 h-screen flex justify-center items-center">
        <div class="container max-w-md mx-auto p-8 md:p-12 bg-white rounded-lg shadow">
          <form method="POST" class="space-y-4">
            <div>
              <label for="aircraft" class="block text-sm font-medium text-gray-700">Flight Number</label>
              <input
                name="aircraft"
                id="aircraft"
                class="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
              />
            </div>
            <button
              type="submit"
              class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
            >
              Is My Plane a Boeing?
            </button>
          </form>
          <div class="mt-4">
            {verdict !== null
              ? (
                <>
                  <span class="block text-sm font-medium text-gray-700">Aircraft:</span>
                  <pre class="mt-1 block w-full px-3 py-2 bg-gray-100 text-gray-800 border border-gray-300 rounded-md shadow-sm sm:text-sm">{verdict}</pre>
                </>
              )
              : null}
          </div>
        </div>
      </body>
    </html>
  );
  return new Response(render(page), {
    headers: {
      "Content-Type": "text/html; charset=utf-8",
    },
  });
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/** @jsxImportSource https://esm.sh/preact */
import cheerio from "npm:cheerio";
import { render } from "npm:preact-render-to-string";
/**
 * Downloads the FlightStats tracker page for the given flight and returns the
 * text of each <h5> heading containing the word "Boeing".
 *
 * @param airline Airline code, concatenated into the tracker URL unchanged.
 * @param number Flight number, concatenated into the tracker URL unchanged.
 * @returns Array of heading texts (empty when none match).
 */
export let isBoeing = async (airline, number) => {
  const flightPage = await fetch("https://www.flightstats.com/v2/flight-tracker/" + airline + "/" + number);
  const $ = cheerio.load(await flightPage.text());
  return $("h5")
    .filter((_i, heading) => $(heading).text().includes("Boeing"))
    .map((_i, heading) => $(heading).text())
    .get();
};
/**
 * Extracts the `aircraft` form field from the request and resolves the Boeing
 * aircraft reported for that flight.
 *
 * The value is stripped of whitespace, then split after its second character:
 * the prefix goes to `isBoeing` as the airline code, the rest as the flight number.
 *
 * @param req Request carrying form data.
 * @returns `{ boeingAircraft }`; the list is empty when there is no match or
 *   no usable flight code.
 */
async function getFormData(req: Request) {
  const form = await req.formData();
  const rawValue = form.get("aircraft");
  // Previously a missing field was coerced to the string "null" and queried
  // as airline "nu" / flight "ll"; treat it as "nothing to look up" instead.
  if (typeof rawValue !== "string") {
    return { boeingAircraft: [] };
  }
  // A string pattern in replace() only hits the first space; the global regex
  // removes every whitespace run.
  const aircraft = rawValue.replace(/\s+/g, "");
  if (aircraft.toLowerCase().includes("swa")) {
    return {
      boeingAircraft: ["Southwest only flies Boeing 737s"],
    };
  }
  const index = 2;
  const letters = aircraft.slice(0, index);
  const numbers = aircraft.slice(index);
  // Both halves are required to form a tracker URL.
  if (letters.length < index || numbers === "") {
    return { boeingAircraft: [] };
  }
  return {
    boeingAircraft: await isBoeing(letters, numbers),
  };
}
/**
 * Formats the lookup result for display.
 *
 * @param formData Lookup result; may be nullish or lack `boeingAircraft`.
 * @returns The first aircraft name framed by emoji, or a "Not a Boeing"
 *   message when no aircraft is listed.
 */
function displayPlaneName(formData) {
  if (!formData || !formData.boeingAircraft) {
    return "🥳 Not a Boeing 📈";
  }
  if (!(formData.boeingAircraft.length > 0)) {
    return "🥳 Not a Boeing 📈";
  }
  return "💀 " + formData.boeingAircraft[0] + " 📉";
}
/**
 * Request handler for the "Is My Plane a Boeing?" page. GET requests show the
 * form only; POST requests additionally show the result for the submitted form.
 *
 * @param req Incoming request.
 * @returns An HTML response.
 */
export default async function(req: Request) {
  const isSubmission = req.method === "POST";
  // Perform the lookup before building the markup.
  const planeName = isSubmission ? displayPlaneName(await getFormData(req)) : null;
  const responseInit = {
    headers: {
      "Content-Type": "text/html; charset=utf-8",
    },
  };
  const markup = render(
    <html>
      <head>
        <title>Is My Plane a Boeing?</title>
        <script src="https://cdn.tailwindcss.com"></script>
        <script src="floating.js"></script>
      </head>
      <body class="bg-gray-50 h-screen flex justify-center items-center">
        <div class="container max-w-md mx-auto p-8 md:p-12 bg-white rounded-lg shadow">
          <form method="POST" class="space-y-4">
            <div>
              <label for="aircraft" class="block text-sm font-medium text-gray-700">Flight Number</label>
              <input
                name="aircraft"
                id="aircraft"
                class="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm"
              />
            </div>
            <button
              type="submit"
              class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
            >
              Is My Plane a Boeing?
            </button>
          </form>
          <div class="mt-4">
            {isSubmission
              ? (
                <>
                  <span class="block text-sm font-medium text-gray-700">Aircraft:</span>
                  <pre class="mt-1 block w-full px-3 py-2 bg-gray-100 text-gray-800 border border-gray-300 rounded-md shadow-sm sm:text-sm">{planeName}</pre>
                </>
              )
              : null}
          </div>
        </div>
      </body>
    </html>,
  );
  return new Response(markup, responseInit);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import cheerio from "npm:cheerio";
// Page URL that webScrapeTeeTimes downloads.
const FARGO_GOLF_URL = "https://foreupsoftware.com/index.php/booking/a/19956/18#/teetimes";
/**
 * Ensures a URL carries an explicit scheme.
 *
 * @param url Address that may or may not start with "http://" or "https://".
 * @returns `url` unchanged when it already has one of those prefixes,
 *   otherwise `url` prefixed with "http://".
 */
function normalizeURL(url: string) {
  const hasScheme = /^https?:\/\//.test(url);
  return hasScheme ? url : "http://" + url;
}
/**
 * Fetches a URL and resolves with the response body as text.
 *
 * @param url Address to fetch; passed through normalizeURL first.
 * @param options Extra fetch options; they are spread after, and therefore
 *   override, the default `redirect: "follow"`.
 * @returns The response body text.
 */
async function fetchText(url: string, options?: any) {
  const init = { redirect: "follow", ...options };
  const target = normalizeURL(url);
  const response = await fetch(target, init);
  return response.text();
}
// CSS selector matching `td.day` cells (including `td.active.day`) that do not carry the `disabled` class.
const daysSelector = "td.day:not(.disabled), td.active.day:not(.disabled)";
// CSS selector for elements with the `times-inner` class; not referenced elsewhere in the visible code.
const teeTimesListSelector = ".times-inner";
/**
 * Downloads the booking page and counts the tee-time tiles present in the
 * returned markup.
 *
 * The earlier version called `window.querySelector(...)` and
 * `document.querySelectorAll(...)`. Cheerio only parses a static HTML string:
 * there is no live DOM to click or query, and `window.querySelector` is not a
 * function, so the call always rejected before reaching the Cheerio query.
 * Those calls are removed, along with a commented-out block that belonged to
 * a different (newsletter) scraper.
 *
 * NOTE(review): the URL ends in a `#/teetimes` hash route, so the tiles are
 * presumably rendered client-side and may be absent from the fetched HTML.
 * Confirm against the live page.
 *
 * TODO: select the desired day, then read each tile's start time, course and
 * player count.
 *
 * @returns The number of `.time-tile-ob-no-details` elements found.
 */
export const webScrapeTeeTimes = async () => {
  const html = await fetchText(FARGO_GOLF_URL);
  const $ = cheerio.load(html);
  const availableTeeTimes = $(".time-tile-ob-no-details");
  console.log(availableTeeTimes.length);
  return availableTeeTimes.length;
};
// Run the scraper when the module is evaluated; the returned promise is not awaited.
webScrapeTeeTimes();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import cheerio from "npm:cheerio";
/**
 * Scrapes the Nutritionix Taco Bell menu page and returns, for every
 * `tr.odd` / `tr.even` row, the inner HTML of its `.nmItem` element and of its
 * element whose aria-label contains "Calories".
 *
 * @param req Incoming request; only `req.params` is logged.
 * @returns A JSON response of the form `{ items: [{ name, calories }] }`.
 */
export default async function tacoBellNutritionScrapper(req) {
  console.log(req.params);
  const menuPage = await fetch("https://www.nutritionix.com/taco-bell/menu/premium");
  const $ = cheerio.load(await menuPage.text());
  const items = [];
  $("tr.odd, tr.even").each((_index, row) => {
    const $row = $(row);
    items.push({
      name: $row.find(".nmItem").html(),
      calories: $row.find("[aria-label*='Calories']").html(),
    });
  });
  return Response.json({ items });
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/** @jsxImportSource npm:react **/
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
import { renderToString } from "npm:react-dom@18/server";
/**
 * Renders an HTML page showing a Slack-style moon emoji code built from the
 * first non-empty text node inside `#today_` on moongiant.com.
 *
 * @param req Incoming request (not inspected).
 * @returns An HTML response with the emoji code, or a 502 plain-text response
 *   when no text could be found on the source page.
 */
export default async (req: Request) => {
  const html = await fetchText(
    "https://www.moongiant.com/phase/today/",
  );
  const $ = load(html);
  const todayContents = [...$("#today_").contents()]
    .filter((e) => e.type === "text")
    .map((e) => $(e).text().trim())
    .filter((text) => text.length > 0);
  const phaseName = todayContents[0];
  // If the page layout changes there may be no text node at all; report that
  // instead of throwing on `undefined.toLowerCase()`.
  if (phaseName === undefined) {
    return new Response("Could not read today's moon phase from moongiant.com", {
      status: 502,
      headers: { "Content-Type": "text/plain" },
    });
  }
  const slug = phaseName
    .toLowerCase()
    // Replace every whitespace run; a string pattern only replaced the first space.
    .replace(/\s+/g, "_")
    // Names already ending in "moon" would otherwise yield ":full_moon_moon:".
    .replace(/_moon$/, "");
  const moonIcon = `:${slug}_moon:`;
  console.log(moonIcon);
  /*
   * Target payload shape, per the original author's note
   * ("This is the way Slack wants it"):
   * {
   *   "status_text": "riding a train",
   *   "status_emoji": ":mountain_railway:",
   *   "status_expiration": 0
   * }
   */
  return new Response(
    renderToString(
      <html>
        <link rel="stylesheet" href="https://unpkg.com/missing.css@1.1.1" />
        <main>
          <h1>{moonIcon}</h1>
        </main>
      </html>,
    ),
    { headers: { "Content-Type": "text/html" } },
  );
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import cheerio from "npm:cheerio";
// Programme/results page that webScrapeBytesNewsletter downloads.
const DIAMOND_LEAGUE_EVENT_URL = "https://shanghai.diamondleague.com/programme-results/programme-results-shanghai/";
/**
 * Returns `url` as-is when it begins with "http://" or "https://"; otherwise
 * returns it with "http://" prepended.
 *
 * @param url Address to normalize.
 * @returns The address with an explicit scheme prefix.
 */
function normalizeURL(url: string) {
  for (const scheme of ["http://", "https://"]) {
    if (url.startsWith(scheme)) {
      return url;
    }
  }
  return "http://" + url;
}
/**
 * Downloads a URL and returns its body as text.
 *
 * @param url Address to fetch; normalized with normalizeURL.
 * @param options Optional fetch settings merged over the default
 *   `redirect: "follow"`.
 * @returns The response body text.
 */
async function fetchText(url: string, options?: any) {
  const overrides = options || {};
  const response = await fetch(normalizeURL(url), { redirect: "follow", ...overrides });
  const text = response.text();
  return text;
}
/**
 * Downloads the Diamond League programme page, selects every `.row` element,
 * logs the selection and returns it.
 *
 * @returns `{ $rows }`, where `$rows` is the Cheerio selection of `.row` elements.
 */
export const webScrapeBytesNewsletter = async () => {
  const $ = cheerio.load(await fetchText(DIAMOND_LEAGUE_EVENT_URL));
  const rowSelection = $(".row");
  console.log(rowSelection);
  return { $rows: rowSelection };
};
// Run the scraper when the module is evaluated; the returned promise is not awaited.
webScrapeBytesNewsletter();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import cheerio from "npm:cheerio";
/**
 * Fetches a fixed class-schedule detail page and reports whether the number
 * parsed from one specific table cell is greater than zero.
 *
 * @param req Incoming request; only `req.params` is logged.
 * @returns A JSON response of the form `{ open: boolean }`.
 */
export default async function mylittlescraper(req) {
  console.log(req.params);
  const schedulePage = await fetch(
    `https://ssb.ua.edu/pls/PROD/ua_bwckschd.p_disp_detail_sched?term_in=202340&crn_in=43971`,
  );
  const $ = cheerio.load(await schedulePage.text());
  const seatsText = $(
    "table.datadisplaytable:nth-child(16) > tbody:nth-child(2) > tr:nth-child(2) > td:nth-child(3)",
  ).text();
  // A non-numeric cell parses to NaN, and NaN > 0 is false.
  const open = Number.parseInt(seatsText) > 0;
  return Response.json({ open });
}

Extract all text content from a URL using Cheerio

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import { fetch } from "https://esm.town/v/std/fetch";
import * as cheerio from "npm:cheerio";
/**
 * Fetches the page named by the JSON body's `url` field and responds with all
 * of its text content.
 *
 * @param request Request whose JSON body has the shape `{ "url": string }`.
 * @returns The page text as JSON. Responds 400 when the body is not JSON or
 *   has no usable `url`, and 502 when the page cannot be fetched or parsed.
 */
export async function extractAllContent(request: Request) {
  let body: unknown;
  try {
    body = await request.json();
  } catch (error) {
    console.error("Error:", error);
    return Response.json({ message: "The body of this request was not JSON-encoded." }, {
      status: 400,
    });
  }
  // The body may legally be any JSON value (null, a number, ...), so check
  // its shape before reading `url`.
  const url = typeof body === "object" && body !== null
    ? (body as Record<string, unknown>).url
    : undefined;
  if (typeof url !== "string" || url.length === 0) {
    return Response.json({ message: "URL not found" }, {
      status: 400,
    });
  }
  try {
    // Use std/fetch for proxied request
    const response = await fetch(url);
    const html = await response.text();
    const $ = cheerio.load(html);
    return Response.json($.text());
  } catch (error) {
    // Fetch and parse failures used to be reported as "not JSON-encoded";
    // report them as an upstream failure instead.
    console.error("Error:", error);
    return Response.json({ message: "Failed to fetch or parse the requested URL." }, {
      status: 502,
    });
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
/**
 * Looks up a city's English Wikipedia article and returns the text of the
 * first `span.geo-default > span` element.
 *
 * @param args.cityName Article title of the city, e.g. "New York City".
 * @returns The matched element's text, or an empty string when none is found.
 */
export async function latLngOfCity(args: { cityName: string }) {
  const { cityName } = args;
  // Spaces become underscores, then the title is percent-encoded so that
  // characters such as "/", "?" or "#" stay part of the title and do not
  // change the URL's path, query or fragment.
  const articleTitle = encodeURIComponent(cityName.trim().replace(/\s+/g, "_"));
  const html = await fetchText(
    `https://en.wikipedia.org/wiki/${articleTitle}`,
  );
  const $ = load(html);
  const lat_lng = $("span.geo-default > span").first().text();
  return lat_lng;
}
1
2
3
4
5
6
7
8
9
10
11
12
import { fetchText } from "https://esm.town/v/stevekrouse/fetchText?v=6";
import { load } from "npm:cheerio";
/**
 * Fetches the English Wikipedia article for a city and returns the text of
 * the first `span.geo-default > span` element on the page.
 *
 * @param args.cityName Article title of the city, e.g. "San Francisco".
 * @returns The matched element's text; an empty string when nothing matches.
 */
export async function latLngOfCity(args: { cityName: string }) {
  const { cityName } = args;
  // Convert spaces to underscores and percent-encode the rest, so reserved
  // characters ("/", "?", "#") in the name cannot alter the request URL.
  const encodedTitle = encodeURIComponent(cityName.trim().replace(/\s+/g, "_"));
  const html = await fetchText(`https://en.wikipedia.org/wiki/${encodedTitle}`);
  const $ = load(html);
  return $("span.geo-default > span").first().text();
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import axios from "npm:axios";
import * as cheerio from "npm:cheerio";
/**
 * Fetches the page named by the JSON body's `url` field and responds with its
 * Open Graph meta tags.
 *
 * @param request Request whose JSON body has the shape `{ "url": string }`.
 * @returns A JSON array of `{ property, content }` pairs. Responds 400 when
 *   the body is not JSON or has no usable `url`, and 502 when the page cannot
 *   be fetched or parsed.
 */
export async function extractOpenGraphTags(request: Request) {
  let body: unknown;
  try {
    body = await request.json();
  } catch (error) {
    console.error("Error:", error);
    return Response.json({ message: "The body of this request was not JSON-encoded." }, {
      status: 400,
    });
  }
  // Any JSON value is a valid body, so verify it is an object before reading `url`.
  const url = typeof body === "object" && body !== null
    ? (body as Record<string, unknown>).url
    : undefined;
  if (typeof url !== "string" || url.length === 0) {
    return Response.json({ message: "URL not found" }, {
      status: 400,
    });
  }
  try {
    const response = await axios.get(url);
    const html = response.data;
    const $ = cheerio.load(html);
    const openGraphTags: { property: string; content: string }[] = [];
    $("meta[property^='og:']").each((_index, el) => {
      const property = $(el).attr("property");
      const content = $(el).attr("content");
      // Skip tags missing either attribute (or with an empty value).
      if (property && content) {
        openGraphTags.push({ property, content });
      }
    });
    return Response.json(openGraphTags);
  } catch (error) {
    // Download and parse failures were previously mislabelled as a JSON-encoding problem.
    console.error("Error:", error);
    return Response.json({ message: "Failed to fetch or parse the requested URL." }, {
      status: 502,
    });
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import cheerio from "npm:cheerio";
// Archive page that webScrapeBytesNewsletter downloads.
const NEWSLETTER_URL = "https://bytes.dev/archives";
/**
 * Adds an "http://" prefix to addresses that start with neither "http://"
 * nor "https://".
 *
 * @param url Address to normalize.
 * @returns The original address when already prefixed, else the prefixed one.
 */
function normalizeURL(url: string) {
  const schemes = ["http://", "https://"];
  if (schemes.some((scheme) => url.startsWith(scheme))) {
    return url;
  }
  return "http://" + url;
}
/**
 * Retrieves a URL's body as text, following redirects unless `options`
 * overrides that setting.
 *
 * @param url Address to fetch; passed through normalizeURL.
 * @param options Optional fetch settings layered over `redirect: "follow"`.
 * @returns The response body text.
 */
async function fetchText(url: string, options?: any) {
  const requestInit = Object.assign({ redirect: "follow" }, options || {});
  const response = await fetch(normalizeURL(url), requestInit);
  return response.text();
}
/**
 * Scrapes the newsletter archive page for the entry under the second child of
 * `<main>`.
 *
 * @returns `{ id, title, date }`, where `id` is the number parsed from the
 *   second space-separated word of the heading's first child (NaN when absent).
 */
export const webScrapeBytesNewsletter = async () => {
  const $ = cheerio.load(await fetchText(NEWSLETTER_URL));
  // Every field sits under the second child of the section's links, so
  // resolve that node once and reuse it.
  const issueCard = $("main > :nth-child(2)").find("a").children().eq(1);
  const headingParts = issueCard.find("h3").children();
  const articleNumber = headingParts.eq(0).text();
  const title = headingParts.eq(1).text();
  const date = issueCard.find("div > div > span").text();
  const id = Number(articleNumber.split(" ")[1]);
  return { id, title, date };
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import { email } from "https://esm.town/v/std/email?v=9";
import { fetch } from "https://esm.town/v/std/fetch";
/**
 * Downloads the doviz.com US-dollar page, extracts the inner HTML of the
 * `.value-table` element and emails it.
 *
 * @returns Nothing; logs "email sent!" once the email call resolves.
 */
export async function exchangeRate() {
  const { load } = await import("npm:cheerio");
  const response = await fetch("https://kur.doviz.com/serbest-piyasa/amerikan-dolari");
  const page = await response.text();
  const $ = load(page);
  const tableHtml = $(".value-table").html();
  await email({
    html: tableHtml,
    subject: "TRY/USD exchange rate alert from val.town",
  });
  console.log("email sent!");
}