From 27bce6cc6321ac7aa722f3e4961e7593fd06dbf2 Mon Sep 17 00:00:00 2001
From: biglyderv
Date: Sat, 1 Feb 2025 12:01:59 -0500
Subject: [PATCH] do not recalculate milked pages

---
 site.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/site.js b/site.js
index 2e00470..9de4baf 100644
--- a/site.js
+++ b/site.js
@@ -12,11 +12,12 @@ let greylist = (process.env.greylist + '').split(',') || [];
 let hh;
 let percent = 0;
 let d = process.env.depth || 1;
+let noWorry = [];
 
 async function urlCollector(url, path, file, useLimit, data2) {
     let urls = data2[url] ? (data2[url][path] || []) : [];
     urls = [...urls];
-    if (path != 'following') return urls;
+    if (path != 'following' || noWorry.indexOf(url) != -1) return urls;
 
     let data;
     try {
@@ -31,8 +32,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
     let body = cheerio.load(data);
     let links = body('a');
     let ll = urls.length;
+    let returnedFalse = false;
     links.each(function (i, link) {
         if (useLimit && urls.length >= pageLimit * 40 + ll) {
+            returnedFalse = true;
             return false;
         }
         let h = body(link).attr('href');
@@ -60,6 +63,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
         return true;
     })
 
+    if (!returnedFalse) {
+        noWorry.push(url);
+    }
+
     let h3;
     try {
         h3 = new URL(new URL(url).origin);