do not recalculate milked pages

This commit is contained in:
biglyderv 2025-02-01 12:01:59 -05:00
parent 552ac802b1
commit 27bce6cc63

View file

@@ -12,11 +12,12 @@ let greylist = (process.env.greylist + '').split(',') || [];
let hh; let hh;
let percent = 0; let percent = 0;
let d = process.env.depth || 1; let d = process.env.depth || 1;
let noWorry = [];
async function urlCollector(url, path, file, useLimit, data2) { async function urlCollector(url, path, file, useLimit, data2) {
let urls = data2[url] ? (data2[url][path] || []) : []; let urls = data2[url] ? (data2[url][path] || []) : [];
urls = [...urls]; urls = [...urls];
if (path != 'following') return urls; if (path != 'following' || noWorry.indexOf(url) != -1) return urls;
let data; let data;
try { try {
@@ -31,8 +32,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
let body = cheerio.load(data); let body = cheerio.load(data);
let links = body('a'); let links = body('a');
let ll = urls.length; let ll = urls.length;
let returnedFalse = false;
links.each(function (i, link) { links.each(function (i, link) {
if (useLimit && urls.length >= pageLimit * 40 + ll) { if (useLimit && urls.length >= pageLimit * 40 + ll) {
returnedFalse = true;
return false; return false;
} }
let h = body(link).attr('href'); let h = body(link).attr('href');
@@ -60,6 +63,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
return true; return true;
}) })
if (!returnedFalse) {
noWorry.push(url);
}
let h3; let h3;
try { try {
h3 = new URL(new URL(url).origin); h3 = new URL(new URL(url).origin);