do not recalculate milked pages
This commit is contained in:
parent
552ac802b1
commit
27bce6cc63
1 changed file with 8 additions and 1 deletion
9
site.js
9
site.js
|
@@ -12,11 +12,12 @@ let greylist = (process.env.greylist + '').split(',') || [];
|
|||
let hh;
|
||||
let percent = 0;
|
||||
let d = process.env.depth || 1;
|
||||
let noWorry = [];
|
||||
|
||||
async function urlCollector(url, path, file, useLimit, data2) {
|
||||
let urls = data2[url] ? (data2[url][path] || []) : [];
|
||||
urls = [...urls];
|
||||
if (path != 'following') return urls;
|
||||
if (path != 'following' || noWorry.indexOf(url) != -1) return urls;
|
||||
|
||||
let data;
|
||||
try {
|
||||
|
@@ -31,8 +32,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
|
|||
let body = cheerio.load(data);
|
||||
let links = body('a');
|
||||
let ll = urls.length;
|
||||
let returnedFalse = false;
|
||||
links.each(function (i, link) {
|
||||
if (useLimit && urls.length >= pageLimit * 40 + ll) {
|
||||
returnedFalse = true;
|
||||
return false;
|
||||
}
|
||||
let h = body(link).attr('href');
|
||||
|
@@ -60,6 +63,10 @@ async function urlCollector(url, path, file, useLimit, data2) {
|
|||
return true;
|
||||
})
|
||||
|
||||
if (!returnedFalse) {
|
||||
noWorry.push(url);
|
||||
}
|
||||
|
||||
let h3;
|
||||
try {
|
||||
h3 = new URL(new URL(url).origin);
|
||||
|
|
Loading…
Reference in a new issue