diff --git a/site.js b/site.js index 8d0d2fc..ea15d66 100644 --- a/site.js +++ b/site.js @@ -9,7 +9,7 @@ let route = process.env.route || './test.txt'; let pageLimit = process.env.pageLimit || Infinity; let hh; -async function urlCollector(url, path, file, useLimit) { +async function urlCollector(url, path, file, useLimit, data2) { if (path != 'following') return []; let data; @@ -24,10 +24,10 @@ async function urlCollector(url, path, file, useLimit) { let body = cheerio.load(data); let links = body('a'); - let urls = []; + let urls = data2[url] ? (data2[url][path] || []) : []; + let ll = urls.length; links.each(function (i, link) { - if (useLimit && urls.length >= pageLimit * 40) return; - console.log(`User ${url} has ${i} pages calculated`); + if (useLimit && urls.length >= pageLimit * 40 + ll) return; let h = body(link).attr('href'); if (!h) return; @@ -43,7 +43,9 @@ async function urlCollector(url, path, file, useLimit) { } catch (err) { return; } - urls.push(h2.toString()) + urls.push(h2.toString()); + urls = [...new Set(urls)]; + console.log(`User ${url} has ${urls.length} pages calculated`); }) @@ -69,7 +71,7 @@ async function textCollector(word, path, file) { return words; } -async function siteCollector(user, path, site, useLimit) { +async function siteCollector(user, path, site, useLimit, dat) { let users = []; let i = 1; let out = []; @@ -79,7 +81,7 @@ async function siteCollector(user, path, site, useLimit) { } if (site == 'url') { - return await urlCollector(user, path, route, useLimit); + return await urlCollector(user, path, route, useLimit, dat); } while (true) { @@ -130,12 +132,13 @@ async function siteCollector(user, path, site, useLimit) { penv = penv.split(','); let users = []; + let data = {}; for (let u of penv) { - let ca = await siteCollector(u, 'followers', site); + let ca = await siteCollector(u, 'followers', site, false, data); users = users.concat(ca); - users = users.concat(await siteCollector(u, 'following', site)); + users = users.concat(await siteCollector(u, 'following', site, false, data)); } users = [...new Set(users)]; @@ -145,12 +148,11 @@ async function siteCollector(user, path, site, useLimit) { users = [...new Set(users)]; let tempSet = [...users]; for (let u of tempSet) { - users = users.concat(await siteCollector(u, 'followers', site, true)); - users = users.concat(await siteCollector(u, 'following', site, true)); + users = users.concat(await siteCollector(u, 'followers', site, true, data)); + users = users.concat(await siteCollector(u, 'following', site, true, data)); } } - let data = {}; let p = []; let congested = []; @@ -170,7 +172,7 @@ async function siteCollector(user, path, site, useLimit) { } data[u] = { followers: [], following: [] }; p.push(async function (k) { - let j1 = await siteCollector(u, 'followers', site, true); + let j1 = await siteCollector(u, 'followers', site, true, data); data[u].followers = j1; console.log(`User ${u} followers fully calculated`); @@ -179,7 +181,7 @@ async function siteCollector(user, path, site, useLimit) { }(p.length)); p.push(async function (k) { - let j1 = await siteCollector(u, 'following', site,true); + let j1 = await siteCollector(u, 'following', site,true, data); data[u].following = j1; console.log(`User ${u} following fully calculated`);