re-index if discovered again
This commit is contained in:
parent
0fac405c13
commit
2622c3bb06
1 changed file with 16 additions and 14 deletions
30
site.js
30
site.js
|
@ -9,7 +9,7 @@ let route = process.env.route || './test.txt';
|
||||||
let pageLimit = process.env.pageLimit || Infinity;
|
let pageLimit = process.env.pageLimit || Infinity;
|
||||||
let hh;
|
let hh;
|
||||||
|
|
||||||
async function urlCollector(url, path, file, useLimit) {
|
async function urlCollector(url, path, file, useLimit, data2) {
|
||||||
if (path != 'following') return [];
|
if (path != 'following') return [];
|
||||||
|
|
||||||
let data;
|
let data;
|
||||||
|
@ -24,10 +24,10 @@ async function urlCollector(url, path, file, useLimit) {
|
||||||
|
|
||||||
let body = cheerio.load(data);
|
let body = cheerio.load(data);
|
||||||
let links = body('a');
|
let links = body('a');
|
||||||
let urls = [];
|
let urls = data2[url] ? (data2[url][path] || []) : [];
|
||||||
|
let ll = urls.length;
|
||||||
links.each(function (i, link) {
|
links.each(function (i, link) {
|
||||||
if (useLimit && urls.length >= pageLimit * 40) return;
|
if (useLimit && urls.length >= pageLimit * 40 + ll) return;
|
||||||
console.log(`User ${url} has ${i} pages calculated`);
|
|
||||||
|
|
||||||
let h = body(link).attr('href');
|
let h = body(link).attr('href');
|
||||||
if (!h) return;
|
if (!h) return;
|
||||||
|
@ -43,7 +43,9 @@ async function urlCollector(url, path, file, useLimit) {
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
urls.push(h2.toString())
|
urls.push(h2.toString());
|
||||||
|
urls = [...new Set(urls)];
|
||||||
|
console.log(`User ${url} has ${urls.length} pages calculated`);
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,7 +71,7 @@ async function textCollector(word, path, file) {
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function siteCollector(user, path, site, useLimit) {
|
async function siteCollector(user, path, site, useLimit, dat) {
|
||||||
let users = [];
|
let users = [];
|
||||||
let i = 1;
|
let i = 1;
|
||||||
let out = [];
|
let out = [];
|
||||||
|
@ -79,7 +81,7 @@ async function siteCollector(user, path, site, useLimit) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (site == 'url') {
|
if (site == 'url') {
|
||||||
return await urlCollector(user, path, route, useLimit);
|
return await urlCollector(user, path, route, useLimit, dat);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -130,12 +132,13 @@ async function siteCollector(user, path, site, useLimit) {
|
||||||
penv = penv.split(',');
|
penv = penv.split(',');
|
||||||
|
|
||||||
let users = [];
|
let users = [];
|
||||||
|
let data = {};
|
||||||
|
|
||||||
for (let u of penv) {
|
for (let u of penv) {
|
||||||
let ca = await siteCollector(u, 'followers', site);
|
let ca = await siteCollector(u, 'followers', site, false, data);
|
||||||
users = users.concat(ca);
|
users = users.concat(ca);
|
||||||
|
|
||||||
users = users.concat(await siteCollector(u, 'following', site));
|
users = users.concat(await siteCollector(u, 'following', site, false, data));
|
||||||
}
|
}
|
||||||
|
|
||||||
users = [...new Set(users)];
|
users = [...new Set(users)];
|
||||||
|
@ -145,12 +148,11 @@ async function siteCollector(user, path, site, useLimit) {
|
||||||
users = [...new Set(users)];
|
users = [...new Set(users)];
|
||||||
let tempSet = [...users];
|
let tempSet = [...users];
|
||||||
for (let u of tempSet) {
|
for (let u of tempSet) {
|
||||||
users = users.concat(await siteCollector(u, 'followers', site, true));
|
users = users.concat(await siteCollector(u, 'followers', site, true, data));
|
||||||
users = users.concat(await siteCollector(u, 'following', site, true));
|
users = users.concat(await siteCollector(u, 'following', site, true, data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = {};
|
|
||||||
let p = [];
|
let p = [];
|
||||||
let congested = [];
|
let congested = [];
|
||||||
|
|
||||||
|
@ -170,7 +172,7 @@ async function siteCollector(user, path, site, useLimit) {
|
||||||
}
|
}
|
||||||
data[u] = { followers: [], following: [] };
|
data[u] = { followers: [], following: [] };
|
||||||
p.push(async function (k) {
|
p.push(async function (k) {
|
||||||
let j1 = await siteCollector(u, 'followers', site, true);
|
let j1 = await siteCollector(u, 'followers', site, true, data);
|
||||||
|
|
||||||
data[u].followers = j1;
|
data[u].followers = j1;
|
||||||
console.log(`User ${u} followers fully calculated`);
|
console.log(`User ${u} followers fully calculated`);
|
||||||
|
@ -179,7 +181,7 @@ async function siteCollector(user, path, site, useLimit) {
|
||||||
}(p.length));
|
}(p.length));
|
||||||
|
|
||||||
p.push(async function (k) {
|
p.push(async function (k) {
|
||||||
let j1 = await siteCollector(u, 'following', site,true);
|
let j1 = await siteCollector(u, 'following', site,true, data);
|
||||||
|
|
||||||
data[u].following = j1;
|
data[u].following = j1;
|
||||||
console.log(`User ${u} following fully calculated`);
|
console.log(`User ${u} following fully calculated`);
|
||||||
|
|
Loading…
Reference in a new issue