2025-01-30 07:59:30 -05:00
|
|
|
|
2025-01-30 21:00:26 -05:00
|
|
|
import { writeFile, readFile } from "fs/promises";
|
2025-01-30 07:59:30 -05:00
|
|
|
import { rankCalc } from "./rank.js";
|
2025-01-31 18:30:18 -05:00
|
|
|
import * as cheerio from 'cheerio';
|
2025-01-30 07:59:30 -05:00
|
|
|
|
2025-01-30 08:20:17 -05:00
|
|
|
// ---------------------------------------------------------------------------
// Module-level configuration and shared state (environment is read once).
// ---------------------------------------------------------------------------

/** Parsed API responses that siteCollector has already fetched, keyed by request URL. */
let cache = {};

/** Data source selector; siteCollector understands 'darflen', 'scratch', 'file' and 'url'. */
let site = process.env.site || 'darflen';

/** Path that siteCollector hands to the 'file' and 'url' collectors. */
let route = process.env.route || './test.txt';

/**
 * Maximum number of pages to collect when a caller asks for limiting.
 * Environment variables are strings, so parse once here instead of relying on
 * implicit coercion at every comparison. Unset or non-numeric means "no limit".
 */
let pageLimit = Number(process.env.pageLimit || Infinity);
if (Number.isNaN(pageLimit)) pageLimit = Infinity;

/** Text corpus used by textCollector; loaded lazily on its first call. */
let hh;
|
|
|
|
|
2025-01-31 20:08:22 -05:00
|
|
|
/**
 * Collects the links found on a web page.
 *
 * For every usable `<a href>` two entries are appended to the result: the
 * resolved absolute URL and that URL's origin.
 *
 * @param {string} url - Absolute URL of the page to download and scan.
 * @param {string} path - Relation being requested; only 'following' is
 *   supported, anything else yields an empty list.
 * @param {string} [file] - Unused; kept so all collectors share a signature.
 * @param {boolean} [useLimit] - When truthy, stop once `pageLimit * 40`
 *   entries have been gathered.
 * @returns {Promise<string[]>} Collected URLs (may contain duplicates); empty
 *   when the page cannot be fetched.
 */
async function urlCollector(url, path, file, useLimit) {
  if (path !== 'following') return [];

  let html;
  try {
    const response = await fetch(url);
    html = await response.text();
  } catch (err) {
    // Best effort: an unreachable page simply contributes no links.
    console.warn(`Could not fetch ${url}: ${err.message}`);
    return [];
  }

  const $ = cheerio.load(html);
  const maxUrls = pageLimit * 40;
  const urls = [];

  $('a').each((index, link) => {
    // Returning false stops cheerio's iteration once the cap is reached.
    if (useLimit && urls.length >= maxUrls) return false;

    const href = $(link).attr('href')?.trim();
    if (!href) return;

    // './x', '../x' and '/x' are resolved against the page URL; everything
    // else must already be an absolute URL or it is skipped.
    const isRelative =
      href.startsWith('./') || href.startsWith('../') || href.startsWith('/');

    let resolved;
    try {
      resolved = isRelative ? new URL(href, url) : new URL(href);
    } catch (err) {
      return; // not a parseable URL
    }

    urls.push(resolved.toString(), resolved.origin);
  });

  return urls;
}
|
|
|
|
|
2025-01-30 21:00:26 -05:00
|
|
|
/**
 * Finds the words that share a sentence with `word` in the text corpus.
 *
 * The corpus is read from `file` on first use, lower-cased, and kept in the
 * module-level `hh` for later calls. Sentences are delimited by newlines and
 * periods; a sentence matches when it contains `word` surrounded by spaces
 * (or at its start/end). Matching is case-insensitive.
 *
 * @param {string} word - Word to look up; an empty string matches every sentence.
 * @param {string} path - Unused; kept so all collectors share a signature.
 * @param {string} file - Path of the corpus, read only while `hh` is still empty.
 * @returns {Promise<string[]>} Unique tokens from the matching sentences, in
 *   first-seen order. May include an empty string when the joined text starts
 *   or ends with a separator.
 */
async function textCollector(word, path, file) {
  // Normalise once at load time instead of re-lowercasing on every call.
  if (!hh) hh = (await readFile(file, 'utf8')).toLowerCase();

  // The corpus is lower-case, so the query has to be as well or it never matches.
  const needle = ` ${word.toLowerCase()} `;

  const sentences = hh
    .split(/[\n.]+/)
    .filter((sentence) => word.length === 0 || ` ${sentence} `.includes(needle));

  const tokens = sentences.join(' ').split(/[^a-zA-Z0-9']+/);
  return [...new Set(tokens)];
}
|
2025-01-30 07:59:30 -05:00
|
|
|
|
2025-01-31 18:05:21 -05:00
|
|
|
/**
 * Collects the names related to `user` from the selected data source.
 *
 * - 'file' delegates to textCollector and 'url' to urlCollector, both using
 *   the module-level `route`.
 * - 'darflen' and 'scratch' walk the paginated HTTP API until a page comes
 *   back empty, malformed or unreachable, or the page limit is hit. Parsed
 *   responses are memoised in the module-level `cache` by request URL.
 *
 * @param {string} user - Username (or word / URL for the 'file' / 'url' sources).
 * @param {string} path - Relation to fetch, e.g. 'followers' or 'following'.
 * @param {string} site - One of 'darflen', 'scratch', 'file', 'url'.
 * @param {boolean} [useLimit] - When truthy, collect at most `pageLimit` pages.
 * @returns {Promise<string[]>} Collected names in page order.
 * @throws {Error} If `site` is not one of the supported sources.
 */
async function siteCollector(user, path, site, useLimit) {
  if (site === 'file') return textCollector(user, path, route);
  // urlCollector is the collector that honours useLimit, so it must receive it.
  if (site === 'url') return urlCollector(user, path, route, useLimit);
  if (site !== 'darflen' && site !== 'scratch') {
    throw new Error('That site is not supported.');
  }

  const isDarflen = site === 'darflen';
  const collected = [];

  for (let page = 1; ; page++) {
    // Check before fetching so exactly `pageLimit` pages are collected.
    if (useLimit && page > pageLimit) break;

    // Darflen pages are numbered from 1; Scratch uses a zero-based offset.
    const endpoint = isDarflen
      ? `https://api.darflen.com/users/${user}/${path}/${page}`
      : `https://api.scratch.mit.edu/users/${user}/${path}/?limit=40&offset=${(page - 1) * 40}`;

    let payload = cache[endpoint];
    if (!payload) {
      try {
        const response = await fetch(endpoint);
        payload = await response.json();
      } catch (err) {
        // Network or parse failure: treat it as an empty page and stop paging.
        console.warn(`Could not load ${endpoint}: ${err.message}`);
        payload = [];
      }
      cache[endpoint] = payload;
    }

    // Darflen wraps the list in an object under the relation name;
    // Scratch returns the list itself.
    const entries = isDarflen ? payload?.[path] : payload;
    if (!Array.isArray(entries) || entries.length === 0) break;

    for (const entry of entries) {
      collected.push(isDarflen ? entry.profile.username : entry.username);
    }

    console.log(`User ${user} has ${page} pages calculated`);
  }

  return collected;
}
|
|
|
|
|
|
|
|
/**
 * Script entry point.
 *
 * 1. Seeds the user set with the followers and following of every name in the
 *    comma-separated `user` environment variable (default 'paradock').
 * 2. Optionally widens that set `depth - 1` more times (env `depth`).
 * 3. Fetches followers/following for every user, with at most `maxRate`
 *    (env, default 15) collections in flight at once.
 * 4. Makes the graph symmetric: if A lists B as a follower, B lists A as
 *    following, and vice versa.
 * 5. Passes the graph to rankCalc and writes the result, sorted ascending by
 *    score and formatted as percentages, to ./users.json.
 */
(async function () {
  const penv = (process.env.user || 'paradock').split(',');

  // `users` is everyone to analyse; `legal` holds only the seeds' direct followers.
  const users = new Set();
  const legal = new Set();

  for (const seed of penv) {
    const followers = await siteCollector(seed, 'followers', site);
    const following = await siteCollector(seed, 'following', site);
    for (const name of followers) {
      users.add(name);
      legal.add(name);
    }
    for (const name of following) users.add(name);
  }

  // Each extra depth level adds the neighbours of everyone known so far.
  // A Set keeps the list de-duplicated after every pass, including the last.
  const depth = Number(process.env.depth) || 1;
  for (let level = 1; level < depth; level++) {
    for (const known of [...users]) {
      for (const path of ['followers', 'following']) {
        for (const found of await siteCollector(known, path, site, true)) {
          users.add(found);
        }
      }
    }
  }

  const data = {};
  const maxRate = Number(process.env.maxRate) || 15;
  const inFlight = new Set(); // one entry per collection that is still running
  const tasks = []; // every collection ever started, awaited together below

  const collect = async (user, path, useLimit) => {
    const list = await siteCollector(user, path, site, useLimit);
    data[user][path] = list;
    console.log(`User ${user} ${path} fully calculated`);
  };

  for (const u of users) {
    const inLegal = legal.has(u);
    data[u] = { followers: [], following: [] };

    for (const path of ['followers', 'following']) {
      // Wait for a free slot before starting another collection.
      while (inFlight.size >= maxRate) await Promise.race(inFlight);

      const task = collect(u, path, inLegal);
      // The slot never rejects and removes itself once the task settles;
      // failures still surface through Promise.all(tasks).
      const slot = task.catch(() => {}).finally(() => inFlight.delete(slot));
      inFlight.add(slot);
      tasks.push(task);
    }
  }

  await Promise.all(tasks);

  // Membership sets kept next to each list so duplicate checks stay O(1).
  const membership = new Map();
  const membersOf = (list) => {
    let members = membership.get(list);
    if (!members) {
      members = new Set(list);
      membership.set(list, members);
    }
    return members;
  };

  // Records `other` under data[user][path] exactly once, creating the entry if needed.
  const link = (user, path, other) => {
    if (!data[user]) data[user] = { followers: [], following: [] };
    if (!data[user][path]) data[user][path] = [];
    const members = membersOf(data[user][path]);
    if (!members.has(other)) {
      members.add(other);
      data[user][path].push(other);
    }
  };

  // Iterate a snapshot of the keys: `link` may add new users to `data`.
  for (const uf of Object.keys(data)) {
    const entry = data[uf];
    if (!entry) continue;
    const { following, followers } = entry;
    if (!following || !followers) continue;

    for (const f of followers) link(f, 'following', uf);
    for (const f of following) link(f, 'followers', uf);
  }

  // NOTE(review): rankCalc (./rank.js) presumably returns { user: score } with
  // scores as fractions of 1, confirm against rank.js.
  const ranked = Object.entries(rankCalc(data, 100, penv)).sort((a, b) => a[1] - b[1]);

  const report = {};
  for (const [name, score] of ranked) {
    report[name] = `${score * 100}%`;
  }

  await writeFile('./users.json', JSON.stringify(report), 'utf8');
})().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});
|