fix this mess
This commit is contained in:
parent
a04ca35c19
commit
4210075f37
5 changed files with 896 additions and 37 deletions
21
index.js
Normal file
21
index.js
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
import { main } from "./site.js";
|
||||||
|
|
||||||
|
let {
|
||||||
|
site = 'darflen',
|
||||||
|
route = './test.txt',
|
||||||
|
pageLimit = 0.1,
|
||||||
|
blackList = '',
|
||||||
|
greyList = '',
|
||||||
|
discardThreshold = 1,
|
||||||
|
delay = 100,
|
||||||
|
depth = 1,
|
||||||
|
isRelative = false,
|
||||||
|
fetchRate = 15,
|
||||||
|
user = 'paradock',
|
||||||
|
matrixIterations = 3,
|
||||||
|
useArchive = true,
|
||||||
|
isGpu = false
|
||||||
|
} = process.env;
|
||||||
|
let settings = { site, route, pageLimit, blackList, greyList, discardThreshold, delay, depth, isRelative, fetchRate, user, matrixIterations, useArchive, isGpu };
|
||||||
|
|
||||||
|
main(settings);
|
837
package-lock.json
generated
837
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cheerio": "^1.0.0",
|
"cheerio": "^1.0.0",
|
||||||
|
"express": "^4.21.2",
|
||||||
"gpu.js": "^2.15.0"
|
"gpu.js": "^2.15.0"
|
||||||
},
|
},
|
||||||
"overrides": {
|
"overrides": {
|
||||||
|
|
3
rank.js
3
rank.js
|
@ -1,7 +1,7 @@
|
||||||
import { GPU, input, Input } from "gpu.js";
|
import { GPU, input, Input } from "gpu.js";
|
||||||
|
|
||||||
// derived from https://git.dervland.net/biglyderv/new-bigly-chat/src/branch/master/docs/stats.php
|
// derived from https://git.dervland.net/biglyderv/new-bigly-chat/src/branch/master/docs/stats.php
|
||||||
function rankCalc(result, iterations = 10, main = [], domainMode = false) {
|
function rankCalc(result, iterations = 10, main = [], domainMode = false, isGpu = false) {
|
||||||
|
|
||||||
let fng = {};
|
let fng = {};
|
||||||
let fnc = {};
|
let fnc = {};
|
||||||
|
@ -90,7 +90,6 @@ function rankCalc(result, iterations = 10, main = [], domainMode = false) {
|
||||||
let mm = (iterations);
|
let mm = (iterations);
|
||||||
|
|
||||||
let gpu = new GPU();
|
let gpu = new GPU();
|
||||||
let isGpu = process.env.isGpu;
|
|
||||||
let multiplyMatrix;
|
let multiplyMatrix;
|
||||||
|
|
||||||
if (isGpu) {
|
if (isGpu) {
|
||||||
|
|
69
site.js
69
site.js
|
@ -4,19 +4,15 @@ import { rankCalc } from "./rank.js";
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
|
|
||||||
let cache = {};
|
let cache = {};
|
||||||
let site = process.env.site || 'darflen';
|
|
||||||
let route = process.env.route || './test.txt';
|
|
||||||
let pageLimit = process.env.pageLimit || Infinity;
|
|
||||||
let blacklist = (process.env.blacklist + '').split(',') || [];
|
|
||||||
let greylist = (process.env.greylist + '').split(',') || [];
|
|
||||||
let threshold = process.env.threshold || 100;
|
|
||||||
let rel = (process.env.rel == 'relative');
|
|
||||||
let hh;
|
let hh;
|
||||||
let percent = 0;
|
let percent = 0;
|
||||||
let d = process.env.depth || 1;
|
|
||||||
let noWorry = {};
|
let noWorry = {};
|
||||||
|
|
||||||
async function urlCollector(url, path, file, useLimit, data2) {
|
async function urlCollector(url, path, file, useLimit, data2, settings) {
|
||||||
|
let { pageLimit } = settings;
|
||||||
|
let blacklist = (settings.blacklist + '').split(',');
|
||||||
|
let greyList = (settings.greyList + '').split(',');
|
||||||
|
|
||||||
let urls = data2[url] ? (data2[url][path] || []) : [];
|
let urls = data2[url] ? (data2[url][path] || []) : [];
|
||||||
urls = [...urls];
|
urls = [...urls];
|
||||||
if (path != 'following') return urls;
|
if (path != 'following') return urls;
|
||||||
|
@ -45,7 +41,7 @@ async function urlCollector(url, path, file, useLimit, data2) {
|
||||||
let h = body(link).attr('href');
|
let h = body(link).attr('href');
|
||||||
if (!h) return true;
|
if (!h) return true;
|
||||||
h = h.trim();
|
h = h.trim();
|
||||||
if (h.startsWith('./') || h.startsWith('../') || h.startsWith('/')) {
|
if (h.startsWith('./') || h.startsWith('../') || h.startsWith('/')) {
|
||||||
let u = new URL(url);
|
let u = new URL(url);
|
||||||
u.pathname = h;
|
u.pathname = h;
|
||||||
h = u.toString();
|
h = u.toString();
|
||||||
|
@ -66,8 +62,8 @@ async function urlCollector(url, path, file, useLimit, data2) {
|
||||||
|
|
||||||
}
|
}
|
||||||
if (!h2) return true;
|
if (!h2) return true;
|
||||||
if (rel && h2.host != new URL(url).host) return true;
|
if (settings.isRelative == 'relative' && h2.host != new URL(url).host) return true;
|
||||||
for (let g of greylist) {
|
for (let g of greyList) {
|
||||||
if (h2.toString().includes(g) && g != '') return true;
|
if (h2.toString().includes(g) && g != '') return true;
|
||||||
}
|
}
|
||||||
if (blacklist.indexOf(h2.toString()) != -1) return true;
|
if (blacklist.indexOf(h2.toString()) != -1) return true;
|
||||||
|
@ -96,20 +92,21 @@ async function urlCollector(url, path, file, useLimit, data2) {
|
||||||
return data2[url][path];
|
return data2[url][path];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function rounder(users, data, mode) {
|
async function rounder(users, data, mode, settings) {
|
||||||
let p = [];
|
let p = [];
|
||||||
let congested = [];
|
let congested = [];
|
||||||
let timeout = false;
|
let timeout = false;
|
||||||
|
let { delay, fetchRate, depth } = settings;
|
||||||
|
|
||||||
let pr = new Promise(resolve => setTimeout(function (...ag) {
|
let pr = new Promise(resolve => setTimeout(function (...ag) {
|
||||||
timeout = true;
|
timeout = true;
|
||||||
resolve(ag);
|
resolve(ag);
|
||||||
}, (process.env.delay * 1000) || (60 * 1000)))
|
}, delay))
|
||||||
|
|
||||||
let ul = 0;
|
let ul = 0;
|
||||||
for (let u of users) {
|
for (let u of users) {
|
||||||
let it = 0;
|
let it = 0;
|
||||||
while (p.length >= (process.env.maxRate || 15)) {
|
while (p.length >= (fetchRate)) {
|
||||||
p = p.filter(x => x != 'hi');
|
p = p.filter(x => x != 'hi');
|
||||||
if (p.length == 0) break;
|
if (p.length == 0) break;
|
||||||
let pv = await Promise.any([...p, pr]);
|
let pv = await Promise.any([...p, pr]);
|
||||||
|
@ -130,23 +127,23 @@ async function rounder(users, data, mode) {
|
||||||
if (!data[u]) data[u] = { followers: [], following: [] };
|
if (!data[u]) data[u] = { followers: [], following: [] };
|
||||||
if (noWorry[u]) {
|
if (noWorry[u]) {
|
||||||
|
|
||||||
percent += 50 / d / users.length;
|
percent += 50 / depth / users.length;
|
||||||
console.log(`User ${u} followers was already fully calculated (${percent}% total)`);
|
console.log(`User ${u} followers was already fully calculated (${percent}% total)`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
p.push(async function (k) {
|
p.push(async function (k) {
|
||||||
await siteCollector(u, 'followers', site, mode, data);
|
await siteCollector(u, 'followers', settings.site, mode, data, settings);
|
||||||
|
|
||||||
percent += 50 / d / users.length;
|
percent += 50 / depth / users.length;
|
||||||
console.log(`User ${u} followers is fully calculated (${percent}% total)`);
|
console.log(`User ${u} followers is fully calculated (${percent}% total)`);
|
||||||
|
|
||||||
p[k] = 'hi';
|
p[k] = 'hi';
|
||||||
}(p.length));
|
}(p.length));
|
||||||
|
|
||||||
p.push(async function (k) {
|
p.push(async function (k) {
|
||||||
await siteCollector(u, 'following', site, mode, data);
|
await siteCollector(u, 'following', settings.site, mode, data, settings);
|
||||||
|
|
||||||
percent += 50 / d / users.length;
|
percent += 50 / depth / users.length;
|
||||||
console.log(`User ${u} following is fully calculated (${percent}% total)`);
|
console.log(`User ${u} following is fully calculated (${percent}% total)`);
|
||||||
|
|
||||||
p[k] = 'hi';
|
p[k] = 'hi';
|
||||||
|
@ -188,7 +185,8 @@ async function textCollector(word, path, file, data2) {
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function siteCollector(user, path, site, useLimit, data2) {
|
async function siteCollector(user, path, site, useLimit, data2, settings) {
|
||||||
|
let { route, pageLimit } = settings;
|
||||||
let users = [];
|
let users = [];
|
||||||
let urls = data2[user] ? (data2[user][path] || []) : [];
|
let urls = data2[user] ? (data2[user][path] || []) : [];
|
||||||
let ul = urls.length;
|
let ul = urls.length;
|
||||||
|
@ -200,7 +198,7 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (site == 'url') {
|
if (site == 'url') {
|
||||||
return await urlCollector(user, path, route, useLimit, data2);
|
return await urlCollector(user, path, route, useLimit, data2, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -256,9 +254,9 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
(async function () {
|
async function main(settings) {
|
||||||
let penv = process.env.user || 'paradock';
|
let { site, discardThreshold, depth, user, matrixIterations, useArchive, } = settings;
|
||||||
penv = penv.split(',');
|
user = user.split(',');
|
||||||
|
|
||||||
let users = [];
|
let users = [];
|
||||||
let data = {};
|
let data = {};
|
||||||
|
@ -270,12 +268,12 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
data = {};
|
data = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
users = await rounder(penv, data, false);
|
users = await rounder(user, data, false, settings);
|
||||||
|
|
||||||
users = [...new Set(users)];
|
users = [...new Set(users)];
|
||||||
|
|
||||||
let dat;
|
let dat;
|
||||||
for (let i = 0; i < d; i++) {
|
for (let i = 0; i < depth; i++) {
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
let tempSet = dat.map(x => x[0]);
|
let tempSet = dat.map(x => x[0]);
|
||||||
let kk = Object.keys(data);
|
let kk = Object.keys(data);
|
||||||
|
@ -283,11 +281,11 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
console.log(kk)
|
console.log(kk)
|
||||||
let oldLength = kk.length;
|
let oldLength = kk.length;
|
||||||
let theData = {};
|
let theData = {};
|
||||||
for (let a = 0; a < oldLength * threshold && a < oldLength; a++) {
|
for (let a = 0; a < oldLength * discardThreshold && a < oldLength; a++) {
|
||||||
let key = kk[a];
|
let key = kk[a];
|
||||||
theData[key] = data[key];
|
theData[key] = data[key];
|
||||||
}
|
}
|
||||||
users = tempSet.concat(await rounder(tempSet, theData, true));
|
users = tempSet.concat(await rounder(tempSet, theData, true, settings));
|
||||||
users = [...new Set(users)];
|
users = [...new Set(users)];
|
||||||
}
|
}
|
||||||
for (let uf of users) {
|
for (let uf of users) {
|
||||||
|
@ -325,7 +323,8 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
console.log(`Graph is fully repaired`);
|
console.log(`Graph is fully repaired`);
|
||||||
dat = Object.entries(rankCalc(data, (i == d - 1) ? process.env.matrixIterations : 3, penv, site == 'url'));
|
let calcedRank = rankCalc(data, (i == depth - 1) ? matrixIterations : 3, user, site == 'url', settings.isGpu)
|
||||||
|
dat = Object.entries(calcedRank);
|
||||||
dat = dat.sort((a, b) => b[1] - a[1]);
|
dat = dat.sort((a, b) => b[1] - a[1]);
|
||||||
console.log(`Graph is calculated with ${dat.length} entries`);
|
console.log(`Graph is calculated with ${dat.length} entries`);
|
||||||
let dat2 = {};
|
let dat2 = {};
|
||||||
|
@ -334,11 +333,11 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
let srz = JSON.stringify(dat2);
|
let srz = JSON.stringify(dat2);
|
||||||
let ff = `./users_${i}_${btoa(penv[0])}_${+new Date()}.json`;
|
let ff = `./users_${i}_${btoa(user[0])}_${+new Date()}.json`;
|
||||||
await writeFile(ff, srz, 'utf8');
|
await writeFile(ff, srz, 'utf8');
|
||||||
console.log(`Temporary file ${ff} is written`);
|
console.log(`Temporary file ${ff} is written`);
|
||||||
if (process.env.useArchive) {
|
if (useArchive == 'use') {
|
||||||
ff = `./net_${btoa(penv[0])}.json`;
|
ff = `./net_${btoa(user[0])}.json`;
|
||||||
await writeFile(ff, JSON.stringify(data), 'utf8');
|
await writeFile(ff, JSON.stringify(data), 'utf8');
|
||||||
console.log(`Temporary file ${ff} is written`);
|
console.log(`Temporary file ${ff} is written`);
|
||||||
}
|
}
|
||||||
|
@ -346,4 +345,6 @@ async function siteCollector(user, path, site, useLimit, data2) {
|
||||||
|
|
||||||
console.log(`Graph is complete (${Object.keys(users).length} entries)`);
|
console.log(`Graph is complete (${Object.keys(users).length} entries)`);
|
||||||
|
|
||||||
})()
|
};
|
||||||
|
|
||||||
|
export { main };
|
Loading…
Reference in a new issue