fix this mess

This commit is contained in:
biglyderv 2025-02-03 06:42:01 -05:00
parent a04ca35c19
commit 4210075f37
5 changed files with 896 additions and 37 deletions

21
index.js Normal file
View file

@ -0,0 +1,21 @@
import { main } from "./site.js";

/**
 * Entry point: assemble crawler settings from environment variables,
 * falling back to the defaults below for any variable that is unset,
 * then hand the settings object to main().
 *
 * NOTE: values read from process.env are always strings; downstream code
 * coerces them where needed (e.g. `useArchive == 'use'` in site.js).
 */
let {
site = 'darflen',
route = './test.txt',
pageLimit = 0.1,
blackList = '',
greyList = '',
discardThreshold = 1,
delay = 100,
depth = 1,
isRelative = false,
fetchRate = 15,
user = 'paradock',
matrixIterations = 3,
useArchive = true,
isGpu = false
} = process.env;
let settings = { site, route, pageLimit, blackList, greyList, discardThreshold, delay, depth, isRelative, fetchRate, user, matrixIterations, useArchive, isGpu };
// BUG FIX: site.js reads `settings.blacklist` (lowercase "l"), but this file
// only supplied `blackList` — so the configured blacklist was silently lost
// and urlCollector ended up splitting the string "undefined" instead.
// Supply the lowercase alias as well; keeping `blackList` preserves
// backward compatibility for any other consumer.
settings.blacklist = blackList;
main(settings);

837
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
"type": "module", "type": "module",
"dependencies": { "dependencies": {
"cheerio": "^1.0.0", "cheerio": "^1.0.0",
"express": "^4.21.2",
"gpu.js": "^2.15.0" "gpu.js": "^2.15.0"
}, },
"overrides": { "overrides": {

View file

@ -1,7 +1,7 @@
import { GPU, input, Input } from "gpu.js"; import { GPU, input, Input } from "gpu.js";
// derived from https://git.dervland.net/biglyderv/new-bigly-chat/src/branch/master/docs/stats.php // derived from https://git.dervland.net/biglyderv/new-bigly-chat/src/branch/master/docs/stats.php
function rankCalc(result, iterations = 10, main = [], domainMode = false) { function rankCalc(result, iterations = 10, main = [], domainMode = false, isGpu = false) {
let fng = {}; let fng = {};
let fnc = {}; let fnc = {};
@ -90,7 +90,6 @@ function rankCalc(result, iterations = 10, main = [], domainMode = false) {
let mm = (iterations); let mm = (iterations);
let gpu = new GPU(); let gpu = new GPU();
let isGpu = process.env.isGpu;
let multiplyMatrix; let multiplyMatrix;
if (isGpu) { if (isGpu) {

67
site.js
View file

@ -4,19 +4,15 @@ import { rankCalc } from "./rank.js";
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
let cache = {}; let cache = {};
let site = process.env.site || 'darflen';
let route = process.env.route || './test.txt';
let pageLimit = process.env.pageLimit || Infinity;
let blacklist = (process.env.blacklist + '').split(',') || [];
let greylist = (process.env.greylist + '').split(',') || [];
let threshold = process.env.threshold || 100;
let rel = (process.env.rel == 'relative');
let hh; let hh;
let percent = 0; let percent = 0;
let d = process.env.depth || 1;
let noWorry = {}; let noWorry = {};
async function urlCollector(url, path, file, useLimit, data2) { async function urlCollector(url, path, file, useLimit, data2, settings) {
let { pageLimit } = settings;
let blacklist = (settings.blacklist + '').split(',');
let greyList = (settings.greyList + '').split(',');
let urls = data2[url] ? (data2[url][path] || []) : []; let urls = data2[url] ? (data2[url][path] || []) : [];
urls = [...urls]; urls = [...urls];
if (path != 'following') return urls; if (path != 'following') return urls;
@ -66,8 +62,8 @@ async function urlCollector(url, path, file, useLimit, data2) {
} }
if (!h2) return true; if (!h2) return true;
if (rel && h2.host != new URL(url).host) return true; if (settings.isRelative == 'relative' && h2.host != new URL(url).host) return true;
for (let g of greylist) { for (let g of greyList) {
if (h2.toString().includes(g) && g != '') return true; if (h2.toString().includes(g) && g != '') return true;
} }
if (blacklist.indexOf(h2.toString()) != -1) return true; if (blacklist.indexOf(h2.toString()) != -1) return true;
@ -96,20 +92,21 @@ async function urlCollector(url, path, file, useLimit, data2) {
return data2[url][path]; return data2[url][path];
} }
async function rounder(users, data, mode) { async function rounder(users, data, mode, settings) {
let p = []; let p = [];
let congested = []; let congested = [];
let timeout = false; let timeout = false;
let { delay, fetchRate, depth } = settings;
let pr = new Promise(resolve => setTimeout(function (...ag) { let pr = new Promise(resolve => setTimeout(function (...ag) {
timeout = true; timeout = true;
resolve(ag); resolve(ag);
}, (process.env.delay * 1000) || (60 * 1000))) }, delay))
let ul = 0; let ul = 0;
for (let u of users) { for (let u of users) {
let it = 0; let it = 0;
while (p.length >= (process.env.maxRate || 15)) { while (p.length >= (fetchRate)) {
p = p.filter(x => x != 'hi'); p = p.filter(x => x != 'hi');
if (p.length == 0) break; if (p.length == 0) break;
let pv = await Promise.any([...p, pr]); let pv = await Promise.any([...p, pr]);
@ -130,23 +127,23 @@ async function rounder(users, data, mode) {
if (!data[u]) data[u] = { followers: [], following: [] }; if (!data[u]) data[u] = { followers: [], following: [] };
if (noWorry[u]) { if (noWorry[u]) {
percent += 50 / d / users.length; percent += 50 / depth / users.length;
console.log(`User ${u} followers was already fully calculated (${percent}% total)`); console.log(`User ${u} followers was already fully calculated (${percent}% total)`);
continue; continue;
} }
p.push(async function (k) { p.push(async function (k) {
await siteCollector(u, 'followers', site, mode, data); await siteCollector(u, 'followers', settings.site, mode, data, settings);
percent += 50 / d / users.length; percent += 50 / depth / users.length;
console.log(`User ${u} followers is fully calculated (${percent}% total)`); console.log(`User ${u} followers is fully calculated (${percent}% total)`);
p[k] = 'hi'; p[k] = 'hi';
}(p.length)); }(p.length));
p.push(async function (k) { p.push(async function (k) {
await siteCollector(u, 'following', site, mode, data); await siteCollector(u, 'following', settings.site, mode, data, settings);
percent += 50 / d / users.length; percent += 50 / depth / users.length;
console.log(`User ${u} following is fully calculated (${percent}% total)`); console.log(`User ${u} following is fully calculated (${percent}% total)`);
p[k] = 'hi'; p[k] = 'hi';
@ -188,7 +185,8 @@ async function textCollector(word, path, file, data2) {
return words; return words;
} }
async function siteCollector(user, path, site, useLimit, data2) { async function siteCollector(user, path, site, useLimit, data2, settings) {
let { route, pageLimit } = settings;
let users = []; let users = [];
let urls = data2[user] ? (data2[user][path] || []) : []; let urls = data2[user] ? (data2[user][path] || []) : [];
let ul = urls.length; let ul = urls.length;
@ -200,7 +198,7 @@ async function siteCollector(user, path, site, useLimit, data2) {
} }
if (site == 'url') { if (site == 'url') {
return await urlCollector(user, path, route, useLimit, data2); return await urlCollector(user, path, route, useLimit, data2, settings);
} }
while (true) { while (true) {
@ -256,9 +254,9 @@ async function siteCollector(user, path, site, useLimit, data2) {
return out; return out;
} }
(async function () { async function main(settings) {
let penv = process.env.user || 'paradock'; let { site, discardThreshold, depth, user, matrixIterations, useArchive, } = settings;
penv = penv.split(','); user = user.split(',');
let users = []; let users = [];
let data = {}; let data = {};
@ -270,12 +268,12 @@ async function siteCollector(user, path, site, useLimit, data2) {
data = {}; data = {};
} }
users = await rounder(penv, data, false); users = await rounder(user, data, false, settings);
users = [...new Set(users)]; users = [...new Set(users)];
let dat; let dat;
for (let i = 0; i < d; i++) { for (let i = 0; i < depth; i++) {
if (i != 0) { if (i != 0) {
let tempSet = dat.map(x => x[0]); let tempSet = dat.map(x => x[0]);
let kk = Object.keys(data); let kk = Object.keys(data);
@ -283,11 +281,11 @@ async function siteCollector(user, path, site, useLimit, data2) {
console.log(kk) console.log(kk)
let oldLength = kk.length; let oldLength = kk.length;
let theData = {}; let theData = {};
for (let a = 0; a < oldLength * threshold && a < oldLength; a++) { for (let a = 0; a < oldLength * discardThreshold && a < oldLength; a++) {
let key = kk[a]; let key = kk[a];
theData[key] = data[key]; theData[key] = data[key];
} }
users = tempSet.concat(await rounder(tempSet, theData, true)); users = tempSet.concat(await rounder(tempSet, theData, true, settings));
users = [...new Set(users)]; users = [...new Set(users)];
} }
for (let uf of users) { for (let uf of users) {
@ -325,7 +323,8 @@ async function siteCollector(user, path, site, useLimit, data2) {
} }
} }
console.log(`Graph is fully repaired`); console.log(`Graph is fully repaired`);
dat = Object.entries(rankCalc(data, (i == d - 1) ? process.env.matrixIterations : 3, penv, site == 'url')); let calcedRank = rankCalc(data, (i == depth - 1) ? matrixIterations : 3, user, site == 'url', settings.isGpu)
dat = Object.entries(calcedRank);
dat = dat.sort((a, b) => b[1] - a[1]); dat = dat.sort((a, b) => b[1] - a[1]);
console.log(`Graph is calculated with ${dat.length} entries`); console.log(`Graph is calculated with ${dat.length} entries`);
let dat2 = {}; let dat2 = {};
@ -334,11 +333,11 @@ async function siteCollector(user, path, site, useLimit, data2) {
} }
let srz = JSON.stringify(dat2); let srz = JSON.stringify(dat2);
let ff = `./users_${i}_${btoa(penv[0])}_${+new Date()}.json`; let ff = `./users_${i}_${btoa(user[0])}_${+new Date()}.json`;
await writeFile(ff, srz, 'utf8'); await writeFile(ff, srz, 'utf8');
console.log(`Temporary file ${ff} is written`); console.log(`Temporary file ${ff} is written`);
if (process.env.useArchive) { if (useArchive == 'use') {
ff = `./net_${btoa(penv[0])}.json`; ff = `./net_${btoa(user[0])}.json`;
await writeFile(ff, JSON.stringify(data), 'utf8'); await writeFile(ff, JSON.stringify(data), 'utf8');
console.log(`Temporary file ${ff} is written`); console.log(`Temporary file ${ff} is written`);
} }
@ -346,4 +345,6 @@ async function siteCollector(user, path, site, useLimit, data2) {
console.log(`Graph is complete (${Object.keys(users).length} entries)`); console.log(`Graph is complete (${Object.keys(users).length} entries)`);
})() };
export { main };