// Source: badguardhome/scripts/whotracksme/index.js (JavaScript, 101 lines, 2.7 KiB)

const fs = require('fs');
const sqlite3 = require('sqlite3').verbose();
const axios = require('axios');

// URL the tracker database is downloaded from (a `.sql` file in the
// cliqz-oss/whotracks.me repository).
const INPUT_SQL_URL = 'https://raw.githubusercontent.com/cliqz-oss/whotracks.me/master/whotracksme/data/assets/trackerdb.sql';

// Path of the generated JSON file; being relative, it is resolved against the
// current working directory when written.
const OUTPUT_PATH = 'whotracksme.json';
/**
 * Adapts one statement of the downloaded SQL dump before it is run in SQLite.
 *
 * Surrounding whitespace is trimmed and, for statements containing
 * `CREATE TABLE`, every `UNIQUE` keyword is removed.
 * NOTE(review): presumably the constraints are dropped so that importing the
 * dump does not fail on duplicate rows — confirm against the upstream schema.
 *
 * @param {string} sql - A single raw statement taken from the dump.
 * @returns {string} The statement to execute (may be an empty string).
 */
function transformToSqlite(sql) {
    const statement = sql.trim();
    if (statement.includes('CREATE TABLE')) {
        return statement.replace(/UNIQUE/g, '');
    }
    return statement;
}

/**
 * Downloads the tracker SQL dump from INPUT_SQL_URL, loads it into an in-memory
 * SQLite database and writes a JSON summary to OUTPUT_PATH.
 *
 * The generated object contains:
 *   - `timeUpdated`: ISO timestamp of this run;
 *   - `categories`: category id -> category name;
 *   - `trackers`: tracker id -> `{ name, categoryId, url }`, where `url` falls
 *     back to the owning company's `website_url` when the tracker has none;
 *   - `trackerDomains`: domain -> tracker.
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 *   Rejects if the download fails, if any of the SELECT queries reports an
 *   error, or if closing the database fails; in the latter two cases the
 *   output file is left untouched.
 */
async function runScript() {
    console.log('Downloading ' + INPUT_SQL_URL);
    const response = await axios.get(INPUT_SQL_URL);
    const trackersDbSql = response.data;
    if (typeof trackersDbSql !== 'string') {
        throw new Error('Unexpected response body from ' + INPUT_SQL_URL + ': expected SQL text');
    }

    const whotracksme = {
        timeUpdated: new Date().toISOString(),
        categories: {},
        trackers: {},
        trackerDomains: {},
    };

    console.log('Initializing the in-memory trackers database');
    const db = new sqlite3.Database(':memory:');

    // Errors reported by the SELECT queries; any entry aborts the file write.
    const queryErrors = [];

    // Wraps a per-row handler so that query errors are logged and remembered
    // instead of being treated as rows.
    const forEachRow = (handleRow) => (err, row) => {
        if (err) {
            console.error(err);
            queryErrors.push(err);
            return;
        }
        handleRow(row);
    };

    // serialize() makes the queued statements run one after another, so the
    // dump is fully imported before the SELECTs and `companies` is filled
    // before the trackers query reads it.
    db.serialize(() => {
        for (const rawSql of trackersDbSql.split(/;\s*$/gm)) {
            const sql = transformToSqlite(rawSql);
            if (sql === '') {
                // Splitting leaves empty fragments (e.g. after the last `;`).
                continue;
            }
            // The import stays best-effort: a statement SQLite rejects is
            // reported and skipped rather than aborting the run.
            db.run(sql, (err) => {
                if (err) {
                    console.warn('Skipping SQL statement rejected by SQLite: ' + err.message);
                }
            });
        }

        db.each('SELECT * FROM categories', forEachRow((row) => {
            whotracksme.categories[row.id] = row.name;
        }));

        const companies = {};
        db.each('SELECT * FROM companies', forEachRow((row) => {
            companies[row.id] = {
                id: row.id,
                name: row.name,
                website_url: row.website_url,
            };
        }));

        db.each('SELECT * FROM trackers', forEachRow((row) => {
            const company = companies[row.company_id];
            // Fall back to the company's website when the tracker has no URL.
            const url = !row.website_url && company ? company.website_url : row.website_url;

            whotracksme.trackers[row.id] = {
                name: row.name,
                categoryId: row.category_id,
                url,
            };
        }));

        db.each('SELECT * FROM tracker_domains', forEachRow((row) => {
            whotracksme.trackerDomains[row.domain] = row.tracker;
        }));
    });

    // close() is queued behind the statements above; awaiting it means the
    // returned promise only settles after all rows have been collected.
    await new Promise((resolve, reject) => {
        db.close((err) => {
            if (err) {
                console.error(err);
                reject(err);
                return;
            }
            resolve();
        });
    });

    if (queryErrors.length > 0) {
        // Do not overwrite a previously generated file with partial data.
        throw new Error(
            queryErrors.length + ' tracker database query error(s); ' + OUTPUT_PATH + ' was not updated'
        );
    }

    fs.writeFileSync(OUTPUT_PATH, JSON.stringify(whotracksme, null, 4));
    console.log('Trackers json file has been updated: ' + OUTPUT_PATH);
}
// Script entry point. runScript() returns a promise, so a rejection (e.g. a
// failed download) is reported here and turned into a non-zero exit code
// instead of being left as an unhandled rejection.
runScript().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});