* : query log anonymization tool
This commit is contained in:
parent
38066151a6
commit
f694a40c57
|
@ -0,0 +1,2 @@
|
||||||
|
node_modules
|
||||||
|
test/anonquerylog.json
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Helper tools to work with the Query log
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
- `npm install` - Install the dependencies
|
||||||
|
- `npm run anonymize <source> <dst>` - Reads the query log from `<source>` and writes an anonymized version to `<dst>`.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
- `npm run anonymize test/querylog.json test/anonquerylog.json` - Anonymizes `test/querylog.json` and writes the result to `test/anonquerylog.json`.
|
|
@ -0,0 +1,124 @@
|
||||||
|
const fs = require('fs');
|
||||||
|
const readline = require('readline');
|
||||||
|
const dnsPacket = require('dns-packet')
|
||||||
|
|
||||||
|
/**
 * Decodes a base64-encoded string and returns the decoded bytes as ASCII text.
 *
 * @param {string} data - Base64-encoded input.
 * @returns {string} The decoded bytes interpreted as ASCII.
 * @throws {TypeError} If `data` is not a string (or another type accepted by `Buffer.from`).
 */
const decodeBase64 = (data) => {
    // `Buffer.from` replaces the deprecated `new Buffer(...)` constructor, which
    // allocates a buffer of the given size when it is handed a number.
    const buff = Buffer.from(data, 'base64');
    return buff.toString('ascii');
};
|
||||||
|
|
||||||
|
/**
 * Reads the file at `source` as a stream and passes its lines to `callback`
 * one by one, awaiting each call before moving on to the next line.
 *
 * @param {string} source - Path of the file to read.
 * @param {(line: string) => (void|Promise<void>)} callback - Invoked once per line.
 * @returns {Promise<void>} Settles after the last line has been handled.
 */
const processLineByLine = async (source, callback) => {
    const lineReader = readline.createInterface({
        input: fs.createReadStream(source),
        // Treat every "\r\n" pair as a single line break.
        crlfDelay: Infinity,
    });

    for await (const currentLine of lineReader) {
        await callback(currentLine);
    }
};
|
||||||
|
|
||||||
|
/**
 * Anonymizes a domain-like string by replacing every ASCII letter with 'a'.
 * Digits, dots and any other characters are left untouched, so the shape of
 * the name (label lengths, separators) is preserved.
 *
 * @param {string} domain - Domain name (or filtering rule text) to anonymize.
 * @returns {string} The input with all letters replaced by 'a'.
 */
const anonDomain = (domain) => {
    // Case-insensitive on purpose: DNS names may arrive in upper or mixed
    // case, and those letters must not survive anonymization either.
    return domain.replace(/[a-z]/gi, 'a');
};
|
||||||
|
|
||||||
|
/**
 * Anonymizes an IP address by replacing every digit with '1'.
 * Separators ('.' and ':') are kept, so the result still has the shape of the
 * original address.
 *
 * @param {string} ip - Textual IPv4 or IPv6 address.
 * @returns {string} The address with all of its digits replaced by '1'.
 */
const anonIP = (ip) => {
    // IPv6 addresses use hexadecimal digits, so a-f (in either case) must be
    // replaced as well; otherwise parts of the address leak through.
    return ip.replace(/[0-9a-f]/gi, '1');
};
|
||||||
|
|
||||||
|
/**
 * Anonymizes a base64-encoded DNS message.
 *
 * The message is decoded with `dns-packet`, every domain name and IP address
 * found in it is anonymized, and the message is encoded back to base64.
 *
 * @param {string} answer - Base64-encoded DNS message.
 * @returns {string} Base64-encoded anonymized DNS message.
 * @throws {Error} If the message cannot be decoded or re-encoded by `dns-packet`.
 */
const anonAnswer = (answer) => {
    const answerData = Buffer.from(answer, 'base64');
    const packet = dnsPacket.decode(answerData, 0);

    // Fields of structured record data that hold host names, following the
    // record shapes documented by dns-packet (MX: exchange, SRV: target,
    // SOA: mname/rname).
    const domainFields = ['exchange', 'target', 'mname', 'rname'];

    // Anonymizes a single resource record in place.
    const anonRecord = (record) => {
        record.name = anonDomain(record.name);

        const data = record.data;
        if (record.type === 'A' || record.type === 'AAAA') {
            record.data = anonIP(data);
        } else if (typeof data === 'string') {
            // CNAME/NS/PTR-style records carry a domain name as a plain string.
            record.data = anonDomain(data);
        } else if (
            data !== null
            && typeof data === 'object'
            && !Buffer.isBuffer(data)
            && !Array.isArray(data)
        ) {
            domainFields.forEach((field) => {
                if (typeof data[field] === 'string') {
                    data[field] = anonDomain(data[field]);
                }
            });
        }
    };

    packet.questions.forEach((q) => {
        q.name = anonDomain(q.name);
    });

    // Authority and additional records can reveal the queried zone just like
    // the answer section does, so all three sections are processed.
    [packet.answers, packet.authorities, packet.additionals].forEach((section) => {
        (section || []).forEach(anonRecord);
    });

    const anonData = dnsPacket.encode(packet);
    return anonData.toString('base64');
};
|
||||||
|
|
||||||
|
/**
 * Anonymizes a single query log line (one JSON object per line).
 *
 * The client IP ("IP"), the queried host ("QH"), the DNS messages ("Answer",
 * "OrigAnswer") and the matched filtering rule ("Result.Rule") are anonymized.
 * Any failure is logged and makes the function return null, so an entry that
 * could not be fully anonymized is never passed on.
 *
 * @param {string} line - Raw line read from the query log.
 * @returns {string|null} The anonymized entry serialized as JSON, or null when
 *   the line is empty or could not be processed.
 */
const anonLine = (line) => {
    if (!line) {
        return null;
    }

    try {
        const logItem = JSON.parse(line);

        // Use the shared helpers so the top-level fields are anonymized exactly
        // the same way as the data inside the DNS messages.
        logItem['IP'] = anonIP(logItem['IP']);
        logItem['QH'] = anonDomain(logItem['QH']);

        // Anonymize "Answer" and "OrigAnswer" fields
        if (logItem['Answer']) {
            logItem['Answer'] = anonAnswer(logItem['Answer']);
        }
        if (logItem['OrigAnswer']) {
            logItem['OrigAnswer'] = anonAnswer(logItem['OrigAnswer']);
        }

        // If Result is set, anonymize the "Rule" field
        if (logItem['Result'] && logItem['Result']['Rule']) {
            logItem['Result']['Rule'] = anonDomain(logItem['Result']['Rule']);
        }

        return JSON.stringify(logItem);
    } catch (ex) {
        console.error(`Failed to parse ${line}: ${ex} ${ex.stack}`);
        return null;
    }
};
|
||||||
|
|
||||||
|
/**
 * Reads the query log at `source` line by line and writes the anonymized
 * entries to `dest`, one JSON object per line. Lines that cannot be
 * anonymized are skipped.
 *
 * The returned promise settles only after the output stream has been ended
 * and has emitted 'finish', or rejects on a read or write failure.
 *
 * @param {string} source - Path of the query log to read.
 * @param {string} dest - Path of the file to write (overwritten if it exists).
 * @returns {Promise<void>}
 */
const anon = async (source, dest) => {
    const out = fs.createWriteStream(dest, {
        flags: 'w',
    });

    // Remember the first stream error and forward it to whoever is currently
    // waiting on the stream, so a write failure rejects this function instead
    // of surfacing as an unhandled 'error' event.
    let failure = null;
    let rejectPending = null;
    out.on('error', (err) => {
        failure = failure || err;
        if (rejectPending) {
            rejectPending(err);
            rejectPending = null;
        }
    });

    // Resolves when `event` fires on the output stream, rejects on stream error.
    const waitFor = (event) => new Promise((resolve, reject) => {
        if (failure) {
            reject(failure);
            return;
        }
        rejectPending = reject;
        out.once(event, () => {
            rejectPending = null;
            resolve();
        });
    });

    try {
        await processLineByLine(source, async (line) => {
            if (failure) {
                throw failure;
            }

            const newLine = anonLine(line);
            if (!newLine) {
                return;
            }

            // `write` returns false once the internal buffer is full; wait for
            // 'drain' so a large log is not buffered entirely in memory.
            if (!out.write(`${newLine}\n`)) {
                await waitFor('drain');
            }
        });

        if (failure) {
            throw failure;
        }

        // Flush everything and close the file before reporting completion.
        const finished = waitFor('finish');
        out.end();
        await finished;
    } catch (ex) {
        out.destroy();
        throw ex;
    }
};
|
||||||
|
|
||||||
|
/**
 * Command-line entry point: anonymizes the query log given as the first
 * argument and writes the result to the path given as the second argument.
 *
 * @returns {Promise<void>} Rejects when the arguments are missing, the source
 *   file does not exist, or the anonymization fails.
 */
const main = async () => {
    console.log('Start query log anonymization');

    const source = process.argv[2];
    const dest = process.argv[3];

    if (!source || !dest) {
        throw new Error('Usage: npm run anonymize <source> <dst>');
    }

    console.log(`Source: ${source}`);
    console.log(`Destination: ${dest}`);

    if (!fs.existsSync(source)) {
        throw new Error(`${source} not found`);
    }

    await anon(source, dest);

    // Only reached when the whole log has been processed successfully.
    console.log('Finished query log anonymization');
};

// Report any failure and signal it through the exit code instead of leaving
// an unhandled promise rejection.
main().catch((ex) => {
    console.error(ex);
    process.exitCode = 1;
});
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"name": "querylog",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"lockfileVersion": 1,
|
||||||
|
"requires": true,
|
||||||
|
"dependencies": {
|
||||||
|
"dns-packet": {
|
||||||
|
"version": "5.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.2.1.tgz",
|
||||||
|
"integrity": "sha512-JHj2yJeKOqlxzeuYpN1d56GfhzivAxavNwHj9co3qptECel27B1rLY5PifJAvubsInX5pGLDjAHuCfCUc2Zv/w==",
|
||||||
|
"requires": {
|
||||||
|
"ip": "^1.1.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ip": {
|
||||||
|
"version": "1.1.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/ip/-/ip-1.1.5.tgz",
|
||||||
|
"integrity": "sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo="
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
{
|
||||||
|
"name": "querylog",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"scripts": {
|
||||||
|
"anonymize": "node anonymize.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"dns-packet": "^5.2.1"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
{"IP":"192.168.0.0","T":"2020-08-31T16:43:37.724457416+03:00","QH":"mtalk.google.com","QT":"A","QC":"IN","CP":"","Answer":"rm+BgAABAAIAAAAABW10YWxrBmdvb2dsZQNjb20AAAEAAcAMAAUAAQAAnwUAEQxtb2JpbGUtZ3RhbGsBbMASwC4AAQABAAAAWQAEjvobvA==","Result":{},"Elapsed":48051030,"Upstream":"tls://dns-unfiltered.adguard.com:853"}
|
||||||
|
{"IP":"127.0.0.1","T":"2020-09-09T13:56:35.532956+03:00","QH":"example.org","QT":"AAAA","QC":"IN","CP":"","Answer":"mrOBgAABAAEAAAAAB2V4YW1wbGUDb3JnAAAcAAHADAAcAAEAAKjAABAmBigAAiAAAQJIGJMlyBlG","Result":{},"Elapsed":132164793,"Upstream":"https://dns10.quad9.net:443/dns-query"}
|
||||||
|
{"IP":"127.0.0.1","T":"2020-09-09T13:56:54.255453+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"wqmBgAABAAIAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQABwAwABQABAACTawAJBGRhcnQBbMAPwDAAAQABAAAA5gAErNkQhg==","Result":{},"Elapsed":48131793,"Upstream":"https://dns10.quad9.net:443/dns-query"}
|
||||||
|
{"IP":"127.0.0.1","T":"2020-09-09T13:57:07.495948+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"JP2BhQABAAAAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQAB","Result":{"IsFiltered":true,"Reason":3,"Rule":"||ad.doubleclick.net^","FilterID":1},"Elapsed":369806}
|
||||||
|
{"IP":"192.168.0.15","T":"2020-01-17T17:39:40.306375885+03:00","QH":"push.apple.com","QT":"TXT","QC":"IN","Answer":"8AWBgAABAAEAAAABBHB1c2gFYXBwbGUDY29tAAAQAAHADAAQAAEAABOsAAkIY291bnQ9NTAAACkFrAAAAAAAQAAMADwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=","Result":{},"Elapsed":30271893,"Upstream":"https://cloudflare-dns.com:443/dns-query"}
|
Loading…
Reference in New Issue