* : query log anonymization tool

This commit is contained in:
Andrey Meshkov 2020-09-18 14:42:04 +03:00
parent 38066151a6
commit f694a40c57
6 changed files with 172 additions and 0 deletions

2
scripts/querylog/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
node_modules
test/anonquerylog.json

View File

@ -0,0 +1,10 @@
# Helper tools to work with the Query log
### Usage
- `npm install` - Install the dependencies
- `npm run anonymize <source> <dst>` - Reads the query log from `<source>` and writes an anonymized version to `<dst>`
### Examples
- `npm run anonymize test/querylog.json test/anonquerylog.json` - anonymizes `test/querylog.json` and writes the result to `test/anonquerylog.json`.

View File

@ -0,0 +1,124 @@
const fs = require('fs');
const readline = require('readline');
const dnsPacket = require('dns-packet')
/**
 * Decodes a base64-encoded string and returns its bytes as an ASCII string.
 *
 * @param {string} data - base64-encoded input
 * @returns {string} the decoded bytes interpreted with the 'ascii' encoding
 */
const decodeBase64 = (data) => {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
    const buff = Buffer.from(data, 'base64');
    return buff.toString('ascii');
};
/**
 * Streams the file at `source` and hands every line to `callback`, one at a
 * time, waiting for each call to settle before reading the next line.
 *
 * @param {string} source - path of the file to read
 * @param {(line: string) => (void|Promise<void>)} callback - invoked per line
 * @returns {Promise<void>} resolves once every line has been processed
 */
const processLineByLine = async (source, callback) => {
    const lines = readline.createInterface({
        // With crlfDelay set to Infinity, "\r\n" is always read as one line break.
        crlfDelay: Infinity,
        input: fs.createReadStream(source),
    });

    for await (const text of lines) {
        await callback(text);
    }
};
/**
 * Masks a domain name (or any domain-bearing string, e.g. a filtering rule)
 * by replacing every ASCII letter with 'a'. Digits, dots and all other
 * characters are kept, so the overall shape of the value is preserved.
 *
 * @param {string} domain - value to anonymize
 * @returns {string} the value with all ASCII letters replaced by 'a'
 */
const anonDomain = (domain) => {
    // Case-insensitive on purpose: an uppercase or mixed-case name would
    // otherwise pass through unmasked.
    return domain.replace(/[a-z]/gi, 'a');
};
/**
 * Masks an IP address by replacing every digit with '1'. Separators
 * ('.' and ':') are kept, so the textual layout of the address is preserved.
 *
 * @param {string} ip - textual IPv4 or IPv6 address
 * @returns {string} the address with every decimal and hexadecimal digit replaced by '1'
 */
const anonIP = (ip) => {
    // IPv6 groups are hexadecimal, so a-f/A-F must be masked as well;
    // masking only 0-9 would leave part of the address readable.
    return ip.replace(/[0-9a-f]/gi, '1');
};
/**
 * Anonymizes a base64-encoded DNS message.
 *
 * The message is decoded with dns-packet, every question name and every
 * resource record in the answer, authority and additional sections is
 * masked, and the message is encoded back to base64.
 *
 * @param {string} answer - base64-encoded DNS message
 * @returns {string} base64-encoded anonymized DNS message
 * @throws if the message cannot be decoded or re-encoded by dns-packet
 */
const anonAnswer = (answer) => {
    // Fields of structured record data that hold domain names
    // (MX exchange, SRV target, SOA mname/rname in dns-packet's format).
    const domainFields = ['exchange', 'target', 'mname', 'rname'];

    // Masks the owner name and the payload of a single resource record.
    const anonRecord = (record) => {
        record.name = anonDomain(record.name);

        const data = record.data;
        if (record.type === 'A' || record.type === 'AAAA') {
            record.data = anonIP(data);
        } else if (typeof data === 'string') {
            // String payloads are treated as domain names (e.g. CNAME targets).
            record.data = anonDomain(data);
        } else if (
            data !== null
            && typeof data === 'object'
            && !Buffer.isBuffer(data)
            && !Array.isArray(data)
        ) {
            domainFields.forEach((field) => {
                if (typeof data[field] === 'string') {
                    data[field] = anonDomain(data[field]);
                }
            });
        }
        // NOTE(review): Buffer/array payloads (e.g. TXT) and records without
        // `data` (e.g. OPT) are passed through unchanged.
    };

    const packet = dnsPacket.decode(Buffer.from(answer, 'base64'), 0);

    (packet.questions || []).forEach((question) => {
        question.name = anonDomain(question.name);
    });

    // Authority and additional records can carry the same names and
    // addresses as the answer section, so all three are masked.
    [packet.answers, packet.authorities, packet.additionals].forEach((section) => {
        (section || []).forEach(anonRecord);
    });

    return dnsPacket.encode(packet).toString('base64');
};
/**
 * Anonymizes a single query log line (one JSON object per line).
 *
 * Masks the client IP ("IP"), the question host ("QH"), the encoded DNS
 * messages ("Answer", "OrigAnswer") and the matched filtering rule
 * ("Result.Rule"). Fields that are absent are simply skipped.
 *
 * @param {string} line - raw query log line
 * @returns {string|null} the anonymized entry serialized as JSON, or null
 *   when the line is empty or could not be processed (the failure is logged)
 */
const anonLine = (line) => {
    if (!line) {
        return null;
    }

    try {
        const logItem = JSON.parse(line);
        if (logItem === null || typeof logItem !== 'object' || Array.isArray(logItem)) {
            // Valid JSON that is not an object (e.g. `123`) is not a log entry.
            throw new TypeError('query log entry is not a JSON object');
        }

        // Reuse the shared helpers so every field is masked the same way.
        // String() ensures non-string values are still masked, not leaked.
        if (logItem.IP != null) {
            logItem.IP = anonIP(String(logItem.IP));
        }
        if (logItem.QH != null) {
            logItem.QH = anonDomain(String(logItem.QH));
        }

        // Anonymize "Answer" and "OrigAnswer" fields
        if (logItem.Answer) {
            logItem.Answer = anonAnswer(logItem.Answer);
        }
        if (logItem.OrigAnswer) {
            logItem.OrigAnswer = anonAnswer(logItem.OrigAnswer);
        }

        // If Result is set, anonymize the "Rule" field
        if (logItem.Result && logItem.Result.Rule) {
            logItem.Result.Rule = anonDomain(logItem.Result.Rule);
        }

        return JSON.stringify(logItem);
    } catch (ex) {
        console.error(`Failed to parse ${line}: ${ex} ${ex.stack}`);
        return null;
    }
};
/**
 * Reads the query log at `source`, anonymizes it line by line and writes the
 * result to `dest` (the destination file is truncated first).
 *
 * @param {string} source - path of the query log to read
 * @param {string} dest - path of the anonymized query log to write
 * @returns {Promise<void>} resolves only after all output has been flushed;
 *   rejects if reading or writing fails
 */
const anon = async (source, dest) => {
    const out = fs.createWriteStream(dest, {
        flags: 'w',
    });

    // Settles when the stream has flushed everything ('finish') or failed.
    const finished = new Promise((resolve, reject) => {
        out.once('finish', resolve);
        out.once('error', reject);
    });
    // The failure is re-surfaced by the awaits below; this only prevents an
    // unhandled rejection when we leave early because reading failed.
    finished.catch(() => {});

    try {
        await processLineByLine(source, async (line) => {
            const newLine = anonLine(line);
            if (!newLine) {
                return;
            }

            if (!out.write(`${newLine}\n`)) {
                // Respect backpressure: wait for 'drain'. Racing against
                // `finished` avoids hanging forever if the stream errors.
                await Promise.race([
                    new Promise((resolve) => out.once('drain', resolve)),
                    finished,
                ]);
            }
        });
    } finally {
        // Always close the stream, even when reading fails.
        out.end();
    }

    await finished;
};
/**
 * Command-line entry point: `node anonymize.js <source> <dst>`.
 *
 * Validates the arguments, runs the anonymization and reports the outcome.
 * Any failure is logged and signalled through a non-zero exit code; the
 * returned promise itself never rejects.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
    const source = process.argv[2];
    const dest = process.argv[3];
    if (!source || !dest) {
        console.error('Usage: npm run anonymize <source> <dst>');
        process.exitCode = 1;
        return;
    }

    console.log('Start query log anonymization');
    console.log(`Source: ${source}`);
    console.log(`Destination: ${dest}`);

    try {
        // Checked inside the try so a missing file is reported like any
        // other failure instead of becoming an unhandled rejection.
        if (!fs.existsSync(source)) {
            throw new Error(`${source} not found`);
        }
        await anon(source, dest);
    } catch (ex) {
        console.error(ex);
        process.exitCode = 1;
        return;
    }

    console.log('Finished query log anonymization');
};

main();

21
scripts/querylog/package-lock.json generated Normal file
View File

@ -0,0 +1,21 @@
{
"name": "querylog",
"version": "0.1.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"dns-packet": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.2.1.tgz",
"integrity": "sha512-JHj2yJeKOqlxzeuYpN1d56GfhzivAxavNwHj9co3qptECel27B1rLY5PifJAvubsInX5pGLDjAHuCfCUc2Zv/w==",
"requires": {
"ip": "^1.1.5"
}
},
"ip": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/ip/-/ip-1.1.5.tgz",
"integrity": "sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo="
}
}
}

View File

@ -0,0 +1,10 @@
{
"name": "querylog",
"version": "0.1.0",
"scripts": {
"anonymize": "node anonymize.js"
},
"dependencies": {
"dns-packet": "^5.2.1"
}
}

View File

@ -0,0 +1,5 @@
{"IP":"192.168.0.0","T":"2020-08-31T16:43:37.724457416+03:00","QH":"mtalk.google.com","QT":"A","QC":"IN","CP":"","Answer":"rm+BgAABAAIAAAAABW10YWxrBmdvb2dsZQNjb20AAAEAAcAMAAUAAQAAnwUAEQxtb2JpbGUtZ3RhbGsBbMASwC4AAQABAAAAWQAEjvobvA==","Result":{},"Elapsed":48051030,"Upstream":"tls://dns-unfiltered.adguard.com:853"}
{"IP":"127.0.0.1","T":"2020-09-09T13:56:35.532956+03:00","QH":"example.org","QT":"AAAA","QC":"IN","CP":"","Answer":"mrOBgAABAAEAAAAAB2V4YW1wbGUDb3JnAAAcAAHADAAcAAEAAKjAABAmBigAAiAAAQJIGJMlyBlG","Result":{},"Elapsed":132164793,"Upstream":"https://dns10.quad9.net:443/dns-query"}
{"IP":"127.0.0.1","T":"2020-09-09T13:56:54.255453+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"wqmBgAABAAIAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQABwAwABQABAACTawAJBGRhcnQBbMAPwDAAAQABAAAA5gAErNkQhg==","Result":{},"Elapsed":48131793,"Upstream":"https://dns10.quad9.net:443/dns-query"}
{"IP":"127.0.0.1","T":"2020-09-09T13:57:07.495948+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"JP2BhQABAAAAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQAB","Result":{"IsFiltered":true,"Reason":3,"Rule":"||ad.doubleclick.net^","FilterID":1},"Elapsed":369806}
{"IP":"192.168.0.15","T":"2020-01-17T17:39:40.306375885+03:00","QH":"push.apple.com","QT":"TXT","QC":"IN","Answer":"8AWBgAABAAEAAAABBHB1c2gFYXBwbGUDY29tAAAQAAHADAAQAAEAABOsAAkIY291bnQ9NTAAACkFrAAAAAAAQAAMADwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=","Result":{},"Elapsed":30271893,"Upstream":"https://cloudflare-dns.com:443/dns-query"}