threatintel-attributes-compare/build_similarities.py
2018-06-10 21:04:50 +02:00

80 lines
3.1 KiB
Python
Executable file

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# A quick-and-dirty test to deduce the appropriate SimHash distance to use with a
# MISP dataset (per type). The idea is to analyse existing types and define a
# specific SimHash distance depending on the attribute type (such as sigma,
# yara, text, comment or whatever type is supported) in MISP once the
# correlation engine supports it.
#
# Software licensed under the AGPL version 3 or later.
#
# Copyright (C) 2018 Alexandre Dulaunoy - a@foo.be
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import os
from simhash import Simhash
import redis
# Module-wide Redis connection used by search() to store attribute values,
# distance rankings and hit/miss counters. Targets localhost on port 6380 and
# asks the client to decode responses (utf-8) instead of returning raw bytes.
r = redis.Redis(host='localhost', port=6380, encoding='utf-8', decode_responses=True)
def init(url, key):
    """Create the PyMISP client used to query the MISP instance.

    ``url`` and ``key`` are handed to ``PyMISP`` unchanged, followed by the
    ``misp_verifycert`` setting imported from the local ``keys`` module and
    the literal ``'json'`` as fourth positional argument.
    """
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
def search(m, quiet, url, out=None, custom_type_attribute="text",
           default_distance=19, skip=False):
    """Compare each fetched attribute value with those already kept in Redis.

    All attributes of type ``custom_type_attribute`` are requested through
    ``m.search``. Each one is registered in Redis, then its value is compared
    (SimHash distance) with the value of every other uuid present in the
    ``all`` set at that moment.

    Redis data structure:
      v:<uuid> (key/value)  -> value of a specific attribute uuid
      r:<uuid> (sorted set) -> members "<event_id of this attribute>:<uuid of
                               the compared attribute>", scored by the SimHash
                               distance between the two values
      all (set)             -> all uuid analysed
      all_hits (set)        -> all uuid having at least one comparison within
                               the SimHash distance set
      hits (counter)        -> number of times the distance is matched between
                               two attribute values
      missed (counter)      -> number of times the distance is not matched
                               between two attribute values

    Args:
        m: client object exposing ``search(controller, type_attribute=...)``.
        quiet: when true, the uuid of each compared attribute is not printed.
        url: accepted for interface compatibility; not used by this function.
        out: accepted for interface compatibility; not used by this function.
        custom_type_attribute: attribute type to request.
        default_distance: largest SimHash distance still counted as a hit.
        skip: accepted for interface compatibility; not used by this function.
    """
    controller = 'attributes'
    result = m.search(controller, type_attribute=custom_type_attribute)
    for e in result['response']['Attribute']:
        r.sadd("all", e['uuid'])
        r.set("v:{}".format(e['uuid']), e['value'])
        # The fingerprint of the current value is the same for every
        # comparison below, so build it once instead of once per stored uuid.
        current_hash = Simhash(e['value'])
        for att in r.smembers("all"):
            if not quiet:
                print(att)
            if att == e['uuid']:
                # The attribute was just added to "all"; never rank it
                # against itself.
                print("don't compare self values")
                continue
            att_value = r.get("v:{}".format(att))
            distance = current_hash.distance(Simhash(att_value))
            if distance > default_distance:
                r.incr('missed')
                continue
            r.incr('hits')
            r.sadd('all_hits', e['uuid'])
            # NOTE(review): the positional (member, score) form matches the
            # legacy redis-py 2.x ``Redis.zadd``; redis-py 3.0+ expects a
            # ``{member: score}`` mapping - confirm the installed version.
            r.zadd('r:{}'.format(e['uuid']), '{}:{}'.format(e['event_id'], att), distance)
if __name__ == '__main__':
    # Command-line entry point: parse the options, build the MISP client and
    # run the SimHash comparison for the requested attribute type.
    parser = argparse.ArgumentParser(description='Get all the attributes in MISP and calculate the SimHash distance')
    # --quiet only silences the per-comparison uuid output produced by search().
    parser.add_argument("-q", "--quiet", action='store_true', help="Do not print the UUID of each attribute being compared")
    parser.add_argument("-s", "--skip", action='store_true', help="Skip duplicate match from same MISP event", default=False)
    parser.add_argument("-t", "--type", default='text', help="MISP attribute type to analyse (default: %(default)s)")
    parser.add_argument("-d", "--distance", default=19, type=int, help="Largest SimHash distance counted as a hit (default: %(default)s)")
    parser.add_argument("-o", "--output", help="Output file")
    args = parser.parse_args()
    if args.output is not None and os.path.exists(args.output):
        print('Output file already exists, abort.')
        # Aborting is a failure, so report a non-zero status to the caller.
        # SystemExit is raised directly rather than relying on the
        # site-provided exit() helper.
        raise SystemExit(1)
    misp = init(misp_url, misp_key)
    search(misp, args.quiet, misp_url, args.output, custom_type_attribute=args.type, default_distance=args.distance, skip=args.skip)