From 78b8ea3bb65fefa23b076b199d02a47a99df0cd0 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Sun, 10 Jun 2018 21:04:50 +0200 Subject: [PATCH] Initial import of my dirty script/test to calculate SimHash from MISP attributes and store it in Redis backend. --- README.md | 18 ++++++++++ build_similarities.py | 80 +++++++++++++++++++++++++++++++++++++++++++ keys.py.sample | 6 ++++ 3 files changed, 104 insertions(+) create mode 100644 README.md create mode 100755 build_similarities.py create mode 100644 keys.py.sample diff --git a/README.md b/README.md new file mode 100644 index 0000000..8eee8e0 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ + +# threatintel-attributes-compare + +A quick-and-dirty test to deduce the appropriate SimHash distance to use with a [MISP](https://github.com/MISP/MISP) dataset (per type). The idea is to analyse existing types and defines a +specific [SimHash](http://www.wwwconference.org/www2007/papers/paper215.pdf) distance depending of the attribute type (such as sigma, yara, text, comment or what ever type supported) in MISP when the correlation engine will support it. + + +# Usage + +~~~~ +python3 build_similarities.py --quiet --type=yara --distance=10 +~~~~ + +# Requirements + +- Redis +- SimHash Python library +- PyMISP diff --git a/build_similarities.py b/build_similarities.py new file mode 100755 index 0000000..31adc92 --- /dev/null +++ b/build_similarities.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# A quick-and-dirty test to deduce the appropriate SimHash distance to use with a +# MISP dataset (per type). The idea is to analyse existing types and defines a +# specific SimHash distance depending of the attribute type (such as sigma, +# yara, text, comment or what ever type supported) in MISP when the correlation +# engine will support it. +# +# Software licensed under the AGPL version 3 or later. +# +# Copyright (C) 2018 Alexandre Dulaunoy - a@foo.be + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +import argparse +import os +from simhash import Simhash +import redis + +r = redis.Redis(host='localhost', port=6380, encoding='utf-8', decode_responses=True) + +def init(url, key): + return PyMISP(url, key, misp_verifycert, 'json') + +def search(m, quiet, url, out=None, custom_type_attribute="text", + default_distance=19, skip=False): + controller = 'attributes' + result = m.search(controller, type_attribute=custom_type_attribute) + +# Redis data structure +# v: (key/value) -> values of a specific uuid +# r: (zrank) -> a set ranked by the SimHash distance of an attribute value +# compared to other attribute value +# all (set) -> all uuid analysed +# all_hits (set) -> all uuid matching the SimHash distance set +# hits (key - counter) -> number of times a distance is matched between two +# attributes values +# missed (key - counter) -> number of times a distance is not matched between +# two attributes values + + for e in result['response']['Attribute']: + r.sadd("all", e['uuid']) + r.set("v:{}".format(e['uuid']), e['value']) + + for att in r.smembers("all"): + if not quiet: + print(att) + if att != e['uuid']: + att_value = r.get("v:{}".format(att)) + distance = Simhash(e['value']).distance(Simhash(att_value)) + if distance > default_distance: + r.incr('missed') + continue + r.incr('hits') + r.sadd('all_hits', e['uuid']) + r.zadd('r:{}'.format(e['uuid']), '{}:{}'.format(e['event_id'], att), distance) + else: + print("don't compare self values") + + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get all the attributes in MISP and calculate the SimHash distance') + parser.add_argument("-q", "--quiet", action='store_true', help="Only display URLs to MISP") + parser.add_argument("-s", "--skip", action='store_true', help="Skip duplicate match from same MISP event", default=False) + parser.add_argument("-t", "--type", default='text') + parser.add_argument("-d", "--distance", default=19, type=int) + parser.add_argument("-o", "--output", help="Output file") + + args = parser.parse_args() + + if args.output is not None and os.path.exists(args.output): + print('Output file already exists, abort.') + exit(0) + + misp = init(misp_url, misp_key) + + search(misp, args.quiet, misp_url, args.output, custom_type_attribute=args.type, default_distance=args.distance, skip=args.skip) diff --git a/keys.py.sample b/keys.py.sample new file mode 100644 index 0000000..168b765 --- /dev/null +++ b/keys.py.sample @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +misp_url = 'https:///' +misp_key = 'Your MISP auth key' # The MISP auth key can be found on the MISP web interface under the automation section +misp_verifycert = True