mirror of
https://github.com/adulau/threatintel-attributes-compare.git
synced 2024-12-22 00:36:00 +00:00
Initial import of my dirty script/test to calculate SimHash from MISP
attributes and store it in Redis backend.
This commit is contained in:
commit
78b8ea3bb6
3 changed files with 104 additions and 0 deletions
18
README.md
Normal file
18
README.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
|
||||||
|
# threatintel-attributes-compare
|
||||||
|
|
||||||
|
A quick-and-dirty test to deduce the appropriate SimHash distance to use with a [MISP](https://github.com/MISP/MISP) dataset (per type). The idea is to analyse existing types and define a
|
||||||
|
specific [SimHash](http://www.wwwconference.org/www2007/papers/paper215.pdf) distance depending on the attribute type (such as sigma, yara, text, comment or whatever type is supported) in MISP once the correlation engine supports it.
|
||||||
|
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
~~~~
|
||||||
|
python3 build_similarities.py --quiet --type=yara --distance=10
|
||||||
|
~~~~
|
||||||
|
|
||||||
|
# Requirements
|
||||||
|
|
||||||
|
- Redis
|
||||||
|
- SimHash Python library
|
||||||
|
- PyMISP
|
80
build_similarities.py
Executable file
80
build_similarities.py
Executable file
|
@ -0,0 +1,80 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# A quick-and-dirty test to deduce the appropriate SimHash distance to use with a
|
||||||
|
# MISP dataset (per type). The idea is to analyse existing types and define a
|
||||||
|
# specific SimHash distance depending on the attribute type (such as sigma,
|
||||||
|
# yara, text, comment or whatever type is supported) in MISP when the correlation
|
||||||
|
# engine will support it.
|
||||||
|
#
|
||||||
|
# Software licensed under the AGPL version 3 or later.
|
||||||
|
#
|
||||||
|
# Copyright (C) 2018 Alexandre Dulaunoy - a@foo.be
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from simhash import Simhash
|
||||||
|
import redis
|
||||||
|
|
||||||
|
# Module-level Redis client used by search(). decode_responses=True makes
# replies come back as str instead of bytes.
r = redis.Redis(
    host='localhost',
    port=6380,
    encoding='utf-8',
    decode_responses=True,
)
|
||||||
|
|
||||||
|
def init(url, key):
    """Create the PyMISP client used to query a MISP instance.

    :param url: URL of the MISP instance
    :param key: MISP authentication key
    :returns: a ``PyMISP`` object built from ``url``, ``key``, the
        ``misp_verifycert`` setting imported from ``keys`` and the
        positional argument ``'json'``
    """
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
|
||||||
|
|
||||||
|
def search(m, quiet, url, out=None, custom_type_attribute="text",
           default_distance=19, skip=False):
    """Fetch MISP attributes of one type and store their SimHash proximity in Redis.

    Each attribute returned by the search is registered in Redis and then
    compared with every uuid currently in the ``all`` set. That set is not
    cleared here, so uuids left over from earlier runs are compared as well.

    Redis data structure:
      v:<uuid>  (key/value)     -> value of a specific attribute uuid
      r:<uuid>  (sorted set)    -> "<event_id>:<other uuid>" members scored by
                                   the SimHash distance between this attribute
                                   value and the other attribute value
      all       (set)           -> all uuid analysed
      all_hits  (set)           -> all uuid matching the SimHash distance set
      hits      (key - counter) -> number of times a distance is matched
                                   between two attribute values
      missed    (key - counter) -> number of times a distance is not matched
                                   between two attribute values

    :param m: client object exposing ``search(controller, type_attribute=...)``
        (the PyMISP object returned by ``init``)
    :param quiet: when true, suppress the per-comparison console output
    :param url: unused; kept for interface compatibility
    :param out: unused; kept for interface compatibility
    :param custom_type_attribute: MISP attribute type to fetch
    :param default_distance: largest SimHash distance (inclusive) that is
        still counted as a hit
    :param skip: unused; the "skip duplicate match from same event" option is
        accepted but not implemented
    :returns: None; all results are written to Redis
    :raises KeyError: if the search result has no ``response``/``Attribute``
        entry
    """
    controller = 'attributes'
    result = m.search(controller, type_attribute=custom_type_attribute)

    for e in result['response']['Attribute']:
        uuid = e['uuid']
        r.sadd("all", uuid)
        r.set("v:{}".format(uuid), e['value'])

        # The hash of the current value does not depend on the inner loop,
        # so compute it once instead of once per comparison.
        current_hash = Simhash(e['value'])

        for att in r.smembers("all"):
            if not quiet:
                print(att)

            if att == uuid:
                # The attribute was just added to "all", so it always meets
                # itself here. Honour --quiet for this message too.
                if not quiet:
                    print("don't compare self values")
                continue

            att_value = r.get("v:{}".format(att))
            distance = current_hash.distance(Simhash(att_value))
            if distance > default_distance:
                r.incr('missed')
                continue

            r.incr('hits')
            r.sadd('all_hits', uuid)
            # NOTE(review): the member is built from the event_id of the
            # *current* attribute and the uuid of the *other* one - confirm
            # this pairing is intended.
            # NOTE(review): this (name, member, score) argument form matches
            # the legacy redis-py 2.x ``Redis.zadd``; redis-py >= 3.0 expects
            # ``zadd(name, {member: score})`` - confirm the installed version.
            r.zadd('r:{}'.format(uuid), '{}:{}'.format(e['event_id'], att), distance)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Imported locally so this block does not depend on the file-level imports.
    import sys

    parser = argparse.ArgumentParser(description='Get all the attributes in MISP and calculate the SimHash distance')
    parser.add_argument("-q", "--quiet", action='store_true', help="Do not print progress output while comparing attributes")
    parser.add_argument("-s", "--skip", action='store_true', help="Skip duplicate match from same MISP event", default=False)
    parser.add_argument("-t", "--type", default='text', help="MISP attribute type to analyse (default: text)")
    parser.add_argument("-d", "--distance", default=19, type=int, help="Maximum SimHash distance counted as a match (default: 19)")
    parser.add_argument("-o", "--output", help="Output file")

    args = parser.parse_args()

    # Refuse to run when the requested output file is already present.
    if args.output is not None and os.path.exists(args.output):
        print('Output file already exists, abort.')
        # Aborting is a failure: report it with a non-zero status, and use
        # sys.exit rather than the site-provided exit() helper.
        sys.exit(1)

    misp = init(misp_url, misp_key)

    search(misp, args.quiet, misp_url, args.output, custom_type_attribute=args.type, default_distance=args.distance, skip=args.skip)
|
6
keys.py.sample
Normal file
6
keys.py.sample
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# URL of the MISP instance to query.
misp_url = 'https://<your MISP URL>/'

# The MISP auth key can be found on the MISP web interface under the
# automation section.
misp_key = 'Your MISP auth key'

# Passed to PyMISP by build_similarities.py; presumably controls TLS
# certificate verification - confirm against the PyMISP documentation.
misp_verifycert = True
|
Loading…
Reference in a new issue