mirror of
https://github.com/adulau/threatintel-attributes-compare.git
synced 2024-12-22 00:36:00 +00:00
81 lines
3.1 KiB
Python
81 lines
3.1 KiB
Python
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
#
|
||
|
# A quick-and-dirty test to deduce the appropriate SimHash distance to use with a
|
||
|
# MISP dataset (per type). The idea is to analyse existing types and define a
|
||
|
# specific SimHash distance depending on the attribute type (such as sigma,
|
||
|
# yara, text, comment or whatever type is supported) in MISP when the correlation
|
||
|
# engine will support it.
|
||
|
#
|
||
|
# Software licensed under the AGPL version 3 or later.
|
||
|
#
|
||
|
# Copyright (C) 2018 Alexandre Dulaunoy - a@foo.be
|
||
|
|
||
|
from pymisp import PyMISP
|
||
|
from keys import misp_url, misp_key, misp_verifycert
|
||
|
import argparse
|
||
|
import os
|
||
|
from simhash import Simhash
|
||
|
import redis
|
||
|
|
||
|
# Module-level Redis client used by search() for all reads and writes.
# decode_responses=True is requested so replies can be compared directly
# with the string uuids coming from MISP.
r = redis.Redis(
    host='localhost',
    port=6380,
    encoding='utf-8',
    decode_responses=True,
)
|
||
|
|
||
|
def init(url, key):
    """Create the PyMISP client used to query the MISP instance.

    :param url: base URL of the MISP instance
    :param key: API key used to authenticate against that instance
    :return: a ``PyMISP`` object built from ``url``, ``key``, the
        module-level ``misp_verifycert`` setting and the literal ``'json'``
        (presumably the output format -- confirm against the PyMISP version
        in use)
    """
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
|
||
|
|
||
|
def search(m, quiet, url, out=None, custom_type_attribute="text",
           default_distance=19, skip=False):
    """Compare MISP attribute values pairwise using their SimHash distance.

    Every attribute of type ``custom_type_attribute`` returned by MISP is
    recorded in Redis and then compared with every uuid currently held in the
    ``all`` set. Pairs whose distance is at most ``default_distance`` are
    counted as hits and stored in a per-attribute sorted set; the others are
    counted as missed.

    :param m: object exposing ``search(controller, type_attribute=...)`` and
        returning a dict with a ``['response']['Attribute']`` list
    :param quiet: when true, suppress the per-comparison progress output
    :param url: MISP URL; accepted for interface compatibility, currently
        unused in this function
    :param out: output file path; currently unused in this function
    :param custom_type_attribute: MISP attribute type to fetch and compare
    :param default_distance: largest SimHash distance still counted as a hit
    :param skip: currently unused in this function
    :return: None; all results are written to Redis through the module-level
        client ``r``
    """
    controller = 'attributes'
    result = m.search(controller, type_attribute=custom_type_attribute)

    # Redis data structure
    # v:<uuid> (key/value)   -> value of a specific attribute uuid
    # r:<uuid> (sorted set)  -> "<event_id>:<other uuid>" members scored by the
    #                           SimHash distance between the two attribute
    #                           values
    # all (set)              -> all uuid analysed
    # all_hits (set)         -> all uuid having at least one match within the
    #                           SimHash distance set
    # hits (key - counter)   -> number of times a distance is matched between
    #                           two attribute values
    # missed (key - counter) -> number of times a distance is not matched
    #                           between two attribute values

    for e in result['response']['Attribute']:
        uuid = e['uuid']
        r.sadd("all", uuid)
        r.set("v:{}".format(uuid), e['value'])

        # The hash of the current value does not change while it is compared
        # with the other attributes, so build it once per attribute.
        current_hash = Simhash(e['value'])

        for att in r.smembers("all"):
            if not quiet:
                print(att)

            if att == uuid:
                # The attribute was just added to "all", so it always meets
                # itself; keep the notice out of quiet runs.
                if not quiet:
                    print("don't compare self values")
                continue

            att_value = r.get("v:{}".format(att))
            distance = current_hash.distance(Simhash(att_value))
            if distance > default_distance:
                r.incr('missed')
                continue

            r.incr('hits')
            r.sadd('all_hits', uuid)
            # NOTE(review): this positional (member, score) form matches the
            # legacy redis-py 2.x ``Redis.zadd``; redis-py >= 3.0 expects
            # ``zadd(name, {member: score})`` -- confirm the client version.
            r.zadd('r:{}'.format(uuid), '{}:{}'.format(e['event_id'], att), distance)
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, refuse to clobber an
    # existing output file, then run the comparison against MISP.
    parser = argparse.ArgumentParser(description='Get all the attributes in MISP and calculate the SimHash distance')
    parser.add_argument("-q", "--quiet", action='store_true', help="Only display URLs to MISP")
    parser.add_argument("-s", "--skip", action='store_true', help="Skip duplicate match from same MISP event", default=False)
    parser.add_argument("-t", "--type", default='text')
    parser.add_argument("-d", "--distance", default=19, type=int)
    parser.add_argument("-o", "--output", help="Output file")

    args = parser.parse_args()

    if args.output is not None and os.path.exists(args.output):
        # Aborting is a failure: SystemExit with a message prints it to
        # stderr and exits with a non-zero status, and does not depend on
        # the site-provided exit() helper.
        raise SystemExit('Output file already exists, abort.')

    misp = init(misp_url, misp_key)

    search(misp, args.quiet, misp_url, args.output, custom_type_attribute=args.type, default_distance=args.distance, skip=args.skip)
|