new: [feature] session handling added

A user can now create a session, assign lookup results to a session
and retrieve the lookup session results in one shot.

This partially implements the feature requested in issue #2 to support
DFIR sessions.

Thanks to Koen Van Impe for the idea.
This commit is contained in:
Alexandre Dulaunoy 2021-08-14 10:32:00 +02:00
parent e87e8a412d
commit 284e4719c7
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
3 changed files with 119 additions and 4 deletions

View file

@ -223,6 +223,53 @@ curl -X 'POST' 'https://hashlookup.circl.lu/bulk/sha1' -H "Content-Type: appli
|404| 404 means the searched hash is not present in any of the databases|
|400| 400 means the input used for the hash is in an incorrect format|
# API and Session
A session feature can be enabled on the server side (not enabled on the public instance of CIRCL) to easily track submitted hashes.
The session created has a TTL, and after the expiration, the associated queries of the session will be removed. This feature can be used
to separate different forensic analyses and gather all the results in one go later.
## Create a session
A session can be created via the `/session/create/` endpoint with the name of the session. If the session is recreated, the TTL is reset to the default value.
~~~
curl -X 'GET' 'http://127.0.0.1:5000/session/create/test' -H 'accept: application/json'
~~~
~~~json
{
"message": "Session test created and session will expire in 86400 seconds"
}
~~~
## Use a session
To assign the results to a specific session, the `hashlookup_session` header must be set to the name of the created session. This can be used on all the `lookup` API endpoints.
~~~
curl -X 'GET' 'http://127.0.0.1:5000/lookup/md5/8ED4B4ED952526D89899E723F3488DE4' -H 'hashlookup_session: test' -H 'accept: application/json' | jq .
~~~
## Fetch a session
~~~
curl -s -X 'GET' 'http://127.0.0.1:5000/session/get/test' -H 'accept: application/json' | jq .
~~~
~~~json
{
"nx": [
"8ED4B4ED952526D89899E723F3488DE2",
"8ED4B4ED952526D89899E723F3488DE3"
],
"exist": [
"8ED4B4ED952526D89899E723F3488DE4"
],
"info": "{'ip_addr': '127.0.0.1', 'user_agent': 'curl/7.78.0'}"
}
~~~
# Querying the hashlookup database via DNS
@ -266,6 +313,8 @@ dig +short -t TXT 931606baaa7a2b4ef61198406f8fc3f4.dns.hashlookup.circl.lu | jq
}
~~~
# Sample digital forensic use-cases
## How to quickly check a set of files in a local directory?

View file

@ -9,6 +9,9 @@ config = configparser.ConfigParser()
# Load the server configuration; the path is relative to the working directory.
config.read('../etc/server.conf')
# When enabled, every lookup is counted in the statistics sorted sets.
stats = config['global'].getboolean('stats')
# When enabled, every lookup is published on the pub/sub channels. This reads
# its own `stats_pubsub` option (the `stats` option used to be read twice) and
# falls back to the `stats` value so that configuration files without the
# option keep their previous behaviour.
stats_pubsub = config['global'].getboolean('stats_pubsub', fallback=stats)
# Increment applied to a statistics sorted-set member for each lookup.
score = 1
# Session support is optional: when the [session] section or one of its
# options is missing, the feature is disabled and the TTL defaults to one day
# instead of failing at start-up with a KeyError.
session = config.getboolean('session', 'enable', fallback=False)
# Session lifetime in seconds, parsed as an integer.
session_ttl = config.getint('session', 'ttl', fallback=86400)
# Flask application object that serves the API.
app = Flask(__name__)
# Turn off strict slash handling on the URL map (presumably so that routes
# match with or without a trailing slash -- confirm against Werkzeug docs).
app.url_map.strict_slashes = False
# REST API wrapper around the Flask app; `doc='/'` places the interactive API
# documentation at the site root. The description below is Markdown shown in
# that interface.
api = Api(app, version=version, title='hashlookup CIRCL API', description='![](https://www.circl.lu/assets/images/circl-logo.png)\n[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. The API can be tested live in the interface below.', doc='/', license='CC-BY', contact='info@circl.lu', ordered=True)
@ -29,6 +32,7 @@ def client_info():
ip = request.environ['HTTP_X_FORWARDED_FOR']
user_agent = request.headers.get('User-Agent')
return ({'ip_addr': ip, 'user_agent': user_agent})
def pub_lookup(channel=None, k=None):
if channel is None:
return False
@ -39,6 +43,18 @@ def pub_lookup(channel=None, k=None):
rdb.publish(channel, json.dumps(client))
return True
def get_session():
    """Return the remaining TTL of the session named by the current request.

    The session name is taken from the `hashlookup_session` request header and
    looked up under the Redis key `session:<name>`.

    Returns:
        The remaining time to live of the session key in seconds (a positive
        integer), or False when the session feature is disabled, the header
        is absent, the session does not exist, or the session has no usable
        (positive) TTL.
    """
    if not session:
        return False
    # Read the header once and reuse the value for every key built below.
    session_name = request.headers.get('hashlookup_session')
    if session_name is None:
        return False
    session_key = "session:{}".format(session_name)
    if not rdb.exists(session_key):
        return False
    print("Using session_name: {}".format(session_name))
    ttl = rdb.ttl(session_key)
    # Redis reports a negative TTL for a key without expiry or a key that
    # vanished since the exists() check. Callers pass the value straight to
    # expire(), and a non-positive timeout there deletes the result set that
    # was just written, so such a session is treated as unusable.
    if ttl is None or ttl <= 0:
        return False
    return ttl
@api.route('/lookup/md5/<string:md5>')
@api.doc(description="Lookup MD5.")
class lookup(Resource):
@ -48,17 +64,27 @@ class lookup(Resource):
if not is_hex(md5):
return {'message': 'MD5 is not in hex format'}, 400
k = md5.upper()
score = 1
ttl = False
if session:
ttl = get_session()
if not rdb.exists("l:{}".format(k)):
if stats:
rdb.zincrby("s:nx:md5", score, k)
if stats_pubsub:
pub_lookup(channel='nx', k=k)
if session and ttl is not False:
session_key = "session:{}:nx".format(request.headers.get('hashlookup_session'))
rdb.sadd(session_key, k)
rdb.expire(session_key, ttl)
return {'message': 'Non existing MD5', 'query': md5}, 404
if stats:
rdb.zincrby("s:exist:md5", score, k)
if stats_pubsub:
pub_lookup(channel='exist', k=k)
pub_lookup(channel='exist', k=k)
if session and ttl is not False:
session_key = "session:{}:exist".format(request.headers.get('hashlookup_session'))
rdb.sadd(session_key, k)
rdb.expire(session_key, ttl)
sha1 = rdb.get("l:{}".format(k))
h = rdb.hgetall("h:{}".format(sha1))
if "OpSystemCode" in h:
@ -78,17 +104,24 @@ class lookup(Resource):
if not is_hex(sha1):
return {'message': 'SHA-1 is not in hex format'}, 400
k = sha1.upper()
score = 1
if not rdb.exists("h:{}".format(k)):
if stats:
rdb.zincrby("s:nx:sha1", score, k)
if stats_pubsub:
pub_lookup(channel='nx', k=k)
if session and ttl is not False:
session_key = "session:{}:nx".format(request.headers.get('hashlookup_session'))
rdb.sadd(session_key, k)
rdb.expire(session_key, ttl)
return {'message': 'Non existing SHA-1', 'query': sha1}, 404
if stats:
rdb.zincrby("s:exist:sha1", score, k)
if stats_pubsub:
pub_lookup(channel='exist', k=k)
if session and ttl is not False:
session_key = "session:{}:exist".format(request.headers.get('hashlookup_session'))
rdb.sadd(session_key, k)
rdb.expire(session_key, ttl)
h = rdb.hgetall("h:{}".format(k))
if "OpSystemCode" in h:
if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])):
@ -141,6 +174,37 @@ class bulksha1(Resource):
ret.append(rdb.hgetall("h:{}".format(val.upper())))
return ret
# Resource creating a named lookup session (or refreshing an existing one).
#
# GET /session/create/<name>
#   - 400 when the name is missing or longer than 120 characters,
#   - 500 when the session feature is disabled in the configuration,
#   - otherwise stores the client information under `session:<name>` with the
#     configured TTL and returns a confirmation message.
# Comments are used instead of docstrings so that the generated API
# documentation is left exactly as it was.
@api.route('/session/create/<string:name>')
@api.doc(description="Create a session key to keep search context. The session is attached to a name. After the session is created, the header `hashlookup_session` can be set to the session name.")
class sessioncreate(Resource):
    def get(self, name):
        if name is None or len(name) > 120:
            return {'message': 'Expecting a name for the session'}, 400
        if session is False:
            return {'message': 'Session feature is not enabled'}, 500
        # The TTL may come from the configuration as a string; normalise it
        # once so Redis always receives an integer number of seconds.
        ttl = int(session_ttl)
        # Write the value and its expiry in a single SET command. With a
        # separate EXPIRE call, a failure between the two commands would leave
        # a session key that never expires.
        rdb.set('session:{}'.format(name), str(client_info()), ex=ttl)
        return {'message': 'Session {} created and session will expire in {} seconds'.format(name, ttl)}
# Resource returning everything recorded for a named lookup session.
#
# GET /session/get/<name>
#   - 400 when the name is missing or longer than 120 characters,
#   - 500 when the session feature is disabled in the configuration,
#   - 404 when no `session:<name>` key exists,
#   - otherwise a dictionary with the hashes that were not found (`nx`), the
#     hashes that were found (`exist`) and the stored session value (`info`).
# NOTE(review): this class shares its name with the /session/create resource;
# the name is kept unchanged here because it is part of the block's interface.
@api.route('/session/get/<string:name>')
@api.doc(description="Return set of matching and non-matching hashes from a session.")
class sessioncreate(Resource):
    def get(self, name):
        name_is_valid = name is not None and len(name) <= 120
        if not name_is_valid:
            return {'message': 'Expecting a name for the session'}, 400
        if session is False:
            return {'message': 'Session feature is not enabled'}, 500
        base_key = 'session:{}'.format(name)
        if not rdb.exists(base_key):
            return {'message': 'Non-existing session'}, 404
        # The dictionary entries are evaluated top to bottom, so Redis is
        # queried in the order: nx set, exist set, session value.
        return {
            'nx': list(rdb.smembers(base_key + ':nx')),
            'exist': list(rdb.smembers(base_key + ':exist')),
            'info': rdb.get(base_key),
        }
if __name__ == '__main__':
app.run(host='0.0.0.0')

View file

@ -1,4 +1,6 @@
[global]
stats = yes
stats_pubsub = yes
[session]
enable = yes
ttl = 86400