2022-01-15 13:55:09 +00:00
#!/usr/bin/env python
2021-12-04 12:59:19 +00:00
# Version string of this server; passed to the Api constructor and reported
# by the /info endpoint under the 'hashlookup-version' key.
version = " 1.2 "
2021-07-15 15:49:52 +00:00
from flask import Flask , url_for , send_from_directory , render_template , make_response , request
from flask_restx import Resource , Api , reqparse
import redis
2021-08-13 20:13:25 +00:00
import configparser
2021-08-13 20:42:41 +00:00
import json
2021-07-15 15:49:52 +00:00
2021-08-13 20:13:25 +00:00
# Runtime settings are read once at import time. The path is relative, so it
# is resolved against the working directory of the process.
config = configparser.ConfigParser()
config.read(' ../etc/server.conf ')

# Feature switches from the global section.
_global_conf = config[' global ']
stats = _global_conf.getboolean(' stats ')
stats_pubsub = _global_conf.getboolean(' stats_pubsub ')
stats_public = _global_conf.getboolean(' stats_public ')

# Increment applied to the statistics sorted sets for every lookup.
score = 1

# Session feature: on/off switch plus the lifetime handed to Redis EXPIRE.
_session_conf = config[' session ']
session = _session_conf.getboolean(' enable ')
session_ttl = _session_conf.get(' ttl ')
2021-07-15 15:49:52 +00:00
# Flask application object; the flask-restx Api below is attached to it.
app = Flask ( __name__ )
# Let routes match with or without a trailing slash.
app . url_map . strict_slashes = False
2022-01-15 13:55:09 +00:00
# flask-restx API wrapper: carries the metadata (title, description, license,
# contact) used for the generated documentation, which is served at `doc`.
api = Api ( app , version = version , title = ' hashlookup CIRCL API ' , description = ' ![](https://www.circl.lu/assets/images/circl-logo.png) \n [CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. For more details about all the datasets included [visit the website of the project](https://www.circl.lu/services/hashlookup/). The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) with sample queries and software using hashlookup. An offline version as Bloom filter is also [available](https://circl.lu/services/hashlookup/#how-to-quickly-check-a-set-of-files-in-a-local-directory). The API can be tested live in the interface below. ' , doc = ' / ' , license = ' CC-BY ' , contact = ' info@circl.lu ' , ordered = True )
2021-07-15 15:49:52 +00:00
# Shared Redis client used by every endpoint. decode_responses=True asks
# redis-py to return replies as str instead of bytes.
# NOTE(review): host and port are hard-coded here rather than read from `config`.
rdb = redis . Redis ( host = ' 127.0.0.1 ' , port = ' 6666 ' , decode_responses = True )
def is_hex(s):
    """Return True when ``s`` is a non-empty string made only of hex digits.

    ``int(s, 16)`` is deliberately not used for the check: it also accepts a
    ``0x`` prefix, a leading sign, underscores between digits and surrounding
    whitespace, so a value such as ``"0x" + "a" * 30`` would be accepted as a
    32-character digest.

    Args:
        s: Candidate value, normally a string taken from the request.

    Returns:
        bool: True if ``s`` is a str whose characters are all in
        ``0-9a-fA-F``; False otherwise (including empty or non-str input).
    """
    if not isinstance(s, str) or not s:
        return False
    return all(ch in "0123456789abcdefABCDEF" for ch in s)
2021-08-29 10:25:43 +00:00
def check_md5(value=None):
    """Validate an MD5 digest given as text.

    Args:
        value: Candidate digest; must be 32 characters accepted by ``is_hex``.

    Returns:
        The upper-cased digest when valid, otherwise ``False``.
    """
    well_formed = value is not None and len(value) == 32 and is_hex(value)
    return value.upper() if well_formed else False
def check_sha1(value=None):
    """Validate a SHA-1 digest given as text.

    Args:
        value: Candidate digest; must be 40 characters accepted by ``is_hex``.

    Returns:
        The upper-cased digest when valid, otherwise ``False``.
    """
    well_formed = value is not None and len(value) == 40 and is_hex(value)
    return value.upper() if well_formed else False
2021-11-19 06:26:50 +00:00
def check_sha256(value=None):
    """Validate a SHA-256 digest given as text.

    Args:
        value: Candidate digest; must be 64 characters accepted by ``is_hex``.

    Returns:
        The upper-cased digest when valid, otherwise ``False``.
    """
    well_formed = value is not None and len(value) == 64 and is_hex(value)
    return value.upper() if well_formed else False
2021-08-13 20:42:41 +00:00
def client_info():
    """Collect details about the client of the current request.

    The forwarded-for entry of the WSGI environ is preferred when present;
    otherwise the remote address entry is used.
    NOTE(review): the forwarded value is supplied by the client or a proxy
    and is used as-is.

    Returns:
        dict: the address, the User-Agent header and the authorization
        entry of the environ (``None`` when absent).
    """
    environ = request.environ
    forwarded_for = environ.get(' HTTP_X_FORWARDED_FOR ')
    ip = environ[' REMOTE_ADDR '] if forwarded_for is None else forwarded_for
    return {
        ' ip_addr ': ip,
        ' user_agent ': request.headers.get(' User-Agent '),
        # .get() already yields None when no authorization entry exists.
        ' auth ': environ.get(' HTTP_AUTHORIZATION '),
    }
2021-08-14 08:32:00 +00:00
2021-08-13 20:42:41 +00:00
def pub_lookup(channel=None, k=None):
    """Publish a lookup event on a Redis pub/sub channel.

    The message is the JSON-encoded result of ``client_info()`` extended
    with the looked-up value.

    Args:
        channel: Name of the channel to publish on.
        k: The value that was looked up.

    Returns:
        bool: False when ``channel`` or ``k`` is missing, True once the
        message has been published.
    """
    if channel is None or k is None:
        return False
    message = client_info()
    message[' value '] = k
    rdb.publish(channel, json.dumps(message))
    return True
2021-08-14 08:32:00 +00:00
def get_session():
    """Return the remaining TTL of the session named in the request headers.

    Returns:
        ``False`` when the session feature is disabled, when the request has
        no session header, or when no such session key exists in Redis;
        otherwise the value of ``rdb.ttl`` for the session key.
    """
    if session is False:
        return False
    session_name = request.headers.get(' hashlookup_session ')
    if session_name is None:
        return False
    session_key = " session: {} ".format(session_name)
    if not rdb.exists(session_key):
        return False
    print(" Using session_name: {} ".format(session_name))
    return rdb.ttl(session_key)
2021-12-02 06:33:20 +00:00
def calculate_trust(hobject=None):
    """Compute the trust level of a lookup result and store it in the result.

    The trust level is between 0 and 100. 50 means the trust is unknown.
    Every known parent adds 5 points; a record flagged as known malicious
    loses 20 points. The result is clamped to the 0-100 range.

    Args:
        hobject: Result dictionary of a lookup; modified in place.

    Returns:
        The same dictionary with the trust key set, or ``False`` when
        ``hobject`` is None.

    Raises:
        ValueError: if the parent count cannot be converted to an integer.
    """
    if hobject is None:
        return False
    trust = 50
    if ' hashlookup:parent-total ' in hobject:
        # The count may arrive as text (Redis replies are decoded to str);
        # without int() the multiplication would repeat the string instead.
        trust += 5 * int(hobject[' hashlookup:parent-total '])
    if ' KnownMalicious ' in hobject:
        trust -= 20
    # Keep the value inside the documented 0..100 range on both ends.
    hobject[' hashlookup:trust '] = max(0, min(100, trust))
    return hobject
2021-07-15 15:49:52 +00:00
@api.route(' /lookup/md5/<string:md5> ')
@api.doc(description=" Lookup MD5. ")
class lookup(Resource):
    """Endpoint returning the record known for a single MD5 digest."""

    def _account(self, k, ttl, stat_key, channel, session_key_format):
        """Record one lookup in the statistics, pub/sub and session sets."""
        if stats:
            rdb.zincrby(stat_key, score, k)
        if stats_pubsub:
            pub_lookup(channel=channel, k=k)
        if session and ttl is not False:
            session_key = session_key_format.format(request.headers.get(' hashlookup_session '))
            rdb.sadd(session_key, k)
            rdb.expire(session_key, ttl)

    def _attach_relatives(self, h, digest, set_key_format, total_field, list_field):
        """Add the member count and member records of a relation set to ``h``."""
        set_key = set_key_format.format(digest)
        if not rdb.exists(set_key):
            return
        total = rdb.scard(set_key)
        # Small sets are returned whole; larger ones as a random sample of 10.
        if total <= 15:
            members = rdb.smembers(set_key)
        else:
            members = rdb.srandmember(set_key, number=10)
        h[total_field] = total
        h[list_field] = [rdb.hgetall(" h: {} ".format(member)) for member in members]

    def get(self, md5):
        """Look up ``md5``; 400 on a malformed value, 404 when unknown."""
        k = check_md5(value=md5)
        if k is False:
            return {' message ': ' MD5 value incorrect, expecting a MD5 value in hex format '}, 400
        ttl = get_session() if session else False
        link_key = " l: {} ".format(k)
        record_key = " h: {} ".format(k)
        if not (rdb.exists(link_key) or rdb.exists(record_key)):
            self._account(k, ttl, " s:nx:md5 ", ' nx ', " session: {} :nx ")
            return {' message ': ' Non existing MD5 ', ' query ': md5}, 404
        self._account(k, ttl, " s:exist:md5 ", ' exist ', " session: {} :exist ")
        # Either the record is stored directly under the queried value, or
        # the link key holds the identifier of the record to load.
        if rdb.exists(record_key) and not rdb.exists(link_key):
            sha1 = k
            h = rdb.hgetall(record_key)
        else:
            sha1 = rdb.get(link_key)
            h = rdb.hgetall(" h: {} ".format(sha1))
        # Replace code references by the referenced hashes when they exist.
        code_fields = (
            (' OpSystemCode ', " h-OpSystemCode: {} "),
            (' ProductCode ', " h-ProductCode: {} "),
        )
        for field, ref_format in code_fields:
            if field in h:
                ref_key = ref_format.format(h[field])
                if rdb.exists(ref_key):
                    h[field] = rdb.hgetall(ref_key)
        self._attach_relatives(h, sha1, " p: {} ", ' hashlookup:parent-total ', ' parents ')
        self._attach_relatives(h, sha1, " c: {} ", ' hashlookup:children-total ', ' children ')
        return calculate_trust(hobject=h)
2021-07-15 15:49:52 +00:00
@api.route(' /lookup/sha1/<string:sha1> ')
@api.doc(description=" Lookup SHA-1. ")
class lookup(Resource):
    """Endpoint returning the record known for a single SHA-1 digest."""

    def _account(self, k, ttl, stat_key, channel, session_key_format):
        """Record one lookup in the statistics, pub/sub and session sets."""
        if stats:
            rdb.zincrby(stat_key, score, k)
        if stats_pubsub:
            pub_lookup(channel=channel, k=k)
        if session and ttl is not False:
            session_key = session_key_format.format(request.headers.get(' hashlookup_session '))
            rdb.sadd(session_key, k)
            rdb.expire(session_key, ttl)

    def _attach_relatives(self, h, digest, set_key_format, total_field, list_field):
        """Add the member count and member records of a relation set to ``h``."""
        set_key = set_key_format.format(digest)
        if not rdb.exists(set_key):
            return
        total = rdb.scard(set_key)
        # Small sets are returned whole; larger ones as a random sample of 10.
        if total <= 15:
            members = rdb.smembers(set_key)
        else:
            members = rdb.srandmember(set_key, number=10)
        h[total_field] = total
        h[list_field] = [rdb.hgetall(" h: {} ".format(member)) for member in members]

    def get(self, sha1):
        """Look up ``sha1``; 400 on a malformed value, 404 when unknown."""
        k = check_sha1(value=sha1)
        if k is False:
            return {' message ': ' SHA1 value incorrect, expecting a SHA1 value in hex format '}, 400
        ttl = get_session() if session else False
        record_key = " h: {} ".format(k)
        if not rdb.exists(record_key):
            self._account(k, ttl, " s:nx:sha1 ", ' nx ', " session: {} :nx ")
            return {' message ': ' Non existing SHA-1 ', ' query ': sha1}, 404
        self._account(k, ttl, " s:exist:sha1 ", ' exist ', " session: {} :exist ")
        h = rdb.hgetall(record_key)
        # Replace code references by the referenced hashes when they exist.
        code_fields = (
            (' OpSystemCode ', " h-OpSystemCode: {} "),
            (' ProductCode ', " h-ProductCode: {} "),
        )
        for field, ref_format in code_fields:
            if field in h:
                ref_key = ref_format.format(h[field])
                if rdb.exists(ref_key):
                    h[field] = rdb.hgetall(ref_key)
        self._attach_relatives(h, k, " p: {} ", ' hashlookup:parent-total ', ' parents ')
        self._attach_relatives(h, k, " c: {} ", ' hashlookup:children-total ', ' children ')
        return calculate_trust(hobject=h)
2021-11-19 06:26:50 +00:00
@api.route(' /lookup/sha256/<string:sha256> ')
@api.doc(description=" Lookup SHA-256. ")
class lookup(Resource):
    """Endpoint returning the record known for a single SHA-256 digest."""

    def _account(self, k, ttl, stat_key, channel, session_key_format):
        """Record one lookup in the statistics, pub/sub and session sets."""
        if stats:
            rdb.zincrby(stat_key, score, k)
        if stats_pubsub:
            pub_lookup(channel=channel, k=k)
        if session and ttl is not False:
            session_key = session_key_format.format(request.headers.get(' hashlookup_session '))
            rdb.sadd(session_key, k)
            rdb.expire(session_key, ttl)

    def _attach_relatives(self, h, digest, set_key_format, total_field, list_field):
        """Add the member count and member records of a relation set to ``h``."""
        set_key = set_key_format.format(digest)
        if not rdb.exists(set_key):
            return
        total = rdb.scard(set_key)
        # Small sets are returned whole; larger ones as a random sample of 10.
        if total <= 15:
            members = rdb.smembers(set_key)
        else:
            members = rdb.srandmember(set_key, number=10)
        h[total_field] = total
        h[list_field] = [rdb.hgetall(" h: {} ".format(member)) for member in members]

    def get(self, sha256):
        """Look up ``sha256``; 400 on a malformed value, 404 when unknown."""
        k = check_sha256(value=sha256)
        if k is False:
            return {' message ': ' SHA-256 value incorrect, expecting a SHA-256 value in hex format '}, 400
        ttl = get_session() if session else False
        link_key = " l: {} ".format(k)
        record_key = " h: {} ".format(k)
        if not (rdb.exists(link_key) or rdb.exists(record_key)):
            self._account(k, ttl, " s:nx:sha256 ", ' nx ', " session: {} :nx ")
            return {' message ': ' Non existing SHA-256 ', ' query ': sha256}, 404
        self._account(k, ttl, " s:exist:sha256 ", ' exist ', " session: {} :exist ")
        # Either the record is stored directly under the queried value, or
        # the link key holds the identifier of the record to load.
        if rdb.exists(record_key) and not rdb.exists(link_key):
            sha1 = k
            h = rdb.hgetall(record_key)
        else:
            sha1 = rdb.get(link_key)
            h = rdb.hgetall(" h: {} ".format(sha1))
        # Replace code references by the referenced hashes when they exist.
        code_fields = (
            (' OpSystemCode ', " h-OpSystemCode: {} "),
            (' ProductCode ', " h-ProductCode: {} "),
        )
        for field, ref_format in code_fields:
            if field in h:
                ref_key = ref_format.format(h[field])
                if rdb.exists(ref_key):
                    h[field] = rdb.hgetall(ref_key)
        self._attach_relatives(h, sha1, " p: {} ", ' hashlookup:parent-total ', ' parents ')
        self._attach_relatives(h, sha1, " c: {} ", ' hashlookup:children-total ', ' children ')
        return calculate_trust(hobject=h)
2021-07-15 15:49:52 +00:00
@api.route(' /info ')
@api.doc(description=" Info about the hashlookup database ")
class info(Resource):
    """Endpoint reporting dataset statistics and the server version."""

    def get(self):
        """Return the stored statistic values plus the key estimate from ``rdb.info()``."""
        server_info = rdb.info()
        return {
            ' nsrl-version ': rdb.get(' nsrl-version '),
            ' stat:hashlookup_total_keys ': server_info[' estimate_keys[default] '],
            ' stat:nsrl_modern_rds ': rdb.get(' stat:nsrl_modern_rds '),
            ' stat:nsrl_legacy ': rdb.get(' stat:nsrl_legacy '),
            ' stat:nsrl_ios ': rdb.get(' stat:nsrl_ios '),
            ' stat:nsrl_android ': rdb.get(' stat:nsrl_android '),
            ' hashlookup-version ': version,
        }
@api.route(' /bulk/md5 ')
@api.doc(description=" Bulk search of MD5 hashes in a JSON array with the key \' hashes \' . ")
class bulkmd5(Resource):
    """Endpoint resolving many MD5 digests in one request."""

    def post(self):
        """Return the records of all known MD5 digests listed in the body.

        The body must be a JSON object holding a list of digests. Entries
        that are not strings or not valid MD5 values are skipped, as are
        digests without a link key.

        Returns:
            list: one record per known digest, in request order; or an
            error message with status 404 when the body is malformed
            (status kept as-is for existing clients).
        """
        json_data = request.get_json(force=True)
        # A JSON body that is not an object (null, list, number) cannot hold the key.
        hashes = json_data.get(' hashes ') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            return {' message ': ' JSON format incorrect. An array of hashes in the key \' hashes \' is expected. '}, 404
        ret = []
        for val in hashes:
            # Non-string entries have no .upper(); skip them instead of failing the request.
            if not isinstance(val, str):
                continue
            k = val.upper()
            if check_md5(value=k) is False:
                continue
            link_key = " l: {} ".format(k)
            if not rdb.exists(link_key):
                if stats_pubsub:
                    pub_lookup(channel=' nx ', k=k)
                continue
            sha1 = rdb.get(link_key)
            ret.append(rdb.hgetall(" h: {} ".format(sha1)))
            if stats:
                # MD5 values belong in the MD5 sorted set, as in the single
                # MD5 lookup; the SHA-1 set is reserved for SHA-1 values.
                rdb.zincrby(" s:exist:md5 ", score, k)
            if stats_pubsub:
                pub_lookup(channel=' exist ', k=k)
        return ret
@api.route(' /bulk/sha1 ')
@api.doc(description=" Bulk search of SHA1 hashes in a JSON array with the \' hashes \' . ")
class bulksha1(Resource):
    """Endpoint resolving many SHA-1 digests in one request."""

    def post(self):
        """Return the records of all known SHA-1 digests listed in the body.

        The body must be a JSON object holding a list of digests. Entries
        that are not strings or not valid SHA-1 values are skipped, as are
        digests without a stored record.

        Returns:
            list: one record per known digest, in request order; or an
            error message with status 404 when the body is malformed
            (status kept as-is for existing clients).
        """
        json_data = request.get_json(force=True)
        # A JSON body that is not an object (null, list, number) cannot hold the key.
        hashes = json_data.get(' hashes ') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            return {' message ': ' JSON format incorrect. An array of hashes in the key \' hashes \' is expected. '}, 404
        ret = []
        for val in hashes:
            # Non-string entries have no .upper(); skip them instead of failing the request.
            if not isinstance(val, str):
                continue
            k = val.upper()
            if check_sha1(value=k) is False:
                continue
            record_key = " h: {} ".format(k)
            if not rdb.exists(record_key):
                if stats_pubsub:
                    pub_lookup(channel=' nx ', k=k)
                continue
            ret.append(rdb.hgetall(record_key))
            if stats:
                rdb.zincrby(" s:exist:sha1 ", score, k)
            if stats_pubsub:
                pub_lookup(channel=' exist ', k=k)
        return ret
2021-08-14 08:32:00 +00:00
@api.route(' /session/create/<string:name> ')
@api.doc(description=" Create a session key to keep search context. The session is attached to a name. After the session is created, the header `hashlookup_session` can be set to the session name. ")
class sessioncreate(Resource):
    """Endpoint creating a named session with a limited lifetime."""

    def get(self, name):
        """Store the client details under the session key and set its expiry.

        Returns 400 for a missing or over-long name (more than 120
        characters) and 500 when the session feature is disabled.
        """
        name_is_valid = name is not None and len(name) <= 120
        if not name_is_valid:
            return {' message ': ' Expecting a name for the session '}, 400
        if session is False:
            return {' message ': ' Session feature is not enabled '}, 500
        session_key = ' session: {} '.format(name)
        rdb.set(session_key, str(client_info()))
        rdb.expire(session_key, session_ttl)
        confirmation = ' Session {} created and session will expire in {} seconds '.format(name, session_ttl)
        return {' message ': confirmation}
@api.route(' /session/get/<string:name> ')
@api.doc(description=" Return set of matching and non-matching hashes from a session. ")
class sessioncreate(Resource):
    """Endpoint returning the hashes recorded for a named session.

    NOTE(review): this class reuses the name of the session-creation
    resource; the name is kept so nothing derived from it changes.
    """

    def get(self, name):
        """Return the non-matching and matching sets plus the stored session info.

        Returns 400 for a missing or over-long name, 500 when the session
        feature is disabled and 404 when the session key does not exist.
        """
        name_is_valid = name is not None and len(name) <= 120
        if not name_is_valid:
            return {' message ': ' Expecting a name for the session '}, 400
        if session is False:
            return {' message ': ' Session feature is not enabled '}, 500
        if not rdb.exists(' session: {} '.format(name)):
            return {' message ': ' Non-existing session '}, 404
        # Dict values are evaluated top to bottom, so Redis is queried in
        # the order nx, exist, info.
        return {
            ' nx ': list(rdb.smembers(' session: {} :nx '.format(name))),
            ' exist ': list(rdb.smembers(' session: {} :exist '.format(name))),
            ' info ': rdb.get(' session: {} '.format(name)),
        }
2021-08-29 12:06:35 +00:00
@api.route(' /stats/top ')
@api.doc(description=" Return the top 100 of most queried values. ")
class stattop(Resource):
    """Endpoint exposing the most queried values from the SHA-1 statistics."""

    def get(self):
        """Return the top queried non-existing and existing values.

        Returns:
            dict: the non-existing values as (value, score) pairs and the
            existing values as entries holding the file name and the
            (value, score) pair; or an error message with status 400 when
            public statistics are disabled.
        """
        if stats_public is False:
            return {' message ': ' Public statistics not enabled '}, 400
        ret = {}
        # The stop index of ZREVRANGE is inclusive: 0..99 yields 100 entries.
        top_nx = rdb.zrevrange(" s:nx:sha1 ", 0, 99, withscores=True)
        # Drop values that have a record by now. A new list is built because
        # removing from a list while iterating over it skips elements.
        ret[' nx '] = [val for val in top_nx if not rdb.exists(" h: {} ".format(val[0]))]
        ret[' exist '] = []
        for value in rdb.zrevrange(" s:exist:sha1 ", 0, 99, withscores=True):
            entry = {
                ' FileName ': rdb.hget(" h: {} ".format(value[0]), " FileName "),
                # The full (value, score) pair is exposed under this key.
                ' SHA-1 ': value,
            }
            ret[' exist '].append(entry)
        return ret
2021-07-15 15:49:52 +00:00
# Start the built-in Flask server when the guard condition holds.
if __name__ == ' __main__ ':
    app.run(host=' 0.0.0.0 ')