2021-07-15 15:49:52 +00:00
# Version string of the service: handed to the Api constructor and
# reported by the /info endpoint under the hashlookup-version key.
version = " 1.0 "
from flask import Flask , url_for , send_from_directory , render_template , make_response , request
from flask_restx import Resource , Api , reqparse
import redis
2021-08-13 20:13:25 +00:00
import configparser
2021-08-13 20:42:41 +00:00
import json
2021-07-15 15:49:52 +00:00
2021-08-13 20:13:25 +00:00
# Load the server configuration; every feature switch below is read from it.
config = configparser.ConfigParser()
config.read(' ../etc/server.conf ')

# Section proxies, bound once instead of being looked up for every option.
_global_section = config[' global ']
_session_section = config[' session ']

# Statistics switches: Redis counters, pub/sub notifications, and the
# public statistics endpoint.
stats = _global_section.getboolean(' stats ')
stats_pubsub = _global_section.getboolean(' stats_pubsub ')
stats_public = _global_section.getboolean(' stats_public ')

# Increment applied to a sorted-set counter each time a lookup is recorded.
score = 1

# Session feature: on/off switch and the expiry applied to new sessions.
# The ttl is read with get(), so it stays a string as written in the file.
session = _session_section.getboolean(' enable ')
session_ttl = _session_section.get(' ttl ')
2021-07-15 15:49:52 +00:00
# Flask application object; strict trailing-slash matching is switched off
# on the URL map.
app = Flask ( __name__ )
app . url_map . strict_slashes = False
# flask-restx API wrapper around the app. The description is Markdown shown
# in the generated documentation; `doc` selects the path of that interface.
api = Api ( app , version = version , title = ' hashlookup CIRCL API ' , description = ' ![](https://www.circl.lu/assets/images/circl-logo.png) \n [CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. The API can be tested live in the interface below. ' , doc = ' / ' , license = ' CC-BY ' , contact = ' info@circl.lu ' , ordered = True )
# Redis client shared by all endpoints; decode_responses=True asks redis-py
# to return str values instead of bytes.
rdb = redis . Redis ( host = ' 127.0.0.1 ' , port = ' 6666 ' , decode_responses = True )
def is_hex(s):
    """Return True when *s* is a non-empty string of hexadecimal digits only.

    The check is done character by character rather than with
    ``int(s, 16)``: ``int`` also accepts a ``0x`` prefix, a sign,
    underscores, surrounding whitespace and non-ASCII digits, none of which
    belong in a hash value.

    Args:
        s: Candidate value, normally a string taken from the request.

    Returns:
        bool: True for strings made exclusively of ``0-9``, ``a-f`` and
        ``A-F``; False for anything else, including empty strings and
        non-string values.
    """
    if not isinstance(s, str) or not s:
        return False
    return all(ch in "0123456789abcdefABCDEF" for ch in s)
2021-08-29 10:25:43 +00:00
def check_md5(value=None):
    """Validate an MD5 value and normalise it to upper case.

    Args:
        value: Candidate MD5 string.

    Returns:
        The upper-cased value when it is 32 characters long and passes
        ``is_hex``; ``False`` otherwise (including when *value* is None).
    """
    well_formed = value is not None and len(value) == 32 and is_hex(value)
    return value.upper() if well_formed else False
def check_sha1(value=None):
    """Validate a SHA-1 value and normalise it to upper case.

    Args:
        value: Candidate SHA-1 string.

    Returns:
        The upper-cased value when it is 40 characters long and passes
        ``is_hex``; ``False`` otherwise (including when *value* is None).
    """
    well_formed = value is not None and len(value) == 40 and is_hex(value)
    return value.upper() if well_formed else False
2021-08-13 20:42:41 +00:00
def client_info():
    """Describe the client behind the current request.

    Returns:
        dict: the client address (the forwarded-for entry of the WSGI
        environment when present, the remote address otherwise), the
        User-Agent header and the authorization entry of the environment
        (``None`` when absent).
    """
    environ = request.environ
    forwarded_for = environ.get(' HTTP_X_FORWARDED_FOR ')
    if forwarded_for is not None:
        address = forwarded_for
    else:
        address = environ[' REMOTE_ADDR ']
    return {
        ' ip_addr ': address,
        ' user_agent ': request.headers.get(' User-Agent '),
        ' auth ': environ.get(' HTTP_AUTHORIZATION '),
    }
2021-08-14 08:32:00 +00:00
2021-08-13 20:42:41 +00:00
def pub_lookup(channel=None, k=None):
    """Publish a lookup event on a Redis pub/sub channel.

    The message is the JSON-encoded result of ``client_info()`` extended
    with the looked-up value.

    Args:
        channel: Name of the channel to publish on.
        k: The hash value that was looked up.

    Returns:
        bool: False when either argument is None (nothing is published),
        True once the message has been published.
    """
    if channel is None or k is None:
        return False
    event = client_info()
    event[' value '] = k
    rdb.publish(channel, json.dumps(event))
    return True
2021-08-14 08:32:00 +00:00
def get_session():
    """Return the remaining lifetime of the session named by the request.

    Returns:
        ``False`` when the session feature is disabled, when the request
        carries no session header, or when no such session exists in Redis;
        otherwise the TTL Redis reports for the session key.
    """
    if session is False:
        return False
    session_name = request.headers.get(' hashlookup_session ')
    if session_name is None:
        return False
    redis_key = " session: {} ".format(session_name)
    if not rdb.exists(redis_key):
        return False
    print(" Using session_name: {} ".format(session_name))
    return rdb.ttl(redis_key)
2021-07-15 15:49:52 +00:00
@api.route(' /lookup/md5/<string:md5> ')
@api.doc(description=" Lookup MD5. ")
class lookup(Resource):
    # Single-hash MD5 lookup endpoint.
    # The HTTP handler is documented with comments rather than a docstring
    # because flask-restx publishes handler docstrings in the generated API
    # documentation.

    def _track_lookup(self, stat_key, channel, session_template, digest, ttl):
        """Record one lookup in the optional statistics/session stores.

        Args:
            stat_key: Sorted set whose counter for *digest* is incremented
                when statistics are enabled.
            channel: Pub/sub channel notified when pub/sub is enabled.
            session_template: Format string producing the session set key
                from the session name sent in the request header.
            digest: Normalised hash value being looked up.
            ttl: Value returned by ``get_session()``; ``False`` means no
                usable session, otherwise it is applied as the set expiry.
        """
        if stats:
            rdb.zincrby(stat_key, score, digest)
        if stats_pubsub:
            pub_lookup(channel=channel, k=digest)
        if session and ttl is not False:
            session_key = session_template.format(request.headers.get(' hashlookup_session '))
            rdb.sadd(session_key, digest)
            rdb.expire(session_key, ttl)

    def get(self, md5):
        # Returns the stored record for an MD5 value, a 400 payload when the
        # value is malformed, or a 404 payload when it is unknown.
        digest = check_md5(value=md5)
        if digest is False:
            return {' message ': ' MD5 value incorrect, expecting a MD5 value in hex format '}, 400

        ttl = get_session() if session else False

        link_key = " l: {} ".format(digest)
        record_key = " h: {} ".format(digest)

        # Unknown when neither the link key nor a record keyed by the value exists.
        if not (rdb.exists(link_key) or rdb.exists(record_key)):
            self._track_lookup(" s:nx:md5 ", ' nx ', " session: {} :nx ", digest, ttl)
            return {' message ': ' Non existing MD5 ', ' query ': md5}, 404

        self._track_lookup(" s:exist:md5 ", ' exist ', " session: {} :exist ", digest, ttl)

        if rdb.exists(record_key) and not rdb.exists(link_key):
            # The record is stored directly under the queried value.
            sha1 = digest
            record = rdb.hgetall(record_key)
        else:
            # Follow the link key to the value the record is stored under.
            sha1 = rdb.get(link_key)
            record = rdb.hgetall(" h: {} ".format(sha1))

        # Replace code fields by their detail hash when one is stored.
        code_tables = (
            (' OpSystemCode ', " h-OpSystemCode: {} "),
            (' ProductCode ', " h-ProductCode: {} "),
        )
        for field, key_template in code_tables:
            if field in record:
                detail_key = key_template.format(record[field])
                if rdb.exists(detail_key):
                    record[field] = rdb.hgetall(detail_key)

        parents_key = " p: {} ".format(sha1)
        if rdb.exists(parents_key):
            total = rdb.scard(parents_key)
            if total <= 15:
                members = rdb.smembers(parents_key)
            else:
                # Large sets are sampled; the full count is reported alongside.
                members = rdb.srandmember(parents_key, number=10)
                record[' hashlookup:parent-total '] = total
            record[' parents '] = [rdb.hgetall(" h: {} ".format(parent)) for parent in members]

        children_key = " c: {} ".format(sha1)
        if rdb.exists(children_key):
            record[' children '] = list(rdb.smembers(children_key))

        return record
@api.route(' /lookup/sha1/<string:sha1> ')
@api.doc(description=" Lookup SHA-1. ")
class lookup(Resource):
    # Single-hash SHA-1 lookup endpoint.
    # The HTTP handler is documented with comments rather than a docstring
    # because flask-restx publishes handler docstrings in the generated API
    # documentation.

    def _track_lookup(self, stat_key, channel, session_template, digest, ttl):
        """Record one lookup in the optional statistics/session stores.

        Args:
            stat_key: Sorted set whose counter for *digest* is incremented
                when statistics are enabled.
            channel: Pub/sub channel notified when pub/sub is enabled.
            session_template: Format string producing the session set key
                from the session name sent in the request header.
            digest: Normalised hash value being looked up.
            ttl: Value returned by ``get_session()``; ``False`` means no
                usable session, otherwise it is applied as the set expiry.
        """
        if stats:
            rdb.zincrby(stat_key, score, digest)
        if stats_pubsub:
            pub_lookup(channel=channel, k=digest)
        if session and ttl is not False:
            session_key = session_template.format(request.headers.get(' hashlookup_session '))
            rdb.sadd(session_key, digest)
            rdb.expire(session_key, ttl)

    def get(self, sha1):
        # Returns the stored record for a SHA-1 value, a 400 payload when the
        # value is malformed, or a 404 payload when it is unknown.
        digest = check_sha1(value=sha1)
        if digest is False:
            return {' message ': ' SHA1 value incorrect, expecting a SHA1 value in hex format '}, 400

        ttl = get_session() if session else False

        record_key = " h: {} ".format(digest)
        if not rdb.exists(record_key):
            self._track_lookup(" s:nx:sha1 ", ' nx ', " session: {} :nx ", digest, ttl)
            return {' message ': ' Non existing SHA-1 ', ' query ': sha1}, 404

        self._track_lookup(" s:exist:sha1 ", ' exist ', " session: {} :exist ", digest, ttl)

        record = rdb.hgetall(record_key)

        # Replace code fields by their detail hash when one is stored.
        code_tables = (
            (' OpSystemCode ', " h-OpSystemCode: {} "),
            (' ProductCode ', " h-ProductCode: {} "),
        )
        for field, key_template in code_tables:
            if field in record:
                detail_key = key_template.format(record[field])
                if rdb.exists(detail_key):
                    record[field] = rdb.hgetall(detail_key)

        parents_key = " p: {} ".format(digest)
        if rdb.exists(parents_key):
            total = rdb.scard(parents_key)
            if total <= 15:
                members = rdb.smembers(parents_key)
            else:
                # Large sets are sampled; the full count is reported alongside.
                members = rdb.srandmember(parents_key, number=10)
                record[' hashlookup:parent-total '] = total
            record[' parents '] = [rdb.hgetall(" h: {} ".format(parent)) for parent in members]

        children_key = " c: {} ".format(digest)
        if rdb.exists(children_key):
            record[' children '] = list(rdb.smembers(children_key))

        return record
@api.route(' /info ')
@api.doc(description=" Info about the hashlookup database ")
class info(Resource):
    # Reports database metadata stored in Redis plus the service version.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def get(self):
        # Each field is read from the Redis key of the same name, in this order.
        redis_fields = (
            ' nsrl-version ',
            ' stat:nsrl_modern_rds ',
            ' stat:nsrl_legacy ',
            ' stat:nsrl_ios ',
            ' stat:nsrl_android ',
        )
        summary = {field: rdb.get(field) for field in redis_fields}
        summary[' hashlookup-version '] = version
        return summary
@api.route(' /bulk/md5 ')
@api.doc(description=" Bulk search of MD5 hashes in a JSON array with the key \' hashes \' . ")
class bulkmd5(Resource):
    # Bulk MD5 lookup: returns the records of all known hashes in the posted
    # list; malformed or unknown entries are skipped.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def post(self):
        json_data = request.get_json(force=True)
        # Reject bodies that are not a JSON object carrying a list under the
        # expected key, instead of failing later with a TypeError.
        hashes = json_data.get(' hashes ') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            # The 404 status is kept for compatibility with existing clients.
            return {' message ': ' JSON format incorrect. An array of hashes in the key \' hashes \' is expected. '}, 404
        ret = []
        for val in hashes:
            # Non-string entries are skipped like any other invalid hash
            # rather than raising AttributeError on .upper().
            if not isinstance(val, str):
                continue
            k = check_md5(value=val)
            if k is False:
                continue
            link_key = " l: {} ".format(k)
            if not rdb.exists(link_key):
                if stats_pubsub:
                    pub_lookup(channel=' nx ', k=k)
                continue
            sha1 = rdb.get(link_key)
            ret.append(rdb.hgetall(" h: {} ".format(sha1)))
            if stats:
                # NOTE(review): this counts an MD5 value in the sha1 counter,
                # while the single MD5 lookup uses s:exist:md5 - confirm
                # which key is intended before changing it.
                rdb.zincrby(" s:exist:sha1 ", score, k)
            if stats_pubsub:
                pub_lookup(channel=' exist ', k=k)
        return ret
@api.route(' /bulk/sha1 ')
@api.doc(description=" Bulk search of SHA1 hashes in a JSON array with the \' hashes \' . ")
class bulksha1(Resource):
    # Bulk SHA-1 lookup: returns the records of all known hashes in the
    # posted list; malformed or unknown entries are skipped.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def post(self):
        json_data = request.get_json(force=True)
        # Reject bodies that are not a JSON object carrying a list under the
        # expected key, instead of failing later with a TypeError.
        hashes = json_data.get(' hashes ') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            # The 404 status is kept for compatibility with existing clients.
            return {' message ': ' JSON format incorrect. An array of hashes in the key \' hashes \' is expected. '}, 404
        ret = []
        for val in hashes:
            # Non-string entries are skipped like any other invalid hash
            # rather than raising AttributeError on .upper().
            if not isinstance(val, str):
                continue
            k = check_sha1(value=val)
            if k is False:
                continue
            record_key = " h: {} ".format(k)
            if not rdb.exists(record_key):
                if stats_pubsub:
                    pub_lookup(channel=' nx ', k=k)
                continue
            ret.append(rdb.hgetall(record_key))
            if stats:
                rdb.zincrby(" s:exist:sha1 ", score, k)
            if stats_pubsub:
                pub_lookup(channel=' exist ', k=k)
        return ret
2021-08-14 08:32:00 +00:00
@api.route(' /session/create/<string:name> ')
@api.doc(description=" Create a session key to keep search context. The session is attached to a name. After the session is created, the header `hashlookup_session` can be set to the session name. ")
class sessioncreate(Resource):
    # Creates (or overwrites) a named session that expires after the
    # configured ttl.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def get(self, name):
        name_rejected = name is None or len(name) > 120
        if name_rejected:
            return {' message ': ' Expecting a name for the session '}, 400
        if session is False:
            return {' message ': ' Session feature is not enabled '}, 500

        # The session value is the textual form of the creating client's details.
        session_key = ' session: {} '.format(name)
        rdb.set(session_key, str(client_info()))
        rdb.expire(session_key, session_ttl)

        confirmation = ' Session {} created and session will expire in {} seconds '.format(name, session_ttl)
        return {' message ': confirmation}
@api.route(' /session/get/<string:name> ')
@api.doc(description=" Return set of matching and non-matching hashes from a session. ")
class sessioncreate(Resource):
    # Returns what a session has collected: unknown hashes, known hashes and
    # the value stored when the session was created.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def get(self, name):
        name_rejected = name is None or len(name) > 120
        if name_rejected:
            return {' message ': ' Expecting a name for the session '}, 400
        if session is False:
            return {' message ': ' Session feature is not enabled '}, 500

        session_key = ' session: {} '.format(name)
        if not rdb.exists(session_key):
            return {' message ': ' Non-existing session '}, 404

        # Redis sets are converted to lists so the payload can be serialised.
        return {
            ' nx ': list(rdb.smembers(' session: {} :nx '.format(name))),
            ' exist ': list(rdb.smembers(' session: {} :exist '.format(name))),
            ' info ': rdb.get(session_key),
        }
2021-08-29 12:06:35 +00:00
@api.route(' /stats/top ')
@api.doc(description=" Return the top 100 of most queried values. ")
class stattop(Resource):
    # Public statistics: the most queried unknown and known values.
    # Documented with comments because flask-restx publishes handler
    # docstrings in the generated API documentation.

    def get(self):
        if stats_public is False:
            return {' message ': ' Public statistics not enabled '}, 400
        ret = {}
        # ZREVRANGE bounds are inclusive, so 0..99 yields the documented 100
        # entries (0..100 returned 101).
        most_missed = rdb.zrevrange(" s:nx:sha1 ", 0, 99, withscores=True)
        # Drop values that have a record by now. A new list is built because
        # removing items from the list being iterated skipped the entry
        # following each removal.
        ret[' nx '] = [
            item for item in most_missed
            if not rdb.exists(" h: {} ".format(item[0]))
        ]
        most_found = rdb.zrevrange(" s:exist:sha1 ", 0, 99, withscores=True)
        # Each known entry carries its stored file name and the
        # (value, score) pair returned by Redis.
        ret[' exist '] = [
            {
                ' FileName ': rdb.hget(" h: {} ".format(value[0]), " FileName "),
                ' SHA-1 ': value,
            }
            for value in most_found
        ]
        return ret
2021-07-15 15:49:52 +00:00
# Script entry point: start Flask's built-in server with the host given below.
if __name__ == ' __main__ ' :
    app . run ( host = ' 0.0.0.0 ' )