2022-01-15 13:55:09 +00:00
#!/usr/bin/env python
2021-12-04 12:59:19 +00:00
version = " 1.2 "
2022-01-15 13:56:47 +00:00
from flask import (
Flask ,
url_for ,
send_from_directory ,
render_template ,
make_response ,
request ,
)
2021-07-15 15:49:52 +00:00
from flask_restx import Resource , Api , reqparse
import redis
2021-08-13 20:13:25 +00:00
import configparser
2021-08-13 20:42:41 +00:00
import json
2021-07-15 15:49:52 +00:00
2021-08-13 20:13:25 +00:00
# Settings are read once at import time. The path is relative, so it is
# resolved against the process working directory.
config = configparser.ConfigParser()
config.read('../etc/server.conf')

# [global] section: switches for lookup statistics, the pub/sub feed of
# lookups, and public access to the statistics endpoint.
stats, stats_pubsub, stats_public = (
    config['global'].getboolean(option)
    for option in ('stats', 'stats_pubsub', 'stats_public')
)

# Increment applied to a statistics counter for each lookup.
score = 1

# [session] section: feature switch and the session lifetime. `ttl` is kept
# exactly as configparser returns it (a string, or None when unset).
session = config['session'].getboolean('enable')
session_ttl = config['session'].get('ttl')
2021-07-15 15:49:52 +00:00
# Flask application; routes match with or without a trailing slash.
app = Flask(__name__)
app.url_map.strict_slashes = False

# Markdown shown at the top of the interactive API documentation. Adjacent
# literals are concatenated by the compiler into one string.
_API_DESCRIPTION = (
    '![](https://www.circl.lu/assets/images/circl-logo.png)\n'
    '[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. '
    'For more details about all the datasets included [visit the website of the project](https://www.circl.lu/services/hashlookup/). '
    'The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). '
    'A [documentation is available with](https://www.circl.lu/services/hashlookup/) with sample queries and software using hashlookup. '
    'An offline version as Bloom filter is also [available](https://circl.lu/services/hashlookup/#how-to-quickly-check-a-set-of-files-in-a-local-directory). '
    'The API can be tested live in the interface below.'
)

# REST API wrapper; the documentation UI is served from the site root.
api = Api(
    app,
    version=version,
    title='hashlookup CIRCL API',
    description=_API_DESCRIPTION,
    doc='/',
    license='CC-BY',
    contact='info@circl.lu',
    ordered=True,
)

# Storage backend connection; replies are decoded to str instead of bytes.
rdb = redis.Redis(host='127.0.0.1', port='6666', decode_responses=True)
2022-01-15 13:56:47 +00:00
2021-07-15 15:49:52 +00:00
def is_hex(s):
    """Return True when *s* is a non-empty string of hexadecimal digits only.

    ``int(s, 16)`` is deliberately not used for the check: it also accepts a
    leading sign, a ``0x`` prefix, surrounding whitespace, ``_`` separators
    and non-ASCII digits. None of those belong in a hash value, and this
    function is the gate in front of every storage-key lookup.

    Args:
        s: Candidate value; anything that is not a ``str`` is rejected.

    Returns:
        bool: True only if every character is one of ``0-9``, ``a-f``, ``A-F``.
    """
    if not isinstance(s, str) or not s:
        return False
    return all(ch in "0123456789abcdefABCDEF" for ch in s)
2022-01-15 13:56:47 +00:00
2021-08-29 10:25:43 +00:00
def check_md5(value=None):
    """Validate an MD5 digest given as text.

    Args:
        value: Candidate digest; expected to be 32 hexadecimal characters.

    Returns:
        The digest in upper case when it is valid, otherwise ``False``.
        Callers compare the result against ``False`` explicitly.
    """
    if value is not None and len(value) == 32 and is_hex(value):
        return value.upper()
    return False
2022-01-15 13:56:47 +00:00
2021-08-29 10:25:43 +00:00
def check_sha1(value=None):
    """Validate a SHA-1 digest given as text.

    Args:
        value: Candidate digest; expected to be 40 hexadecimal characters.

    Returns:
        The digest in upper case when it is valid, otherwise ``False``.
        Callers compare the result against ``False`` explicitly.
    """
    if value is not None and len(value) == 40 and is_hex(value):
        return value.upper()
    return False
2022-01-15 13:56:47 +00:00
2021-11-19 06:26:50 +00:00
def check_sha256(value=None):
    """Validate a SHA-256 digest given as text.

    Args:
        value: Candidate digest; expected to be 64 hexadecimal characters.

    Returns:
        The digest in upper case when it is valid, otherwise ``False``.
        Callers compare the result against ``False`` explicitly.
    """
    if value is not None and len(value) == 64 and is_hex(value):
        return value.upper()
    return False
2022-01-15 13:56:47 +00:00
2021-08-13 20:42:41 +00:00
def client_info():
    """Describe the client behind the current request.

    Returns:
        dict: ``ip_addr`` (the ``X-Forwarded-For`` value when that header is
        present, otherwise ``REMOTE_ADDR``), ``user_agent`` and ``auth`` (the
        ``Authorization`` header value, or ``None`` when absent).
    """
    environ = request.environ
    forwarded_for = environ.get('HTTP_X_FORWARDED_FOR')
    if forwarded_for is None:
        ip = environ['REMOTE_ADDR']
    else:
        # The forwarded value is used verbatim; it is not split or validated.
        ip = forwarded_for
    return {
        'ip_addr': ip,
        'user_agent': request.headers.get('User-Agent'),
        'auth': environ.get('HTTP_AUTHORIZATION'),
    }
2021-08-14 08:32:00 +00:00
2021-08-13 20:42:41 +00:00
def pub_lookup(channel=None, k=None):
    """Publish a lookup event on a pub/sub channel.

    The message is the JSON-encoded client description (see ``client_info``)
    with the looked-up value added under ``value``.

    Args:
        channel: Channel name to publish on.
        k: The value that was looked up.

    Returns:
        bool: ``False`` when either argument is missing (nothing is
        published), ``True`` after publishing.
    """
    if channel is None or k is None:
        return False
    message = client_info()
    message['value'] = k
    rdb.publish(channel, json.dumps(message))
    return True
2022-01-15 13:56:47 +00:00
2021-08-14 08:32:00 +00:00
def get_session():
    """Return the remaining lifetime of the session named by the request.

    The session name is read from the ``hashlookup_session`` request header.

    Returns:
        ``False`` when sessions are disabled, the header is absent, or no
        ``session:<name>`` key exists; otherwise whatever ``rdb.ttl`` reports
        for that key.
    """
    if session is False:
        return False
    session_name = request.headers.get('hashlookup_session')
    if session_name is None:
        return False
    session_key = "session:{}".format(session_name)
    if not rdb.exists(session_key):
        return False
    print("Using session_name: {}".format(session_name))
    return rdb.ttl(session_key)
2022-01-15 13:56:47 +00:00
2021-12-02 06:33:20 +00:00
def calculate_trust(hobject=None):
    """Annotate a lookup record with a ``hashlookup:trust`` score.

    The score starts at 50 (unknown). Each parent counted in
    ``hashlookup:parent-total`` adds 5, the presence of a ``KnownMalicious``
    key subtracts 20, and the result is capped at 100.

    Args:
        hobject: Record dictionary; it is modified in place.

    Returns:
        The same dictionary with ``hashlookup:trust`` set, or ``False`` when
        no record was given.
    """
    if hobject is None:
        return False
    trust = 50
    if 'hashlookup:parent-total' in hobject:
        trust += 5 * hobject['hashlookup:parent-total']
    if 'KnownMalicious' in hobject:
        trust -= 20
    # Only the upper bound is enforced.
    hobject['hashlookup:trust'] = min(trust, 100)
    return hobject
2022-01-15 13:56:47 +00:00
2021-07-15 15:49:52 +00:00
def _session_ttl():
    """Return the active session's TTL, or False when sessions are off or absent."""
    return get_session() if session else False


def _record_lookup(hash_type, k, found, ttl):
    """Record one lookup in the statistics, the pub/sub feed and the session sets.

    Args:
        hash_type: Suffix of the statistics key ('md5', 'sha1' or 'sha256').
        k: Normalised (upper-case) hash that was queried.
        found: True for a hit ('exist'), False for a miss ('nx').
        ttl: Value returned by ``_session_ttl``; False disables session tracking.
    """
    outcome = 'exist' if found else 'nx'
    if stats:
        rdb.zincrby("s:{}:{}".format(outcome, hash_type), score, k)
    if stats_pubsub:
        pub_lookup(channel=outcome, k=k)
    if session and ttl is not False:
        session_key = "session:{}:{}".format(
            request.headers.get('hashlookup_session'), outcome
        )
        rdb.sadd(session_key, k)
        # Give the per-session result set the session's remaining lifetime.
        rdb.expire(session_key, ttl)


def _resolve_sha1(k):
    """Return the SHA-1 under which the record for *k* is stored.

    When ``h:<k>`` exists and no ``l:<k>`` link does, *k* is itself the record
    key; otherwise the value stored at ``l:<k>`` is used.
    """
    if rdb.exists("h:{}".format(k)) and not rdb.exists("l:{}".format(k)):
        return k
    return rdb.get("l:{}".format(k))


def _expand_code(record, field):
    """Replace ``record[field]`` by the ``h-<field>:<code>`` hash when one exists."""
    if field in record:
        key = "h-{}:{}".format(field, record[field])
        if rdb.exists(key):
            record[field] = rdb.hgetall(key)


def _attach_relatives(record, set_key, list_field, total_field):
    """Add the records referenced by the set at *set_key* to *record*.

    The set's cardinality is stored under *total_field*. Sets of up to 15
    members are returned in full; larger ones are reduced to 10 random members.
    """
    if not rdb.exists(set_key):
        return
    card = rdb.scard(set_key)
    if card <= 15:
        members = rdb.smembers(set_key)
    else:
        members = rdb.srandmember(set_key, number=10)
    record[total_field] = card
    record[list_field] = [rdb.hgetall("h:{}".format(member)) for member in members]


def _build_record(sha1):
    """Load ``h:<sha1>`` and enrich it with codes, parents, children and trust."""
    record = rdb.hgetall("h:{}".format(sha1))
    _expand_code(record, 'OpSystemCode')
    _expand_code(record, 'ProductCode')
    _attach_relatives(
        record, "p:{}".format(sha1), 'parents', 'hashlookup:parent-total'
    )
    _attach_relatives(
        record, "c:{}".format(sha1), 'children', 'hashlookup:children-total'
    )
    return calculate_trust(hobject=record)


@api.route('/lookup/md5/<string:md5>')
@api.doc(description="Lookup MD5.")
class lookup(Resource):
    @api.doc(id='get_lookup_md5')
    @api.response(200, 'Success')
    @api.response(400, 'MD5 value incorrect, expecting a MD5 value in hex format')
    @api.response(404, 'Non existing MD5')
    def get(self, md5):
        # Validate once; the validator returns the normalised key or False.
        k = check_md5(value=md5)
        if k is False:
            return {
                'message': 'MD5 value incorrect, expecting a MD5 value in hex format'
            }, 400
        ttl = _session_ttl()
        # An MD5 is known through a link (l:) or a record stored under it (h:).
        if not (rdb.exists("l:{}".format(k)) or rdb.exists("h:{}".format(k))):
            _record_lookup('md5', k, False, ttl)
            return {'message': 'Non existing MD5', 'query': md5}, 404
        _record_lookup('md5', k, True, ttl)
        return _build_record(_resolve_sha1(k))


@api.route('/lookup/sha1/<string:sha1>')
@api.doc(description="Lookup SHA-1.")
class lookup(Resource):
    @api.doc(id='get_lookup_sha1')
    @api.response(200, 'Success')
    @api.response(400, 'SHA1 value incorrect, expecting a SHA1 value in hex format')
    @api.response(404, 'Non existing SHA1')
    def get(self, sha1):
        # Validate once; the validator returns the normalised key or False.
        k = check_sha1(value=sha1)
        if k is False:
            return {
                'message': 'SHA1 value incorrect, expecting a SHA1 value in hex format'
            }, 400
        ttl = _session_ttl()
        # Records are stored under their SHA-1, so no link lookup is needed.
        if not rdb.exists("h:{}".format(k)):
            _record_lookup('sha1', k, False, ttl)
            return {'message': 'Non existing SHA-1', 'query': sha1}, 404
        _record_lookup('sha1', k, True, ttl)
        return _build_record(k)


@api.route('/lookup/sha256/<string:sha256>')
@api.doc(description="Lookup SHA-256.")
class lookup(Resource):
    @api.doc(id='get_lookup_sha256')
    @api.response(200, 'Success')
    @api.response(400, 'SHA-256 value incorrect, expecting a SHA-256 value in hex format')
    @api.response(404, 'Non existing SHA-256')
    def get(self, sha256):
        # Validate once; the validator returns the normalised key or False.
        k = check_sha256(value=sha256)
        if k is False:
            return {
                'message': 'SHA-256 value incorrect, expecting a SHA-256 value in hex format'
            }, 400
        ttl = _session_ttl()
        # A SHA-256 is known through a link (l:) or a record stored under it (h:).
        if not (rdb.exists("l:{}".format(k)) or rdb.exists("h:{}".format(k))):
            _record_lookup('sha256', k, False, ttl)
            return {'message': 'Non existing SHA-256', 'query': sha256}, 404
        _record_lookup('sha256', k, True, ttl)
        return _build_record(_resolve_sha1(k))
new: [server] /children and /parents end-points added
The two new endpoints `children` and `parents` allow paginating over a large set of parents or
children.
- The first value is the SHA1 value that has children or parents.
- The second value is the number of elements to return (the default of 100 is used if
the value is set to 0).
- The third value is the cursor used to paginate over the elements (to
start, the cursor must be set to 0).
A sample usage:
~~~~
adulau@kolmogorov ~ $ curl -s http://127.0.0.1:5000/children/31C43D24d696BC5F5309CCBFA5BDEF65A7170439/10/0 | jq .
{
"children": [
"003587440172055C75130EF1A063C3BB050C3251",
"007C1E16B3F0F2E48C114E458308397953C7D224",
"014D1060C674FBBCEAFFD94B85D60AD00618B56B",
"01A2FACD61D157FC80DD0C5F6B525CC9EDE4B6DE",
"01D1A98F559966A05923A74EE239C6BBEEB0FDAC",
"01D381F2FCDD1BDF642AF83C9E96083F2C8D1C03",
"02B37BA21D1831C120C1C9C1D41893B4DB424EE7",
"02DED521ADCF17AA8818EA1142F63E05F558E668",
"0364E0EFE65D9B6502084813189B4D888C117859",
"05C9A276A0E03F7A5F99DE5CC8911583FD8FD60E"
],
"cursor": "05C9A276A0E03F7A5F99DE5CC8911583FD8FD60E",
"total": 774
}
adulau@kolmogorov ~ $ curl -s http://127.0.0.1:5000/children/31C43D24d696BC5F5309CCBFA5BDEF65A7170439/10/05C9A276A0E03F7A5F99DE5CC8911583FD8FD60E | jq .
{
"children": [
"063EC5526DA21372D77AFC3C40F694478521829B",
"0647EA948ED37383F74CC68A94E2DC3CBC2A9E4E",
"0648AAAC06A76A58CB1E999882447BBDEEA42C57",
"06A62F10F269824FFD75A917A35ACD3F2461981C",
"0727FE9E2437B15B3F879C7617973AE11E55BA13",
"074A0CA7131AE8FD9665CFE68A0C124EB6AD0170",
"075B11AE383071BDA9BE66E336C916F6E6E1F49C",
"081A336DE7D636F95F0150B7708C614592CBBDAE",
"08DF546EE44D4B7546FCE5A7B7E284CA35F1B059",
"0947CE713B69C2318CA684BBB63912621CC17A6A"
],
"cursor": "0947CE713B69C2318CA684BBB63912621CC17A6A",
"total": 774
}
~~~~
2022-05-21 15:43:24 +00:00
@api.route('/parents/<string:sha1>/<int:count>/<string:cursor>')
@api.doc(
    description="Return parents from a given SHA1. A number of element to return and an offset must be given. If not set it will be the 100 first elements. A cursor must be given to paginate over. The starting cursor is 0."
)
class parents(Resource):
    # Pagination, as described by the change that introduced this endpoint:
    #   /parents/<sha1>/<count>/<cursor>
    # - <sha1>   the SHA1 value whose parents are listed;
    # - <count>  number of elements to return (0 selects the default, 100);
    # - <cursor> start with 0, then pass back the `cursor` value returned by
    #            the previous page.
    @api.response(200, 'Success')
    @api.response(400, 'SHA1 value incorrect, expecting a SHA1 value in hex format')
    @api.response(404, 'The SHA1 value has no known parent.')
    def get(self, sha1, count, cursor):
        normalized = check_sha1(value=sha1)
        if normalized is False:
            return {
                'message': 'SHA1 value incorrect, expecting a SHA1 value in hex format.'
            }, 400
        # Falsy values select the defaults; a non-empty cursor string
        # (including "0") is passed through unchanged.
        count = count or 100
        cursor = cursor or 0
        set_key = "p:{}".format(normalized)
        if not rdb.exists(set_key):
            return {'message': 'The SHA1 value has no known parent.'}, 404
        next_cursor, members = rdb.sscan(set_key, count=count, cursor=cursor)
        return {
            'parents': members,
            'cursor': next_cursor,
            'total': rdb.scard(set_key),
        }
@api.route('/children/<string:sha1>/<int:count>/<string:cursor>')
@api.doc(
    description="Return children from a given SHA1. A number of element to return and an offset must be given. If not set it will be the 100 first elements. A cursor must be given to paginate over. The starting cursor is 0."
)
class children(Resource):
    # Pagination, as described by the change that introduced this endpoint:
    #   /children/<sha1>/<count>/<cursor>
    # - <sha1>   the SHA1 value whose children are listed;
    # - <count>  number of elements to return (0 selects the default, 100);
    # - <cursor> start with 0, then pass back the `cursor` value returned by
    #            the previous page.
    @api.response(200, 'Success')
    @api.response(400, 'SHA1 value incorrect, expecting a SHA1 value in hex format')
    @api.response(404, 'The SHA1 value has no known child.')
    def get(self, sha1, count, cursor):
        normalized = check_sha1(value=sha1)
        if normalized is False:
            return {
                'message': 'SHA1 value incorrect, expecting a SHA1 value in hex format.'
            }, 400
        # Falsy values select the defaults; a non-empty cursor string
        # (including "0") is passed through unchanged.
        count = count or 100
        cursor = cursor or 0
        set_key = "c:{}".format(normalized)
        if not rdb.exists(set_key):
            return {'message': 'The SHA1 value has no known child.'}, 404
        next_cursor, members = rdb.sscan(set_key, count=count, cursor=cursor)
        return {
            'children': members,
            'cursor': next_cursor,
            'total': rdb.scard(set_key),
        }
2021-07-15 15:49:52 +00:00
@api.route('/info')
@api.doc(description="Info about the hashlookup database")
class info(Resource):
    def get(self):
        # Server statistics are fetched first; the total key count is read
        # from the `estimate_keys[default]` field the backend reports.
        server_info = rdb.info()
        return {
            'nsrl-version': rdb.get('nsrl-version'),
            'stat:hashlookup_total_keys': server_info['estimate_keys[default]'],
            'stat:nsrl_modern_rds': rdb.get('stat:nsrl_modern_rds'),
            'stat:nsrl_legacy': rdb.get('stat:nsrl_legacy'),
            'stat:nsrl_ios': rdb.get('stat:nsrl_ios'),
            'stat:nsrl_android': rdb.get('stat:nsrl_android'),
            'hashlookup-version': version,
        }
2022-01-15 13:56:47 +00:00
2021-07-15 15:49:52 +00:00
@api.route('/bulk/md5')
@api.doc(
    description="Bulk search of MD5 hashes in a JSON array with the key \'hashes\'."
)
class bulkmd5(Resource):
    # POST body: a JSON object whose `hashes` key holds an array of MD5
    # strings. The response is the list of records found; unknown or
    # malformed entries are skipped.
    @api.response(200, 'Success')
    @api.response(404, 'JSON format incorrect. An array of hashes in the key \'hashes\' is expected.')
    def post(self):
        json_data = request.get_json(force=True)
        # Reject bodies that are not an object, or whose `hashes` is not an
        # array, with the documented response instead of raising.
        hashes = json_data.get('hashes') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            return {
                'message': 'JSON format incorrect. An array of hashes in the key \'hashes\' is expected.'
            }, 404
        ret = []
        for val in hashes:
            # Non-string items (numbers, null, nested values) cannot be hashes.
            if not isinstance(val, str):
                continue
            k = check_md5(value=val)
            if k is False:
                continue
            link_key = "l:{}".format(k)
            if not rdb.exists(link_key):
                if stats_pubsub:
                    pub_lookup(channel='nx', k=k)
                continue
            sha1 = rdb.get(link_key)
            ret.append(rdb.hgetall("h:{}".format(sha1)))
            if stats:
                # k is an MD5, so it is counted in the MD5 set, as the single
                # MD5 lookup does; the SHA-1 set is resolved as h:<sha1>.
                rdb.zincrby("s:exist:md5", score, k)
            if stats_pubsub:
                pub_lookup(channel='exist', k=k)
        return ret
2022-01-15 13:56:47 +00:00
2021-07-15 15:49:52 +00:00
@api.route('/bulk/sha1')
@api.doc(description="Bulk search of SHA1 hashes in a JSON array with the \'hashes\'.")
class bulksha1(Resource):
    # POST body: a JSON object whose `hashes` key holds an array of SHA-1
    # strings. The response is the list of records found; unknown or
    # malformed entries are skipped.
    @api.response(200, 'Success')
    @api.response(404, 'JSON format incorrect. An array of hashes in the key \'hashes\' is expected.')
    def post(self):
        json_data = request.get_json(force=True)
        # Reject bodies that are not an object, or whose `hashes` is not an
        # array, with the documented response instead of raising.
        hashes = json_data.get('hashes') if isinstance(json_data, dict) else None
        if not isinstance(hashes, list):
            return {
                'message': 'JSON format incorrect. An array of hashes in the key \'hashes\' is expected.'
            }, 404
        ret = []
        for val in hashes:
            # Non-string items (numbers, null, nested values) cannot be hashes.
            if not isinstance(val, str):
                continue
            k = check_sha1(value=val)
            if k is False:
                continue
            record_key = "h:{}".format(k)
            if not rdb.exists(record_key):
                if stats_pubsub:
                    pub_lookup(channel='nx', k=k)
                continue
            ret.append(rdb.hgetall(record_key))
            if stats:
                rdb.zincrby("s:exist:sha1", score, k)
            if stats_pubsub:
                pub_lookup(channel='exist', k=k)
        return ret
2022-01-15 13:56:47 +00:00
2021-08-14 08:32:00 +00:00
@api.route('/session/create/<string:name>')
@api.doc(
    description="Create a session key to keep search context. The session is attached to a name. After the session is created, the header `hashlookup_session` can be set to the session name."
)
class sessioncreate(Resource):
    @api.doc(id='get_session_create')
    @api.response(200, 'Success')
    @api.response(400, 'Expecting a name for the session')
    @api.response(500, 'Session feature is not enabled')
    def get(self, name):
        # The name is checked before the feature switch, so an over-long
        # name yields 400 even when sessions are disabled.
        if name is None or len(name) > 120:
            return {'message': 'Expecting a name for the session'}, 400
        if session is False:
            return {'message': 'Session feature is not enabled'}, 500
        session_key = 'session:{}'.format(name)
        # The stored value is the text form of the creating client's
        # description; the key then receives the configured lifetime.
        rdb.set(session_key, str(client_info()))
        rdb.expire(session_key, session_ttl)
        message = 'Session {} created and session will expire in {} seconds'.format(
            name, session_ttl
        )
        return {'message': message}
2021-08-14 08:32:00 +00:00
@api.route('/session/get/<string:name>')
@api.doc(description="Return set of matching and non-matching hashes from a session.")
class sessioncreate(Resource):
    @api.doc(id='get_session_matches')
    @api.response(200, 'Success')
    @api.response(400, 'Expecting a name for the session')
    @api.response(500, 'Session feature is not enabled')
    def get(self, name):
        if name is None or len(name) > 120:
            return {'message': 'Expecting a name for the session'}, 400
        if session is False:
            return {'message': 'Session feature is not enabled'}, 500
        session_key = 'session:{}'.format(name)
        if not rdb.exists(session_key):
            return {'message': 'Non-existing session'}, 404
        # `nx` holds the hashes that were not found during the session,
        # `exist` those that were; `info` is the value stored at creation.
        return {
            'nx': list(rdb.smembers('session:{}:nx'.format(name))),
            'exist': list(rdb.smembers('session:{}:exist'.format(name))),
            'info': rdb.get(session_key),
        }
2022-01-15 13:56:47 +00:00
2021-08-29 12:06:35 +00:00
@api.route('/stats/top')
@api.doc(description="Return the top 100 of most queried values.")
class stattop(Resource):
    # Response: `nx` lists the most queried unknown SHA-1 values with their
    # scores; `exist` lists the most queried known ones with their file name.
    @api.response(200, 'Success')
    @api.response(400, 'Public statistics not enabled')
    def get(self):
        if stats_public is False:
            return {'message': 'Public statistics not enabled'}, 400
        # Range bounds are inclusive: ranks 0..99 are exactly 100 entries.
        # A filtered copy is built instead of removing items from the list
        # while iterating over it, which skipped the entry after each removal.
        missing = [
            entry
            for entry in rdb.zrevrange("s:nx:sha1", 0, 99, withscores=True)
            # Drop values that have gained a record since they were counted.
            if not rdb.exists("h:{}".format(entry[0]))
        ]
        known = [
            {
                'FileName': rdb.hget("h:{}".format(entry[0]), "FileName"),
                # The whole (value, score) pair is returned under this key.
                'SHA-1': entry,
            }
            for entry in rdb.zrevrange("s:exist:sha1", 0, 99, withscores=True)
        ]
        return {'nx': missing, 'exist': known}
2021-07-15 15:49:52 +00:00
2022-01-15 13:56:47 +00:00
2021-07-15 15:49:52 +00:00
if __name__ == ' __main__ ' :
2022-01-15 13:56:47 +00:00
app . run ( host = ' 0.0.0.0 ' )