new: [statistics] add an optional statistic option in the server to have

a sorted set of hashes matching and non-matching.
This commit is contained in:
Alexandre Dulaunoy 2021-08-13 22:13:25 +02:00
parent fc11f3dd88
commit 72b462b5ea
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
5 changed files with 37 additions and 10 deletions

View file

@ -25,7 +25,7 @@
"url": "https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/RDS_legacy.iso" "url": "https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/RDS_legacy.iso"
} }
}, },
"local_path": "/home/koenv/nsrl/", "local_path": "/home/adulau/",
"import": { "import": {
"max_value": 500000000, "max_value": 500000000,
"mod_lines": 2500 "mod_lines": 2500

View file

@ -2,7 +2,11 @@ version = "1.0"
from flask import Flask, url_for, send_from_directory, render_template, make_response, request from flask import Flask, url_for, send_from_directory, render_template, make_response, request
from flask_restx import Resource, Api, reqparse from flask_restx import Resource, Api, reqparse
import redis import redis
import configparser
config = configparser.ConfigParser()
config.read('../etc/server.conf')
stats = config['global'].getboolean('stats')
app = Flask(__name__) app = Flask(__name__)
app.url_map.strict_slashes = False app.url_map.strict_slashes = False
api = Api(app, version=version, title='hashlookup CIRCL API', description='![](https://www.circl.lu/assets/images/circl-logo.png)\n[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. The API can be tested live in the interface below.', doc='/', license='CC-BY', contact='info@circl.lu', ordered=True) api = Api(app, version=version, title='hashlookup CIRCL API', description='![](https://www.circl.lu/assets/images/circl-logo.png)\n[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. The API can be tested live in the interface below.', doc='/', license='CC-BY', contact='info@circl.lu', ordered=True)
@ -24,9 +28,14 @@ class lookup(Resource):
return {'message': 'Expecting a MD5 hex value'}, 400 return {'message': 'Expecting a MD5 hex value'}, 400
if not is_hex(md5): if not is_hex(md5):
return {'message': 'MD5 is not in hex format'}, 400 return {'message': 'MD5 is not in hex format'}, 400
if not rdb.exists("l:{}".format(md5.upper())): k = md5.upper()
score = 1
if not rdb.exists("l:{}".format(k)):
rdb.zincrby("s:nx:md5", score, k)
return {'message': 'Non existing MD5', 'query': md5}, 404 return {'message': 'Non existing MD5', 'query': md5}, 404
sha1 = rdb.get("l:{}".format(md5.upper())) if stats:
rdb.zincrby("s:exist:md5", score, k)
sha1 = rdb.get("l:{}".format(k))
h = rdb.hgetall("h:{}".format(sha1)) h = rdb.hgetall("h:{}".format(sha1))
if "OpSystemCode" in h: if "OpSystemCode" in h:
if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])): if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])):
@ -44,9 +53,14 @@ class lookup(Resource):
return {'message': 'Expecting a SHA-1 hex value'}, 400 return {'message': 'Expecting a SHA-1 hex value'}, 400
if not is_hex(sha1): if not is_hex(sha1):
return {'message': 'SHA-1 is not in hex format'}, 400 return {'message': 'SHA-1 is not in hex format'}, 400
if not rdb.exists("h:{}".format(sha1.upper())): k = sha1.upper()
score = 1
if not rdb.exists("h:{}".format(k)):
rdb.zincrby("s:nx:sha1", score, k)
return {'message': 'Non existing SHA-1', 'query': sha1}, 404 return {'message': 'Non existing SHA-1', 'query': sha1}, 404
h = rdb.hgetall("h:{}".format(sha1.upper())) if stats:
rdb.zincrby("s:exist:sha1", score, k)
h = rdb.hgetall("h:{}".format(k))
if "OpSystemCode" in h: if "OpSystemCode" in h:
if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])): if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])):
h['OpSystemCode'] = rdb.hgetall("h-OpSystemCode:{}".format(h['OpSystemCode'])) h['OpSystemCode'] = rdb.hgetall("h-OpSystemCode:{}".format(h['OpSystemCode']))

8
doc/DATABASE.md Normal file
View file

@ -0,0 +1,8 @@
# Database structure of hashlookup
# Statistics
- `s:nx:md5` sorted set of MD5 non-existing hashes looked up
- `s:nx:sha1` sorted set of SHA1 non-existing hashes looked up
- `s:exist:md5` sorted set of MD5 existing hashes looked up
- `s:exist:sha1` sorted set of SHA1 existing hashes looked up

View file

@ -67,7 +67,7 @@ db-name nsrl
# #
# The DB will be written inside this directory # The DB will be written inside this directory
# Note that you must specify a directory here, not a file name. # Note that you must specify a directory here, not a file name.
dir /home/adulau/nsrl/db dir /home/adulau/git/hashlookup-server/db
# The logs of server will be stored in this directory. If you don't specify # The logs of server will be stored in this directory. If you don't specify
# one directory, by default, we store logs in the working directory that set # one directory, by default, we store logs in the working directory that set
@ -77,7 +77,7 @@ dir /home/adulau/nsrl/db
# When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by # When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by
# default. You can specify a custom pid file location here. # default. You can specify a custom pid file location here.
# pidfile /var/run/kvrocks.pid # pidfile /var/run/kvrocks.pid
pidfile /home/adulau/nsrl/db/kvrocks.pid pidfile /home/adulau/git/hashlookup-server/db/kvrocks.pid
# You can configure a slave instance to accept writes or not. Writing against # You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data # a slave instance may be useful to store some ephemeral data (because data
@ -379,7 +379,7 @@ rocksdb.wal_size_limit_mb 16384
# compression is enabled. # compression is enabled.
# #
# Default: 4KB # Default: 4KB
rocksdb.block_size 16384 rocksdb.block_size 2048
# Indicating if we'd put index/filter blocks to the block cache # Indicating if we'd put index/filter blocks to the block cache
# #
@ -439,5 +439,8 @@ rocksdb.disable_auto_compactions no
################################ NAMESPACE ##################################### ################################ NAMESPACE #####################################
# namespace.test change.me # namespace.test change.me
backup-dir /home/adulau/nsrl/db/backup backup-dir /home/adulau/git/hashlookup-server/db/backup
log-dir /home/adulau/nsrl/db log-dir /home/adulau/git/hashlookup-server/db
auto-resize-block-and-sst yes
cluster-enabled no

2
etc/server.conf.sample Normal file
View file

@ -0,0 +1,2 @@
[global]
stats = yes