diff --git a/bin/import-poc/config.json b/bin/import-poc/config.json index b22c0ef..dfe36e2 100644 --- a/bin/import-poc/config.json +++ b/bin/import-poc/config.json @@ -25,7 +25,7 @@ "url": "https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/RDS_legacy.iso" } }, - "local_path": "/home/koenv/nsrl/", + "local_path": "/home/adulau/", "import": { "max_value": 500000000, "mod_lines": 2500 diff --git a/bin/server.py b/bin/server.py index 5105bce..f2dc9b2 100644 --- a/bin/server.py +++ b/bin/server.py @@ -2,7 +2,11 @@ version = "1.0" from flask import Flask, url_for, send_from_directory, render_template, make_response, request from flask_restx import Resource, Api, reqparse import redis +import configparser +config = configparser.ConfigParser() +config.read('../etc/server.conf') +stats = config['global'].getboolean('stats') app = Flask(__name__) app.url_map.strict_slashes = False api = Api(app, version=version, title='hashlookup CIRCL API', description='![](https://www.circl.lu/assets/images/circl-logo.png)\n[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. 
The API can be tested live in the interface below.', doc='/', license='CC-BY', contact='info@circl.lu', ordered=True) @@ -24,9 +28,14 @@ class lookup(Resource): return {'message': 'Expecting a MD5 hex value'}, 400 if not is_hex(md5): return {'message': 'MD5 is not in hex format'}, 400 - if not rdb.exists("l:{}".format(md5.upper())): + k = md5.upper() + score = 1 + if not rdb.exists("l:{}".format(k)): + rdb.zincrby("s:nx:md5", score, k) return {'message': 'Non existing MD5', 'query': md5}, 404 - sha1 = rdb.get("l:{}".format(md5.upper())) + if stats: + rdb.zincrby("s:exist:md5", score, k) + sha1 = rdb.get("l:{}".format(k)) h = rdb.hgetall("h:{}".format(sha1)) if "OpSystemCode" in h: if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])): @@ -44,9 +53,14 @@ class lookup(Resource): return {'message': 'Expecting a SHA-1 hex value'}, 400 if not is_hex(sha1): return {'message': 'SHA-1 is not in hex format'}, 400 - if not rdb.exists("h:{}".format(sha1.upper())): + k = sha1.upper() + score = 1 + if not rdb.exists("h:{}".format(k)): + rdb.zincrby("s:nx:sha1", score, k) return {'message': 'Non existing SHA-1', 'query': sha1}, 404 - h = rdb.hgetall("h:{}".format(sha1.upper())) + if stats: + rdb.zincrby("s:exist:sha1", score, k) + h = rdb.hgetall("h:{}".format(k)) if "OpSystemCode" in h: if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])): h['OpSystemCode'] = rdb.hgetall("h-OpSystemCode:{}".format(h['OpSystemCode'])) diff --git a/doc/DATABASE.md b/doc/DATABASE.md new file mode 100644 index 0000000..972d5b0 --- /dev/null +++ b/doc/DATABASE.md @@ -0,0 +1,8 @@ +# Database structure of hashlookup + +# Statistics + +- `s:nx:md5` sorted set of MD5 non-existing hashes looked up +- `s:nx:sha1` sorted set of SHA1 non-existing hashes looked up +- `s:exist:md5` sorted set of MD5 existing hashes looked up +- `s:exist:sha1` sorted set of SHA1 existing hashes looked up diff --git a/etc/kvrocks.conf b/etc/kvrocks.conf index 9700d1f..97cfae9 100644 --- a/etc/kvrocks.conf +++ 
b/etc/kvrocks.conf @@ -67,7 +67,7 @@ db-name nsrl # # The DB will be written inside this directory # Note that you must specify a directory here, not a file name. -dir /home/adulau/nsrl/db +dir /home/adulau/git/hashlookup-server/db # The logs of server will be stored in this directory. If you don't specify # one directory, by default, we store logs in the working directory that set @@ -77,7 +77,7 @@ dir /home/adulau/nsrl/db # When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by # default. You can specify a custom pid file location here. # pidfile /var/run/kvrocks.pid -pidfile /home/adulau/nsrl/db/kvrocks.pid +pidfile /home/adulau/git/hashlookup-server/db/kvrocks.pid # You can configure a slave instance to accept writes or not. Writing against # a slave instance may be useful to store some ephemeral data (because data @@ -379,7 +379,7 @@ rocksdb.wal_size_limit_mb 16384 # compression is enabled. # # Default: 4KB -rocksdb.block_size 16384 +rocksdb.block_size 2048 # Indicating if we'd put index/filter blocks to the block cache # @@ -439,5 +439,8 @@ rocksdb.disable_auto_compactions no ################################ NAMESPACE ##################################### # namespace.test change.me -backup-dir /home/adulau/nsrl/db/backup -log-dir /home/adulau/nsrl/db +backup-dir /home/adulau/git/hashlookup-server/db/backup +log-dir /home/adulau/git/hashlookup-server/db + +auto-resize-block-and-sst yes +cluster-enabled no diff --git a/etc/server.conf.sample b/etc/server.conf.sample new file mode 100644 index 0000000..d7bc1fd --- /dev/null +++ b/etc/server.conf.sample @@ -0,0 +1,2 @@ +[global] +stats = yes