From 26244739dd8fa465376466ad84537cdd84b9d0f8 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy <a@foo.be>
Date: Thu, 8 Oct 2020 23:22:00 +0200
Subject: [PATCH] new: [option] --no-flushdb: don't flush the Redis database,
 useful when you want to process multiple files and aggregate the results.

---
 README.md     | 24 +++++++++++++++---------
 bin/napkin.py |  6 +++++-
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 1932b2a..9a16a9b 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Analysis features are :
 
 Verbs and nouns are in their lemmatized form by default but the option `--verbatim` allows to keep the original inflection.
 
+Intermediate results are stored in a Redis database to allow the analysis of multiple text files.
+
 # requirements
 
 - Python >= 3.6
@@ -25,19 +27,23 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat
 
 ~~~~
 usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
+                 [--no-flushdb]
 
 Extract statistical analysis of text
 
 optional arguments:
-  -h, --help  show this help message and exit
-  -v V        verbose output
-  -f F        file to analyse
-  -t T        maximum value for the top list (default is 100) -1 is no limit
-  -s          display the overall statistics (default is False)
-  -o O        output format (default is csv), json
-  -l L        language used for the analysis (default is en)
-  --verbatim  Don't use the lemmatized form, use verbatim. (default is the
-              lematized form)
+  -h, --help    show this help message and exit
+  -v V          verbose output
+  -f F          file to analyse
+  -t T          maximum value for the top list (default is 100) -1 is no limit
+  -s            display the overall statistics (default is False)
+  -o O          output format (default is csv), json
+  -l L          language used for the analysis (default is en)
+  --verbatim    Don't use the lemmatized form, use verbatim. (default is the
+                lematized form)
+  --no-flushdb  Don't flush the redisdb, useful when you want to process
+                multiple files and aggregate the results. (by default the
+                redis database is flushed at each run)
 ~~~~
 
 # example usage of napkin
diff --git a/bin/napkin.py b/bin/napkin.py
index 5be5c05..e66198d 100644
--- a/bin/napkin.py
+++ b/bin/napkin.py
@@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False
 parser.add_argument('-o', help="output format (default is csv), json", default="csv")
 parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
 parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
+parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
 
 args = parser.parse_args()
 if args.f is None:
@@ -25,11 +26,14 @@ if args.f is None:
 redisdb = redis.Redis(host="localhost", port=6380, db=5)
 
 try:
-    redisdb.flushdb()
+    redisdb.ping()
 except:
     print("Redis database on port 6380 is not running...", file=sys.stderr)
     sys.exit()
 
+if not args.no_flushdb:
+    redisdb.flushdb()
+
 if args.l == "fr":
     nlp = spacy.load("fr_core_news_md")
 else: