new: [option] Don't flush the redisdb, useful when you want to process multiple files and aggregate the results.

2024-12-22 00:26:00 +00:00 · 2020-10-08 23:22:00 +02:00 · 2020-10-08 23:22:00 +02:00 · 26244739dd
commit 26244739dd
parent 949e41d19f
2 changed files with 20 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Analysis features are :

 Verbs and nouns are in their lemmatized form by default but the option `--verbatim` allows to keep the original inflection.

+Intermediate results are stored in a Redis database to allow the analysis of multiple text files.
+
 # requirements

 - Python >= 3.6
@@ -25,19 +27,23 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat

 ~~~~
 usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
+                 [--no-flushdb]

 Extract statistical analysis of text

 optional arguments:
-  -h, --help  show this help message and exit
-  -v V        verbose output
-  -f F        file to analyse
-  -t T        maximum value for the top list (default is 100) -1 is no limit
-  -s          display the overall statistics (default is False)
-  -o O        output format (default is csv), json
-  -l L        language used for the analysis (default is en)
-  --verbatim  Don't use the lemmatized form, use verbatim. (default is the
-              lematized form)
+  -h, --help    show this help message and exit
+  -v V          verbose output
+  -f F          file to analyse
+  -t T          maximum value for the top list (default is 100) -1 is no limit
+  -s            display the overall statistics (default is False)
+  -o O          output format (default is csv), json
+  -l L          language used for the analysis (default is en)
+  --verbatim    Don't use the lemmatized form, use verbatim. (default is the
+                lematized form)
+  --no-flushdb  Don't flush the redisdb, useful when you want to process
+                multiple files and aggregate the results. (by default the
+                redis database is flushed at each run)
 ~~~~

 # example usage of napkin
--- a/bin/napkin.py
+++ b/bin/napkin.py
@@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False
 parser.add_argument('-o', help="output format (default is csv), json", default="csv")
 parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
 parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
+parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')

 args = parser.parse_args()
 if args.f is None:
@@ -25,11 +26,14 @@ if args.f is None:
 redisdb = redis.Redis(host="localhost", port=6380, db=5)

 try:
-    redisdb.flushdb()
+    redisdb.ping()
 except:
    print("Redis database on port 6380 is not running...", file=sys.stderr)
    sys.exit()

+if not args.no_flushdb:
+    redisdb.flushdb()
+
 if args.l == "fr":
    nlp = spacy.load("fr_core_news_md")
 else: