From 26244739dd8fa465376466ad84537cdd84b9d0f8 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Thu, 8 Oct 2020 23:22:00 +0200 Subject: [PATCH] new: [option] Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. --- README.md | 24 +++++++++++++++--------- bin/napkin.py | 6 +++++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1932b2a..9a16a9b 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Analysis features are : Verbs and nouns are in their lemmatized form by default but the option `--verbatim` allows to keep the original inflection. +Intermediate results are stored in a Redis database to allow the analysis of multiple text files. + # requirements - Python >= 3.6 @@ -25,19 +27,23 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat ~~~~ usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim] + [--no-flushdb] Extract statistical analysis of text optional arguments: - -h, --help show this help message and exit - -v V verbose output - -f F file to analyse - -t T maximum value for the top list (default is 100) -1 is no limit - -s display the overall statistics (default is False) - -o O output format (default is csv), json - -l L language used for the analysis (default is en) - --verbatim Don't use the lemmatized form, use verbatim. (default is the - lematized form) + -h, --help show this help message and exit + -v V verbose output + -f F file to analyse + -t T maximum value for the top list (default is 100) -1 is no limit + -s display the overall statistics (default is False) + -o O output format (default is csv), json + -l L language used for the analysis (default is en) + --verbatim Don't use the lemmatized form, use verbatim. (default is the + lematized form) + --no-flushdb Don't flush the redisdb, useful when you want to process + multiple files and aggregate the results. (by default the + redis database is flushed at each run) ~~~~ # example usage of napkin diff --git a/bin/napkin.py b/bin/napkin.py index 5be5c05..e66198d 100644 --- a/bin/napkin.py +++ b/bin/napkin.py @@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False parser.add_argument('-o', help="output format (default is csv), json", default="csv") parser.add_argument('-l', help="language used for the analysis (default is en)", default="en") parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true') +parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true') args = parser.parse_args() if args.f is None: @@ -25,11 +26,14 @@ if args.f is None: redisdb = redis.Redis(host="localhost", port=6380, db=5) try: - redisdb.flushdb() + redisdb.ping() except: print("Redis database on port 6380 is not running...", file=sys.stderr) sys.exit() +if not args.no_flushdb: + redisdb.flushdb() + if args.l == "fr": nlp = spacy.load("fr_core_news_md") else: