new: [option] Don't flush the redisdb, useful when you want to process multiple files and aggregate the results.

This commit is contained in:
Alexandre Dulaunoy 2020-10-08 23:22:00 +02:00
parent 949e41d19f
commit 26244739dd
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
2 changed files with 20 additions and 10 deletions

View file

@ -15,6 +15,8 @@ Analysis features are :
Verbs and nouns are in their lemmatized form by default, but the option `--verbatim` allows keeping the original inflection. Verbs and nouns are in their lemmatized form by default, but the option `--verbatim` allows keeping the original inflection.
Intermediate results are stored in a Redis database to allow the analysis of multiple text files.
# requirements # requirements
- Python >= 3.6 - Python >= 3.6
@ -25,19 +27,23 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat
~~~~ ~~~~
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim] usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
[--no-flushdb]
Extract statistical analysis of text Extract statistical analysis of text
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-v V verbose output -v V verbose output
-f F file to analyse -f F file to analyse
-t T maximum value for the top list (default is 100) -1 is no limit -t T maximum value for the top list (default is 100) -1 is no limit
-s display the overall statistics (default is False) -s display the overall statistics (default is False)
-o O output format (default is csv), json -o O output format (default is csv), json
-l L language used for the analysis (default is en) -l L language used for the analysis (default is en)
--verbatim Don't use the lemmatized form, use verbatim. (default is the --verbatim Don't use the lemmatized form, use verbatim. (default is the
lematized form) lematized form)
--no-flushdb Don't flush the redisdb, useful when you want to process
multiple files and aggregate the results. (by default the
redis database is flushed at each run)
~~~~ ~~~~
# example usage of napkin # example usage of napkin

View file

@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False
parser.add_argument('-o', help="output format (default is csv), json", default="csv") parser.add_argument('-o', help="output format (default is csv), json", default="csv")
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en") parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true') parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
args = parser.parse_args() args = parser.parse_args()
if args.f is None: if args.f is None:
@ -25,11 +26,14 @@ if args.f is None:
redisdb = redis.Redis(host="localhost", port=6380, db=5) redisdb = redis.Redis(host="localhost", port=6380, db=5)
try: try:
redisdb.flushdb() redisdb.ping()
except: except:
print("Redis database on port 6380 is not running...", file=sys.stderr) print("Redis database on port 6380 is not running...", file=sys.stderr)
sys.exit() sys.exit()
if not args.no_flushdb:
redisdb.flushdb()
if args.l == "fr": if args.l == "fr":
nlp = spacy.load("fr_core_news_md") nlp = spacy.load("fr_core_news_md")
else: else: