new: [option] --analysis to limit the output to a specific analysis

This commit is contained in:
Alexandre Dulaunoy 2020-10-09 23:23:36 +02:00
parent 32a899a4a0
commit 24e69a8ad9
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
2 changed files with 24 additions and 15 deletions

View file

@ -31,24 +31,28 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
~~~~
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
[--no-flushdb] [--binary]
[--no-flushdb] [--binary] [--analysis ANALYSIS]
Extract statistical analysis of text
optional arguments:
-h, --help show this help message and exit
-v V verbose output
-f F file to analyse
-t T maximum value for the top list (default is 100) -1 is no limit
-s display the overall statistics (default is False)
-o O output format (default is csv), json, readable
-l L language used for the analysis (default is en)
--verbatim Don't use the lemmatized form, use verbatim. (default is the
lemmatized form)
--no-flushdb Don't flush the redisdb, useful when you want to process
multiple files and aggregate the results. (by default the
redis database is flushed at each run)
--binary Output response in binary instead of UTF-8 (default)
-h, --help show this help message and exit
-v V verbose output
-f F file to analyse
-t T maximum value for the top list (default is 100) -1 is
no limit
-s display the overall statistics (default is False)
-o O output format (default is csv), json, readable
-l L language used for the analysis (default is en)
--verbatim Don't use the lemmatized form, use verbatim. (default
is the lemmatized form)
--no-flushdb Don't flush the redisdb, useful when you want to
process multiple files and aggregate the results. (by
default the redis database is flushed at each run)
--binary set output in binary instead of UTF-8 (default)
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
hashtag, mention, digit, url, oov, labels, punct).
(Default: all analyses are displayed)
~~~~
# example usage of napkin

View file

@ -18,7 +18,8 @@ parser.add_argument('-o', help="output format (default is csv), json, readable",
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
parser.add_argument('--binary', help="Output response in binary instead of UTF-8 (default)", default=False, action='store_true')
parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
args = parser.parse_args()
if args.f is None:
@ -118,6 +119,10 @@ for entity in doc.ents:
if args.o == "json":
output_json = {"format":"napkin"}
for anal in analysis:
if args.analysis == "all" or args.analysis == anal:
pass
else:
continue
if args.o == "readable":
previous_value = None
x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)