new: [option] --analysis to limit the output to a specific analysis

This commit is contained in:
Alexandre Dulaunoy 2020-10-09 23:23:36 +02:00
parent 32a899a4a0
commit 24e69a8ad9
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
2 changed files with 24 additions and 15 deletions

View file

@ -31,7 +31,7 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
~~~~ ~~~~
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim] usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
[--no-flushdb] [--binary] [--no-flushdb] [--binary] [--analysis ANALYSIS]
Extract statistical analysis of text Extract statistical analysis of text
@ -39,16 +39,20 @@ optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-v V verbose output -v V verbose output
-f F file to analyse -f F file to analyse
-t T maximum value for the top list (default is 100) -1 is no limit -t T maximum value for the top list (default is 100) -1 is
no limit
-s display the overall statistics (default is False) -s display the overall statistics (default is False)
-o O output format (default is csv), json, readable -o O output format (default is csv), json, readable
-l L language used for the analysis (default is en) -l L language used for the analysis (default is en)
--verbatim Don't use the lemmatized form, use verbatim. (default is the --verbatim Don't use the lemmatized form, use verbatim. (default
lemmatized form) is the lemmatized form)
--no-flushdb Don't flush the redisdb, useful when you want to process --no-flushdb Don't flush the redisdb, useful when you want to
multiple files and aggregate the results. (by default the process multiple files and aggregate the results. (by
redis database is flushed at each run) default the redis database is flushed at each run)
--binary Output response in binary instead of UTF-8 (default) --binary set output in binary instead of UTF-8 (default)
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
hashtag, mention, digit, url, oov, labels, punct).
(Default: all analyses are displayed)
~~~~ ~~~~
# example usage of napkin # example usage of napkin

View file

@ -18,7 +18,8 @@ parser.add_argument('-o', help="output format (default is csv), json, readable",
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en") parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true') parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true') parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
parser.add_argument('--binary', help="Output response in binary instead of UTF-8 (default)", default=False, action='store_true') parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
args = parser.parse_args() args = parser.parse_args()
if args.f is None: if args.f is None:
@ -118,6 +119,10 @@ for entity in doc.ents:
if args.o == "json": if args.o == "json":
output_json = {"format":"napkin"} output_json = {"format":"napkin"}
for anal in analysis: for anal in analysis:
if args.analysis == "all" or args.analysis == anal:
pass
else:
continue
if args.o == "readable": if args.o == "readable":
previous_value = None previous_value = None
x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int) x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)