diff --git a/README.md b/README.md index 46a2288..179eb03 100644 --- a/README.md +++ b/README.md @@ -31,24 +31,28 @@ Intermediate results are stored in a Redis database to allow the analysis of mul ~~~~ usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim] - [--no-flushdb] [--binary] + [--no-flushdb] [--binary] [--analysis ANALYSIS] Extract statistical analysis of text optional arguments: - -h, --help show this help message and exit - -v V verbose output - -f F file to analyse - -t T maximum value for the top list (default is 100) -1 is no limit - -s display the overall statistics (default is False) - -o O output format (default is csv), json, readable - -l L language used for the analysis (default is en) - --verbatim Don't use the lemmatized form, use verbatim. (default is the - lematized form) - --no-flushdb Don't flush the redisdb, useful when you want to process - multiple files and aggregate the results. (by default the - redis database is flushed at each run) - --binary Output response in binary instead of UTF-8 (default) + -h, --help show this help message and exit + -v V verbose output + -f F file to analyse + -t T maximum value for the top list (default is 100) -1 is + no limit + -s display the overall statistics (default is False) + -o O output format (default is csv), json, readable + -l L language used for the analysis (default is en) + --verbatim Don't use the lemmatized form, use verbatim. (default + is the lematized form) + --no-flushdb Don't flush the redisdb, useful when you want to + process multiple files and aggregate the results. (by + default the redis database is flushed at each run) + --binary set output in binary instead of UTF-8 (default) + --analysis ANALYSIS Limit output to a specific analysis (verb, noun, + hashtag, mention, digit, url, oov, labels, punct). + (Default is all analysis are displayed) ~~~~ # example usage of napkin diff --git a/bin/napkin.py b/bin/napkin.py index 164d564..ec10f85 100644 --- a/bin/napkin.py +++ b/bin/napkin.py @@ -18,7 +18,8 @@ parser.add_argument('-o', help="output format (default is csv), json, readable", parser.add_argument('-l', help="language used for the analysis (default is en)", default="en") parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true') parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true') -parser.add_argument('--binary', help="Output response in binary instead of UTF-8 (default)", default=False, action='store_true') +parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true') +parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all') args = parser.parse_args() if args.f is None: @@ -118,6 +119,10 @@ for entity in doc.ents: if args.o == "json": output_json = {"format":"napkin"} for anal in analysis: + if args.analysis == "all" or args.analysis == anal: + pass + else: + continue if args.o == "readable": previous_value = None x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)