mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-12-22 00:26:00 +00:00
new: [option] --analysis to limit the output to a specific analysis
This commit is contained in:
parent
32a899a4a0
commit
24e69a8ad9
2 changed files with 24 additions and 15 deletions
32
README.md
32
README.md
|
@ -31,24 +31,28 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
|
|||
|
||||
~~~~
|
||||
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
||||
[--no-flushdb] [--binary]
|
||||
[--no-flushdb] [--binary] [--analysis ANALYSIS]
|
||||
|
||||
Extract statistical analysis of text
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v V verbose output
|
||||
-f F file to analyse
|
||||
-t T maximum value for the top list (default is 100) -1 is no limit
|
||||
-s display the overall statistics (default is False)
|
||||
-o O output format (default is csv), json, readable
|
||||
-l L language used for the analysis (default is en)
|
||||
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
||||
lematized form)
|
||||
--no-flushdb Don't flush the redisdb, useful when you want to process
|
||||
multiple files and aggregate the results. (by default the
|
||||
redis database is flushed at each run)
|
||||
--binary Output response in binary instead of UTF-8 (default)
|
||||
-h, --help show this help message and exit
|
||||
-v V verbose output
|
||||
-f F file to analyse
|
||||
-t T maximum value for the top list (default is 100) -1 is
|
||||
no limit
|
||||
-s display the overall statistics (default is False)
|
||||
-o O output format (default is csv), json, readable
|
||||
-l L language used for the analysis (default is en)
|
||||
--verbatim Don't use the lemmatized form, use verbatim. (default
|
||||
is the lemmatized form)
|
||||
--no-flushdb Don't flush the redisdb, useful when you want to
|
||||
process multiple files and aggregate the results. (by
|
||||
default the redis database is flushed at each run)
|
||||
--binary set output in binary instead of UTF-8 (default)
|
||||
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
|
||||
hashtag, mention, digit, url, oov, labels, punct).
|
||||
(Default: all analyses are displayed)
|
||||
~~~~
|
||||
|
||||
# example usage of napkin
|
||||
|
|
|
@ -18,7 +18,8 @@ parser.add_argument('-o', help="output format (default is csv), json, readable",
|
|||
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
||||
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
||||
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
||||
parser.add_argument('--binary', help="Output response in binary instead of UTF-8 (default)", default=False, action='store_true')
|
||||
parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
|
||||
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.f is None:
|
||||
|
@ -118,6 +119,10 @@ for entity in doc.ents:
|
|||
if args.o == "json":
|
||||
output_json = {"format":"napkin"}
|
||||
for anal in analysis:
|
||||
if args.analysis == "all" or args.analysis == anal:
|
||||
pass
|
||||
else:
|
||||
continue
|
||||
if args.o == "readable":
|
||||
previous_value = None
|
||||
x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)
|
||||
|
|
Loading…
Reference in a new issue