mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
new: [option] --analysis to limit the output to a specific analysis
This commit is contained in:
parent
32a899a4a0
commit
24e69a8ad9
2 changed files with 24 additions and 15 deletions
20
README.md
20
README.md
|
@ -31,7 +31,7 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
|
||||||
|
|
||||||
~~~~
|
~~~~
|
||||||
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
||||||
[--no-flushdb] [--binary]
|
[--no-flushdb] [--binary] [--analysis ANALYSIS]
|
||||||
|
|
||||||
Extract statistical analysis of text
|
Extract statistical analysis of text
|
||||||
|
|
||||||
|
@ -39,16 +39,20 @@ optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-v V verbose output
|
-v V verbose output
|
||||||
-f F file to analyse
|
-f F file to analyse
|
||||||
-t T maximum value for the top list (default is 100) -1 is no limit
|
-t T maximum value for the top list (default is 100) -1 is
|
||||||
|
no limit
|
||||||
-s display the overall statistics (default is False)
|
-s display the overall statistics (default is False)
|
||||||
-o O output format (default is csv), json, readable
|
-o O output format (default is csv), json, readable
|
||||||
-l L language used for the analysis (default is en)
|
-l L language used for the analysis (default is en)
|
||||||
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
--verbatim Don't use the lemmatized form, use verbatim. (default
|
||||||
lematized form)
|
is the lemmatized form)
|
||||||
--no-flushdb Don't flush the redisdb, useful when you want to process
|
--no-flushdb Don't flush the redisdb, useful when you want to
|
||||||
multiple files and aggregate the results. (by default the
|
process multiple files and aggregate the results. (by
|
||||||
redis database is flushed at each run)
|
default the redis database is flushed at each run)
|
||||||
--binary Output response in binary instead of UTF-8 (default)
|
--binary set output in binary instead of UTF-8 (default)
|
||||||
|
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
|
||||||
|
hashtag, mention, digit, url, oov, labels, punct).
|
||||||
|
(By default, all analyses are displayed)
|
||||||
~~~~
|
~~~~
|
||||||
|
|
||||||
# example usage of napkin
|
# example usage of napkin
|
||||||
|
|
|
@ -18,7 +18,8 @@ parser.add_argument('-o', help="output format (default is csv), json, readable",
|
||||||
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
||||||
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
||||||
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
||||||
parser.add_argument('--binary', help="Output response in binary instead of UTF-8 (default)", default=False, action='store_true')
|
parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
|
||||||
|
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
|
@ -118,6 +119,10 @@ for entity in doc.ents:
|
||||||
if args.o == "json":
|
if args.o == "json":
|
||||||
output_json = {"format":"napkin"}
|
output_json = {"format":"napkin"}
|
||||||
for anal in analysis:
|
for anal in analysis:
|
||||||
|
if args.analysis == "all" or args.analysis == anal:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
continue
|
||||||
if args.o == "readable":
|
if args.o == "readable":
|
||||||
previous_value = None
|
previous_value = None
|
||||||
x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)
|
x = redisdb.zrevrange(anal, 1, args.t, withscores=True, score_cast_func=int)
|
||||||
|
|
Loading…
Reference in a new issue