diff --git a/README.md b/README.md index 179eb03..aa8bbe9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Intermediate results are stored in a Redis database to allow the analysis of mul ~~~~ usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim] [--no-flushdb] [--binary] [--analysis ANALYSIS] + [--disable-parser] [--disable-tagger] Extract statistical analysis of text @@ -53,6 +54,8 @@ optional arguments: --analysis ANALYSIS Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed) + --disable-parser disable parser component in Spacy + --disable-tagger disable tagger component in Spacy ~~~~ # example usage of napkin diff --git a/bin/napkin.py b/bin/napkin.py index ec10f85..193e8aa 100644 --- a/bin/napkin.py +++ b/bin/napkin.py @@ -20,6 +20,8 @@ parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verba parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true') parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true') parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all') +parser.add_argument('--disable-parser', help="disable parser component in Spacy", default=False, action='store_true') +parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true') args = parser.parse_args() if args.f is None: @@ -40,10 +42,16 @@ except: if not args.no_flushdb: redisdb.flushdb() +disable = [] +if args.disable_parser: + disable.append("parser") +if args.disable_tagger: + disable.append("tagger") + if args.l == "fr": - nlp = spacy.load("fr_core_news_md") + nlp = spacy.load("fr_core_news_md", disable=disable) elif args.l == "en": - nlp = spacy.load("en_core_web_md") + nlp = spacy.load("en_core_web_md", disable=disable) else: sys.exit("Language not supported")