mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-25 03:17:07 +00:00
new: [option] to disable parser and/or tagger from the standard processing pipeline of Spacy
If you don't need any of the syntactic information while using napkin, you can disable the parser and the tagger. Doing so saves some memory and processing time. By default, both components remain active, as napkin might make use of the syntactic information in the future.
This commit is contained in:
parent
ab728e60c6
commit
85044335f4
2 changed files with 13 additions and 2 deletions
|
@ -32,6 +32,7 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
|
||||||
~~~~
|
~~~~
|
||||||
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
||||||
[--no-flushdb] [--binary] [--analysis ANALYSIS]
|
[--no-flushdb] [--binary] [--analysis ANALYSIS]
|
||||||
|
[--disable-parser] [--disable-tagger]
|
||||||
|
|
||||||
Extract statistical analysis of text
|
Extract statistical analysis of text
|
||||||
|
|
||||||
|
@ -53,6 +54,8 @@ optional arguments:
|
||||||
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
|
--analysis ANALYSIS Limit output to a specific analysis (verb, noun,
|
||||||
hashtag, mention, digit, url, oov, labels, punct).
|
hashtag, mention, digit, url, oov, labels, punct).
|
||||||
(Default is all analysis are displayed)
|
(Default is all analysis are displayed)
|
||||||
|
--disable-parser disable parser component in Spacy
|
||||||
|
--disable-tagger disable tagger component in Spacy
|
||||||
~~~~
|
~~~~
|
||||||
|
|
||||||
# example usage of napkin
|
# example usage of napkin
|
||||||
|
|
|
@ -20,6 +20,8 @@ parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verba
|
||||||
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
||||||
parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
|
parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
|
||||||
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
|
parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
|
||||||
|
parser.add_argument('--disable-parser', help="disable parser component in Spacy", default=False, action='store_true')
|
||||||
|
parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
|
@ -40,10 +42,16 @@ except:
|
||||||
if not args.no_flushdb:
|
if not args.no_flushdb:
|
||||||
redisdb.flushdb()
|
redisdb.flushdb()
|
||||||
|
|
||||||
|
disable = []
|
||||||
|
if args.disable_parser:
|
||||||
|
disable.append("parser")
|
||||||
|
if args.disable_tagger:
|
||||||
|
disable.append("tagger")
|
||||||
|
|
||||||
if args.l == "fr":
|
if args.l == "fr":
|
||||||
nlp = spacy.load("fr_core_news_md")
|
nlp = spacy.load("fr_core_news_md", disable=disable)
|
||||||
elif args.l == "en":
|
elif args.l == "en":
|
||||||
nlp = spacy.load("en_core_web_md")
|
nlp = spacy.load("en_core_web_md", disable=disable)
|
||||||
else:
|
else:
|
||||||
sys.exit("Language not supported")
|
sys.exit("Language not supported")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue