From 85044335f4e99712d82249e8725404f694b9ef93 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy <a@foo.be>
Date: Sun, 11 Oct 2020 11:04:30 +0200
Subject: [PATCH] new: [option] to disable parser and/or tagger from the
 standard processing pipeline of Spacy

If you don't need any of the syntactic information while using napkin,
you can disable the parser and the tagger. This saves memory and
processing time. By default, both remain active, as napkin might use
some of the syntactic information in the future.
---
 README.md     |  3 +++
 bin/napkin.py | 12 ++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 179eb03..aa8bbe9 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ Intermediate results are stored in a Redis database to allow the analysis of mul
 ~~~~
 usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
                  [--no-flushdb] [--binary] [--analysis ANALYSIS]
+                 [--disable-parser] [--disable-tagger]
 
 Extract statistical analysis of text
 
@@ -53,6 +54,8 @@ optional arguments:
   --analysis ANALYSIS  Limit output to a specific analysis (verb, noun,
                        hashtag, mention, digit, url, oov, labels, punct).
                        (Default is all analysis are displayed)
+  --disable-parser     disable parser component in Spacy
+  --disable-tagger     disable tagger component in Spacy
 ~~~~
 
 # example usage of napkin
diff --git a/bin/napkin.py b/bin/napkin.py
index ec10f85..193e8aa 100644
--- a/bin/napkin.py
+++ b/bin/napkin.py
@@ -20,6 +20,8 @@ parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verba
 parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
 parser.add_argument('--binary', help="set output in binary instead of UTF-8 (default)", default=False, action='store_true')
 parser.add_argument('--analysis', help="Limit output to a specific analysis (verb, noun, hashtag, mention, digit, url, oov, labels, punct). (Default is all analysis are displayed)", default='all')
+parser.add_argument('--disable-parser', help="disable parser component in Spacy", default=False, action='store_true')
+parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
 
 args = parser.parse_args()
 if args.f is None:
@@ -40,10 +42,16 @@ except:
 if not args.no_flushdb:
     redisdb.flushdb()
 
+disable = []
+if args.disable_parser:
+    disable.append("parser")
+if args.disable_tagger:
+    disable.append("tagger")
+
 if args.l == "fr":
-    nlp = spacy.load("fr_core_news_md")
+    nlp = spacy.load("fr_core_news_md", disable=disable)
 elif args.l == "en":
-    nlp = spacy.load("en_core_web_md")
+    nlp = spacy.load("en_core_web_md", disable=disable)
 else:
     sys.exit("Language not supported")