From 3d71d9288e5b79049b54846c853715019c218493 Mon Sep 17 00:00:00 2001
From: Alexandre Dulaunoy
Date: Thu, 1 Oct 2020 23:06:39 +0200
Subject: [PATCH] chg: [args] add an option to force the language

---
 bin/napkin.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bin/napkin.py b/bin/napkin.py
index 7f2a704..ae1ef89 100644
--- a/bin/napkin.py
+++ b/bin/napkin.py
@@ -14,6 +14,8 @@ parser.add_argument('-f', help="file to analyse")
 parser.add_argument('-t', help="maximum value for the top list (default is 100) -1 is no limit", default=100)
 parser.add_argument('-s', help="display the overall statistics (default is False)", default=False, action='store_true')
 parser.add_argument('-o', help="output format (default is csv), json", default="csv")
+parser.add_argument("-l", help="language used for the analysis (default is en)", default="en")
+
 args = parser.parse_args()
 if args.f is None:
     parser.print_help()
@@ -27,7 +29,11 @@ except:
     print("Redis database on port 6380 is not running...", file=sys.stderr)
     sys.exit()
 
-nlp = spacy.load("en_core_web_md")
+if args.l == "fr":
+    nlp = spacy.load("fr_core_news_md")
+else:
+    nlp = spacy.load("en_core_web_md")
+
 nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)
 nlp.max_length = 2000000
 