chg: [download] if a spaCy model is missing, napkin now downloads it automatically

Fix #1

TODO: map the language reported by cld3 to the spaCy model that should be downloaded
This commit is contained in:
Alexandre Dulaunoy 2020-10-18 22:37:00 +02:00
parent 5b6136cfaf
commit 1192d17e86
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -27,11 +27,17 @@ parser.add_argument('--disable-tagger', help="disable tagger component in Spacy"
# Register the remaining command-line options on the parser, then parse argv.
parser.add_argument(
    '--token-span',
    default=None,
    help='Find the sentences where a specific token is located',
)
parser.add_argument(
    '--table-format',
    default="fancy_grid",
    help="set tabulate format (default is fancy_grid)",
)
parser.add_argument(
    '--full-labels',
    action='store_true',
    default=False,
    help="store each label value in a ranked set (default is False)",
)
# Disabled option, kept as a placeholder:
#parser.add_argument('--geolocation', help="export geolocation (default is False)", action='store_true', default=False)

args = parser.parse_args()

# When args.f is None there is nothing to process: show the usage text and stop.
if args.f is None:
    parser.print_help()
    sys.exit()

# Disabled together with the --geolocation option above:
#if args.geolocation:
# args.full_labels = True
if not args.binary: if not args.binary:
redisdb = redis.Redis(host="localhost", port=6380, db=5, encoding='utf-8', decode_responses=True) redisdb = redis.Redis(host="localhost", port=6380, db=5, encoding='utf-8', decode_responses=True)
else: else:
@ -53,8 +59,18 @@ if args.disable_tagger:
disable.append("tagger") disable.append("tagger")
# Supported values of args.l mapped to the spaCy model loaded for each.
_SPACY_MODELS = {
    "fr": "fr_core_news_md",
    "en": "en_core_web_md",
}

_model_name = _SPACY_MODELS.get(args.l)
if _model_name is None:
    sys.exit("Language not supported")

try:
    nlp = spacy.load(_model_name, disable=disable)
except OSError:
    # spaCy raises OSError when the requested model package is not installed.
    # Download the model that was actually requested (the French branch used
    # to fetch the English model here) and retry the load once; a failure of
    # the download or of the second load propagates to the caller.
    print("Downloading missing model")
    spacy.cli.download(_model_name)
    nlp = spacy.load(_model_name, disable=disable)