mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-12-22 00:26:00 +00:00
chg: [download] if a spacy.io model is missing, napkin automatically downloads it
Fix #1 TODO: map cld3 language to potential model to be downloaded
This commit is contained in:
parent
5b6136cfaf
commit
1192d17e86
1 changed files with 16 additions and 0 deletions
|
@ -27,11 +27,17 @@ parser.add_argument('--disable-tagger', help="disable tagger component in Spacy"
|
|||
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
|
||||
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
|
||||
parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
|
||||
#parser.add_argument('--geolocation', help="export geolocation (default is False)", action='store_true', default=False)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.f is None:
|
||||
parser.print_help()
|
||||
sys.exit()
|
||||
|
||||
#if args.geolocation:
|
||||
# args.full_labels = True
|
||||
|
||||
if not args.binary:
|
||||
redisdb = redis.Redis(host="localhost", port=6380, db=5, encoding='utf-8', decode_responses=True)
|
||||
else:
|
||||
|
@ -53,8 +59,18 @@ if args.disable_tagger:
|
|||
disable.append("tagger")
|
||||
|
||||
# Map each supported language code (value of args.l) to the spaCy model
# used to process it. Add new languages here.
spacy_models = {
    "fr": "fr_core_news_md",
    "en": "en_core_web_md",
}

if args.l not in spacy_models:
    sys.exit("Language not supported")

# Use a single name for both loading and downloading so the model that gets
# fetched is always the one we then try to load (previously the French branch
# downloaded the English model and the retry failed).
spacy_model_name = spacy_models[args.l]
try:
    nlp = spacy.load(spacy_model_name, disable=disable)
except OSError:
    # spaCy signals a missing/uninstalled model with OSError; only that case
    # should trigger a download, not Ctrl-C or unrelated failures.
    print("Downloading missing model")
    spacy.cli.download(spacy_model_name)
    nlp = spacy.load(spacy_model_name, disable=disable)
|
||||
|
|
Loading…
Reference in a new issue