mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
chg: [download] if a spacy.io model is missing, napkin automatically downloads it
Fix #1. TODO: map the cld3-detected language to the matching model to be downloaded
This commit is contained in:
parent
5b6136cfaf
commit
1192d17e86
1 changed files with 16 additions and 0 deletions
|
@ -27,11 +27,17 @@ parser.add_argument('--disable-tagger', help="disable tagger component in Spacy"
|
||||||
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
|
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
|
||||||
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
|
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
|
||||||
parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
|
parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
|
||||||
|
#parser.add_argument('--geolocation', help="export geolocation (default is False)", action='store_true', default=False)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
#if args.geolocation:
|
||||||
|
# args.full_labels = True
|
||||||
|
|
||||||
if not args.binary:
|
if not args.binary:
|
||||||
redisdb = redis.Redis(host="localhost", port=6380, db=5, encoding='utf-8', decode_responses=True)
|
redisdb = redis.Redis(host="localhost", port=6380, db=5, encoding='utf-8', decode_responses=True)
|
||||||
else:
|
else:
|
||||||
|
@ -53,8 +59,18 @@ if args.disable_tagger:
|
||||||
disable.append("tagger")
|
disable.append("tagger")
|
||||||
|
|
||||||
if args.l == "fr":
|
if args.l == "fr":
|
||||||
|
try :
|
||||||
|
nlp = spacy.load("fr_core_news_md", disable=disable)
|
||||||
|
except:
|
||||||
|
print("Downloading missing model")
|
||||||
|
spacy.cli.download("fr_core_news_md")  # fetch the French model, which is the one spacy.load() is called with right after
|
||||||
nlp = spacy.load("fr_core_news_md", disable=disable)
|
nlp = spacy.load("fr_core_news_md", disable=disable)
|
||||||
elif args.l == "en":
|
elif args.l == "en":
|
||||||
|
try:
|
||||||
|
nlp = spacy.load("en_core_web_md", disable=disable)
|
||||||
|
except:
|
||||||
|
print("Downloading missing model")
|
||||||
|
spacy.cli.download("en_core_web_md")
|
||||||
nlp = spacy.load("en_core_web_md", disable=disable)
|
nlp = spacy.load("en_core_web_md", disable=disable)
|
||||||
else:
|
else:
|
||||||
sys.exit("Language not supported")
|
sys.exit("Language not supported")
|
||||||
|
|
Loading…
Reference in a new issue