new: [feature] option to store the values of each entity label in a per-label Redis sorted set

This commit is contained in:
Alexandre Dulaunoy 2020-10-15 07:12:15 +02:00
parent 7bb9a78096
commit 5b6136cfaf
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -26,6 +26,7 @@ parser.add_argument('--disable-parser', help="disable parser component in Spacy"
parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true') parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located') parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid") parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
args = parser.parse_args() args = parser.parse_args()
if args.f is None: if args.f is None:
parser.print_help() parser.print_help()
@ -78,6 +79,15 @@ if args.token_span and not disable:
redisdb.hset("stats", "token", doc.__len__()) redisdb.hset("stats", "token", doc.__len__())
labels = [ "EVENT", "PERCENT", "MONEY", "FAC", "TIME", "QUANTITY", "WORK_OF_ART", "LANGUAGE", "PRODUCT", "LOC", "LAW", "DATE", "ORDINAL", "NORP", "ORG", "CARDINAL", "GPE", "PERSON"]
for entity in doc.ents:
redisdb.zincrby("labels", 1, entity.label_)
if not args.full_labels:
continue
if entity.label_ in labels:
redisdb.zincrby("label:{}".format(entity.label_), 1, entity.text)
for token in doc: for token in doc:
if args.token_span is not None and not disable: if args.token_span is not None and not disable:
if token.text == args.token_span: if token.text == args.token_span:
@ -130,9 +140,6 @@ for token in doc:
redisdb.hincrby("stats", "oov", 1) redisdb.hincrby("stats", "oov", 1)
for entity in doc.ents:
redisdb.zincrby("labels", 1, entity.label_)
if args.o == "json": if args.o == "json":
output_json = {"format":"napkin", "version": version} output_json = {"format":"napkin", "version": version}
for anal in analysis: for anal in analysis: