new: [feature] option to save all labels in redis ranked set

2024-11-22 01:47:06 +00:00 · 2020-10-15 07:12:15 +02:00 · 2020-10-15 07:12:15 +02:00 · 5b6136cfaf
commit 5b6136cfaf
parent 7bb9a78096
1 changed files with 10 additions and 3 deletions
--- a/bin/napkin.py
+++ b/bin/napkin.py
@@ -26,6 +26,7 @@ parser.add_argument('--disable-parser', help="disable parser component in Spacy"
 parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
 parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
 parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
 parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
 args = parser.parse_args()
 if args.f is None:
    parser.print_help()
@@ -78,6 +79,15 @@ if args.token_span and not disable:
 redisdb.hset("stats", "token", doc.__len__())
 labels = [ "EVENT", "PERCENT", "MONEY", "FAC", "TIME", "QUANTITY", "WORK_OF_ART", "LANGUAGE", "PRODUCT", "LOC", "LAW", "DATE", "ORDINAL", "NORP", "ORG", "CARDINAL", "GPE", "PERSON"]
 for entity in doc.ents:
        redisdb.zincrby("labels", 1, entity.label_)
        if not args.full_labels:
            continue
        if entity.label_ in labels:
            redisdb.zincrby("label:{}".format(entity.label_), 1, entity.text)
 for token in doc:
        if args.token_span is not None and not disable:
            if token.text == args.token_span:
@@ -130,9 +140,6 @@ for token in doc:
            redisdb.hincrby("stats", "oov", 1)
 for entity in doc.ents:
        redisdb.zincrby("labels", 1, entity.label_)
 if args.o == "json":
    output_json = {"format":"napkin", "version": version}
 for anal in analysis: