mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
new: [feature] option to save all labels in redis ranked set
This commit is contained in:
parent
7bb9a78096
commit
5b6136cfaf
1 changed file with 10 additions and 3 deletions
|
@ -26,6 +26,7 @@ parser.add_argument('--disable-parser', help="disable parser component in Spacy"
|
||||||
parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
|
parser.add_argument('--disable-tagger', help="disable tagger component in Spacy", default=False, action='store_true')
|
||||||
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
|
parser.add_argument('--token-span', default= None, help='Find the sentences where a specific token is located')
|
||||||
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
|
parser.add_argument('--table-format', help="set tabulate format (default is fancy_grid)", default="fancy_grid")
|
||||||
|
parser.add_argument('--full-labels', help="store each label value in a ranked set (default is False)", action='store_true', default=False)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
|
@ -78,6 +79,15 @@ if args.token_span and not disable:
|
||||||
|
|
||||||
redisdb.hset("stats", "token", doc.__len__())
|
redisdb.hset("stats", "token", doc.__len__())
|
||||||
|
|
||||||
|
labels = [ "EVENT", "PERCENT", "MONEY", "FAC", "TIME", "QUANTITY", "WORK_OF_ART", "LANGUAGE", "PRODUCT", "LOC", "LAW", "DATE", "ORDINAL", "NORP", "ORG", "CARDINAL", "GPE", "PERSON"]
|
||||||
|
|
||||||
|
for entity in doc.ents:
|
||||||
|
redisdb.zincrby("labels", 1, entity.label_)
|
||||||
|
if not args.full_labels:
|
||||||
|
continue
|
||||||
|
if entity.label_ in labels:
|
||||||
|
redisdb.zincrby("label:{}".format(entity.label_), 1, entity.text)
|
||||||
|
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if args.token_span is not None and not disable:
|
if args.token_span is not None and not disable:
|
||||||
if token.text == args.token_span:
|
if token.text == args.token_span:
|
||||||
|
@ -130,9 +140,6 @@ for token in doc:
|
||||||
redisdb.hincrby("stats", "oov", 1)
|
redisdb.hincrby("stats", "oov", 1)
|
||||||
|
|
||||||
|
|
||||||
for entity in doc.ents:
|
|
||||||
redisdb.zincrby("labels", 1, entity.label_)
|
|
||||||
|
|
||||||
if args.o == "json":
|
if args.o == "json":
|
||||||
output_json = {"format":"napkin", "version": version}
|
output_json = {"format":"napkin", "version": version}
|
||||||
for anal in analysis:
|
for anal in analysis:
|
||||||
|
|
Loading…
Reference in a new issue