mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
new: [output] JSON export added
This commit is contained in:
parent
3c3760019e
commit
3a09abc80c
1 changed file with 14 additions and 3 deletions
|
@@ -6,13 +6,14 @@ import spacy
|
||||||
from spacy_langdetect import LanguageDetector
|
from spacy_langdetect import LanguageDetector
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
import simplejson as json
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Extract statistical analysis of text")
|
parser = argparse.ArgumentParser(description="Extract statistical analysis of text")
|
||||||
parser.add_argument('-v', help="verbose output")
|
parser.add_argument('-v', help="verbose output")
|
||||||
parser.add_argument('-f', help="file to analyse")
|
parser.add_argument('-f', help="file to analyse")
|
||||||
parser.add_argument('-t', help="maximum value for the top list (default is 100) -1 is no limit", default=100)
|
parser.add_argument('-t', help="maximum value for the top list (default is 100) -1 is no limit", default=100)
|
||||||
parser.add_argument('-s', help="display the overall statistics (default is False)", default=False, action='store_true')
|
parser.add_argument('-s', help="display the overall statistics (default is False)", default=False, action='store_true')
|
||||||
parser.add_argument('-o', help="output format (default is csv)", default="csv")
|
parser.add_argument('-o', help="output format (default is csv), json", default="csv")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
|
@@ -89,13 +90,23 @@ for token in doc:
|
||||||
for entity in doc.ents:
|
for entity in doc.ents:
|
||||||
redisdb.zincrby("labels:napkin", 1, entity.label_)
|
redisdb.zincrby("labels:napkin", 1, entity.label_)
|
||||||
|
|
||||||
|
if args.o == "json":
|
||||||
|
output_json = {"format":"napkin"}
|
||||||
for anal in analysis:
|
for anal in analysis:
|
||||||
x = redisdb.zrevrange(anal, 1, args.t, withscores=True)
|
x = redisdb.zrevrange(anal, 1, args.t, withscores=True)
|
||||||
|
if args.o == "csv":
|
||||||
print ("# Top {} of {}".format(args.t, anal))
|
print ("# Top {} of {}".format(args.t, anal))
|
||||||
|
elif args.o == "json":
|
||||||
|
output_json.update({anal:[]})
|
||||||
for a in x:
|
for a in x:
|
||||||
if args.o == "csv":
|
if args.o == "csv":
|
||||||
print ("{},{}".format(a[0],a[1]))
|
print ("{},{}".format(a[0],a[1]))
|
||||||
|
elif args.o == "json":
|
||||||
|
output_json[anal].append(a)
|
||||||
|
if args.o == "csv":
|
||||||
print ("#")
|
print ("#")
|
||||||
|
|
||||||
if args.s:
|
if args.s:
|
||||||
print (redisdb.hgetall('stats'))
|
print (redisdb.hgetall('stats'))
|
||||||
|
if args.o == "json":
|
||||||
|
print(json.dumps(output_json))
|
||||||
|
|
Loading…
Reference in a new issue