mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
new: [option] Don't flush the redisdb, useful when you want to process multiple files and aggregate the results.
This commit is contained in:
parent
949e41d19f
commit
26244739dd
2 changed files with 20 additions and 10 deletions
|
@ -15,6 +15,8 @@ Analysis features are :
|
||||||
|
|
||||||
Verbs and nouns are in their lemmatized form by default, but the `--verbatim` option allows keeping the original inflection.
|
Verbs and nouns are in their lemmatized form by default, but the `--verbatim` option allows keeping the original inflection.
|
||||||
|
|
||||||
|
Intermediate results are stored in a Redis database to allow the analysis of multiple text files.
|
||||||
|
|
||||||
# requirements
|
# requirements
|
||||||
|
|
||||||
- Python >= 3.6
|
- Python >= 3.6
|
||||||
|
@ -25,6 +27,7 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat
|
||||||
|
|
||||||
~~~~
|
~~~~
|
||||||
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
||||||
|
[--no-flushdb]
|
||||||
|
|
||||||
Extract statistical analysis of text
|
Extract statistical analysis of text
|
||||||
|
|
||||||
|
@ -38,6 +41,9 @@ optional arguments:
|
||||||
-l L language used for the analysis (default is en)
|
-l L language used for the analysis (default is en)
|
||||||
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
||||||
lematized form)
|
lematized form)
|
||||||
|
--no-flushdb Don't flush the redisdb, useful when you want to process
|
||||||
|
multiple files and aggregate the results. (by default the
|
||||||
|
redis database is flushed at each run)
|
||||||
~~~~
|
~~~~
|
||||||
|
|
||||||
# example usage of napkin
|
# example usage of napkin
|
||||||
|
|
|
@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False
|
||||||
parser.add_argument('-o', help="output format (default is csv), json", default="csv")
|
parser.add_argument('-o', help="output format (default is csv), json", default="csv")
|
||||||
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
||||||
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
||||||
|
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.f is None:
|
if args.f is None:
|
||||||
|
@ -25,11 +26,14 @@ if args.f is None:
|
||||||
redisdb = redis.Redis(host="localhost", port=6380, db=5)
|
redisdb = redis.Redis(host="localhost", port=6380, db=5)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
redisdb.flushdb()
|
redisdb.ping()
|
||||||
except:
|
except:
|
||||||
print("Redis database on port 6380 is not running...", file=sys.stderr)
|
print("Redis database on port 6380 is not running...", file=sys.stderr)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
if not args.no_flushdb:
|
||||||
|
redisdb.flushdb()
|
||||||
|
|
||||||
if args.l == "fr":
|
if args.l == "fr":
|
||||||
nlp = spacy.load("fr_core_news_md")
|
nlp = spacy.load("fr_core_news_md")
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in a new issue