mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-12-22 00:26:00 +00:00
new: [option] Don't flush the redisdb, useful when you want to process multiple files and aggregate the results.
This commit is contained in:
parent
949e41d19f
commit
26244739dd
2 changed files with 20 additions and 10 deletions
24
README.md
24
README.md
|
@ -15,6 +15,8 @@ Analysis features are :
|
|||
|
||||
Verbs and nouns are in their lemmatized form by default, but the `--verbatim` option keeps the original inflection.
|
||||
|
||||
Intermediate results are stored in a Redis database to allow the analysis of multiple text files.
|
||||
|
||||
# requirements
|
||||
|
||||
- Python >= 3.6
|
||||
|
@ -25,19 +27,23 @@ Verbs and nouns are in their lemmatized form by default but the option `--verbat
|
|||
|
||||
~~~~
|
||||
usage: napkin.py [-h] [-v V] [-f F] [-t T] [-s] [-o O] [-l L] [--verbatim]
|
||||
[--no-flushdb]
|
||||
|
||||
Extract statistical analysis of text
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v V verbose output
|
||||
-f F file to analyse
|
||||
-t T maximum value for the top list (default is 100) -1 is no limit
|
||||
-s display the overall statistics (default is False)
|
||||
-o O output format (default is csv), json
|
||||
-l L language used for the analysis (default is en)
|
||||
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
||||
lemmatized form)
|
||||
-h, --help show this help message and exit
|
||||
-v V verbose output
|
||||
-f F file to analyse
|
||||
-t T maximum value for the top list (default is 100) -1 is no limit
|
||||
-s display the overall statistics (default is False)
|
||||
-o O output format (default is csv), json
|
||||
-l L language used for the analysis (default is en)
|
||||
--verbatim Don't use the lemmatized form, use verbatim. (default is the
|
||||
lemmatized form)
|
||||
--no-flushdb Don't flush the redisdb, useful when you want to process
|
||||
multiple files and aggregate the results. (by default the
|
||||
redis database is flushed at each run)
|
||||
~~~~
|
||||
|
||||
# example usage of napkin
|
||||
|
|
|
@ -16,6 +16,7 @@ parser.add_argument('-s', help="display the overall statistics (default is False
|
|||
parser.add_argument('-o', help="output format (default is csv), json", default="csv")
|
||||
parser.add_argument('-l', help="language used for the analysis (default is en)", default="en")
|
||||
parser.add_argument('--verbatim', help="Don't use the lemmatized form, use verbatim. (default is the lematized form)", default=False, action='store_true')
|
||||
parser.add_argument('--no-flushdb', help="Don't flush the redisdb, useful when you want to process multiple files and aggregate the results. (by default the redis database is flushed at each run)", default=False, action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.f is None:
|
||||
|
@ -25,11 +26,14 @@ if args.f is None:
|
|||
redisdb = redis.Redis(host="localhost", port=6380, db=5)
|
||||
|
||||
try:
|
||||
redisdb.flushdb()
|
||||
redisdb.ping()
|
||||
except:
|
||||
print("Redis database on port 6380 is not running...", file=sys.stderr)
|
||||
sys.exit()
|
||||
|
||||
if not args.no_flushdb:
|
||||
redisdb.flushdb()
|
||||
|
||||
if args.l == "fr":
|
||||
nlp = spacy.load("fr_core_news_md")
|
||||
else:
|
||||
|
|
Loading…
Reference in a new issue