mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-12-22 00:26:00 +00:00
fix: [bug] punctuation was not part of OOV and was not accounted for
This commit is contained in:
parent
ef5011a64f
commit
193ad08144
1 changed file with 5 additions and 5 deletions
|
@ -73,6 +73,11 @@ for token in doc:
|
|||
redisdb.zincrby("noun", 1, token.text)
|
||||
redisdb.hincrby("stats", "noun", 1)
|
||||
continue
|
||||
if token.pos_ == "PUNCT" and not token.is_oov:
|
||||
redisdb.zincrby("punct", 1, value)
|
||||
redisdb.hincrby("stats", "punct", 1)
|
||||
continue
|
||||
|
||||
|
||||
if token.is_oov:
|
||||
value = "{}".format(token)
|
||||
|
@ -99,11 +104,6 @@ for token in doc:
|
|||
redisdb.zincrby("email", 1, value)
|
||||
redisdb.hincrby("stats", "email", 1)
|
||||
continue
|
||||
if token.is_punct:
|
||||
redisdb.zincrby("punct", 1, value)
|
||||
redisdb.hincrby("stats", "punct", 1)
|
||||
continue
|
||||
|
||||
redisdb.zincrby("oov", 1, value)
|
||||
redisdb.hincrby("stats", "oov", 1)
|
||||
|
||||
|
|
Loading…
Reference in a new issue