mirror of
https://github.com/adulau/napkin-text-analysis.git
synced 2024-11-22 01:47:06 +00:00
fix: [bug] punctuation tokens that were not OOV were not being counted
This commit is contained in:
parent
ef5011a64f
commit
193ad08144
1 changed files with 5 additions and 5 deletions
|
@ -73,6 +73,11 @@ for token in doc:
|
||||||
redisdb.zincrby("noun", 1, token.text)
|
redisdb.zincrby("noun", 1, token.text)
|
||||||
redisdb.hincrby("stats", "noun", 1)
|
redisdb.hincrby("stats", "noun", 1)
|
||||||
continue
|
continue
|
||||||
|
if token.pos_ == "PUNCT" and not token.is_oov:
|
||||||
|
redisdb.zincrby("punct", 1, value)
|
||||||
|
redisdb.hincrby("stats", "punct", 1)
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
if token.is_oov:
|
if token.is_oov:
|
||||||
value = "{}".format(token)
|
value = "{}".format(token)
|
||||||
|
@ -99,11 +104,6 @@ for token in doc:
|
||||||
redisdb.zincrby("email", 1, value)
|
redisdb.zincrby("email", 1, value)
|
||||||
redisdb.hincrby("stats", "email", 1)
|
redisdb.hincrby("stats", "email", 1)
|
||||||
continue
|
continue
|
||||||
if token.is_punct:
|
|
||||||
redisdb.zincrby("punct", 1, value)
|
|
||||||
redisdb.hincrby("stats", "punct", 1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
redisdb.zincrby("oov", 1, value)
|
redisdb.zincrby("oov", 1, value)
|
||||||
redisdb.hincrby("stats", "oov", 1)
|
redisdb.hincrby("stats", "oov", 1)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue