mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-11-22 01:47:06 +00:00
new: [domainclassifier] add a simple cache of the TLDs list from IANA
(to avoid downloading at each start of the library)
This commit is contained in:
parent
8debd6c6b7
commit
1e55e0a5a7
1 changed file with 22 additions and 7 deletions
|
@@ -10,6 +10,7 @@ import IPy
|
|||
import socket
|
||||
import time
|
||||
from datetime import date, timedelta
|
||||
import os
|
||||
|
||||
try:
|
||||
# python 3
|
||||
|
@@ -82,14 +83,28 @@ class Extract:
|
|||
)
|
||||
return value['response']['ranking']['rank']
|
||||
|
||||
def __updatelisttld(self):
|
||||
def __updatelisttld(self, force=False):
|
||||
ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
|
||||
userdir = os.path.expanduser("~")
|
||||
cachedir = os.path.join(userdir, ".DomainClassifier")
|
||||
if not os.path.exists(cachedir):
|
||||
os.mkdir(cachedir)
|
||||
tldcache = os.path.join(cachedir, "tlds")
|
||||
if not os.path.exists(tldcache):
|
||||
print(tldcache)
|
||||
req = urllib.Request(ianatldlist)
|
||||
req.add_header(
|
||||
'User-Agent',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0',
|
||||
)
|
||||
tlds = (urllib.urlopen(req).read()).decode('utf8')
|
||||
f = open(tldcache, "wb")
|
||||
f.write(tlds.encode("utf-8"))
|
||||
f.close()
|
||||
|
||||
f = open(tldcache, "r")
|
||||
tlds = f.read()
|
||||
f.close()
|
||||
tlds = tlds.split("\n")
|
||||
for tld in tlds:
|
||||
self.listtld.append(tld.lower())
|
||||
|
|
Loading…
Reference in a new issue