Merge pull request #1 from Terrtia/master

python 3.5 upgrade
This commit is contained in:
Alexandre Dulaunoy 2018-04-18 20:36:10 +02:00 committed by GitHub
commit 04a7a963a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 21 deletions

View file

@@ -8,7 +8,14 @@ import re
import dns.resolver import dns.resolver
import IPy import IPy
import socket import socket
import urllib2
try:
#python 3
import urllib.request as urllib
except:
#python 2
import urllib2 as urllib
__author__ = "Alexandre Dulaunoy" __author__ = "Alexandre Dulaunoy"
__copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy" __copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy"
@@ -49,7 +56,7 @@ class Extract:
if a: if a:
x = str(a[0]).split("|") x = str(a[0]).split("|")
# why so many spaces? # why so many spaces?
x = map(lambda t: t.replace("\"", "").strip(), x) x = list( map(lambda t: t.replace("\"", "").strip(), x) )
return (x[0], x[2], a[0]) return (x[0], x[2], a[0])
else: else:
return None return None
@@ -82,9 +89,9 @@ class Extract:
def __updatelisttld(self): def __updatelisttld(self):
ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
req = urllib2.Request(ianatldlist) req = urllib.Request(ianatldlist)
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0') req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0')
tlds = urllib2.urlopen(req).read() tlds = ( urllib.urlopen(req).read() ).decode('utf8')
tlds = tlds.split("\n") tlds = tlds.split("\n")
for tld in tlds: for tld in tlds:
self.listtld.append(tld.lower()) self.listtld.append(tld.lower())
@@ -274,7 +281,6 @@ class Extract:
for dom in domains: for dom in domains:
if type(dom) == tuple: if type(dom) == tuple:
dom = dom[0] dom = dom[0]
if includefilter.search(dom): if includefilter.search(dom):
self.cleandomain.append(dom) self.cleandomain.append(dom)

View file

@@ -1,19 +1,19 @@
import domainclassifier import domainclassifier
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['149.13.33.69']) c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['8.8.8.8'])
#print c.potentialdomain() #print c.potentialdomain()
print c.validdomain(extended=True) print(c.validdomain(extended=True))
print "US:" print("US:")
print c.localizedomain(cc='US') print(c.localizedomain(cc='US'))
print "LU:" print("LU:")
print c.localizedomain(cc='LU') print(c.localizedomain(cc='LU'))
print "BE:" print("BE:")
print c.localizedomain(cc='BE') print(c.localizedomain(cc='BE'))
print "Ranking:" print("Ranking:")
print c.rankdomain() print(c.rankdomain())
print "List of ip addresses:" print("List of ip addresses:")
print c.ipaddress(extended=True) print(c.ipaddress(extended=True))
print "Include dot.lu:" print("Include dot.lu:")
print c.include(expression=r'\.lu$') print(c.include(expression=r'\.lu$'))
print "Exclude dot.lu:" print("Exclude dot.lu:")
print c.exclude(expression=r'\.lu$') print(c.exclude(expression=r'\.lu$'))