From 907571d50218dc7e49f9c1e3b7e57bd6315ada2c Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 18 Apr 2018 11:53:22 +0200 Subject: [PATCH 1/3] python 3.5 upgrade --- DomainClassifier/domainclassifier.py | 11 +++++----- DomainClassifier/test.py | 32 ++++++++++++++-------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index ebb3ec5..af1e340 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -8,7 +8,7 @@ import re import dns.resolver import IPy import socket -import urllib2 +import urllib.request __author__ = "Alexandre Dulaunoy" __copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy" @@ -27,7 +27,7 @@ class Extract: self.presolver = dns.resolver.Resolver() self.presolver.nameservers = nameservers self.presolver.lifetime = 1.0 - self.bgprankingserver = 'pdns.circl.lu' + self.bgprankingserver = 'pdns.circl.lu' #'bgpranking.circl.lu' self.vdomain = [] self.listtld = [] self.domain = self.potentialdomain() @@ -49,7 +49,7 @@ class Extract: if a: x = str(a[0]).split("|") # why so many spaces? - x = map(lambda t: t.replace("\"", "").strip(), x) + x = list( map(lambda t: t.replace("\"", "").strip(), x) ) return (x[0], x[2], a[0]) else: return None @@ -82,9 +82,9 @@ class Extract: def __updatelisttld(self): ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" - req = urllib2.Request(ianatldlist) + req = urllib.request.Request(ianatldlist) req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0') - tlds = urllib2.urlopen(req).read() + tlds = ( urllib.request.urlopen(req).read() ).decode('utf8') tlds = tlds.split("\n") for tld in tlds: self.listtld.append(tld.lower()) @@ -274,7 +274,6 @@ class Extract: for dom in domains: if type(dom) == tuple: dom = dom[0] - if includefilter.search(dom): self.cleandomain.append(dom) diff --git a/DomainClassifier/test.py b/DomainClassifier/test.py index 71c4b92..228c298 100644 --- a/DomainClassifier/test.py +++ b/DomainClassifier/test.py @@ -1,19 +1,19 @@ import domainclassifier -c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['149.13.33.69']) +c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['8.8.8.8']) #print c.potentialdomain() -print c.validdomain(extended=True) -print "US:" -print c.localizedomain(cc='US') -print "LU:" -print c.localizedomain(cc='LU') -print "BE:" -print c.localizedomain(cc='BE') -print "Ranking:" -print c.rankdomain() -print "List of ip addresses:" -print c.ipaddress(extended=True) -print "Include dot.lu:" -print c.include(expression=r'\.lu$') -print "Exclude dot.lu:" -print c.exclude(expression=r'\.lu$') +print(c.validdomain(extended=True)) +print("US:") +print(c.localizedomain(cc='US')) +print("LU:") +print(c.localizedomain(cc='LU')) +print("BE:") +print(c.localizedomain(cc='BE')) +print("Ranking:") +print(c.rankdomain()) +print("List of ip addresses:") +print(c.ipaddress(extended=True)) +print("Include dot.lu:") +print(c.include(expression=r'\.lu$')) +print("Exclude dot.lu:") +print(c.exclude(expression=r'\.lu$')) From 388f14df2ba067c2d7db8e61160e989554d490db Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 18 Apr 2018 11:55:46 +0200 Subject: [PATCH 2/3] clean --- DomainClassifier/domainclassifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index af1e340..b4a2e4a 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -27,7 +27,7 @@ class Extract: self.presolver = dns.resolver.Resolver() self.presolver.nameservers = nameservers self.presolver.lifetime = 1.0 - self.bgprankingserver = 'pdns.circl.lu' #'bgpranking.circl.lu' + self.bgprankingserver = 'pdns.circl.lu' self.vdomain = [] self.listtld = [] self.domain = self.potentialdomain() From 6e2dc62f6b04ac54d3539bb1b1874b1e13a97452 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 18 Apr 2018 13:51:09 +0200 Subject: [PATCH 3/3] python 2.7 and 3.5 compatibility --- DomainClassifier/domainclassifier.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index b4a2e4a..c6100d1 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -8,7 +8,14 @@ import re import dns.resolver import IPy import socket -import urllib.request + +try: + #python 3 + import urllib.request as urllib +except: + #python 2 + import urllib2 as urllib + __author__ = "Alexandre Dulaunoy" __copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy" @@ -82,9 +89,9 @@ class Extract: def __updatelisttld(self): ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" - req = urllib.request.Request(ianatldlist) + req = urllib.Request(ianatldlist) req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0') - tlds = ( urllib.request.urlopen(req).read() ).decode('utf8') + tlds = ( urllib.urlopen(req).read() ).decode('utf8') tlds = tlds.split("\n") for tld in tlds: self.listtld.append(tld.lower())