Merge pull request #1 from Terrtia/master

python 3.5 upgrade
2024-12-22 16:45:59 +00:00 · 2018-04-18 20:36:10 +02:00 · 2018-04-18 20:36:10 +02:00 · 04a7a963a8
commit 04a7a963a8
parent 682260259c 6e2dc62f6b
2 changed files with 27 additions and 21 deletions
--- a/DomainClassifier/domainclassifier.py
+++ b/DomainClassifier/domainclassifier.py
@@ -8,7 +8,14 @@ import re
 import dns.resolver
 import IPy
 import socket
-import urllib2
+
+try:
+    #python 3
+    import urllib.request as urllib
+except:
+    #python 2
+    import urllib2 as urllib
+

 __author__ = "Alexandre Dulaunoy"
 __copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy"
@@ -49,7 +56,7 @@ class Extract:
        if a:
            x = str(a[0]).split("|")
            # why so many spaces?
-            x = map(lambda t: t.replace("\"", "").strip(), x)
+            x = list( map(lambda t: t.replace("\"", "").strip(), x) )
            return (x[0], x[2], a[0])
        else:
            return None
@@ -82,9 +89,9 @@ class Extract:

    def __updatelisttld(self):
        ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
-        req = urllib2.Request(ianatldlist)
+        req = urllib.Request(ianatldlist)
        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0')
-        tlds = urllib2.urlopen(req).read()
+        tlds = ( urllib.urlopen(req).read() ).decode('utf8')
        tlds = tlds.split("\n")
        for tld in tlds:
            self.listtld.append(tld.lower())
@@ -274,7 +281,6 @@ class Extract:
        for dom in domains:
            if type(dom) == tuple:
                dom = dom[0]
-
            if includefilter.search(dom):
                    self.cleandomain.append(dom)

--- a/DomainClassifier/test.py
+++ b/DomainClassifier/test.py
@@ -1,19 +1,19 @@
 import domainclassifier

-c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['149.13.33.69'])
+c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['8.8.8.8'])
 #print c.potentialdomain()
-print c.validdomain(extended=True)
-print "US:"
-print c.localizedomain(cc='US')
-print "LU:"
-print c.localizedomain(cc='LU')
-print "BE:"
-print c.localizedomain(cc='BE')
-print "Ranking:"
-print c.rankdomain()
-print "List of ip addresses:"
-print c.ipaddress(extended=True)
-print "Include dot.lu:"
-print c.include(expression=r'\.lu$')
-print "Exclude dot.lu:"
-print c.exclude(expression=r'\.lu$')
+print(c.validdomain(extended=True))
+print("US:")
+print(c.localizedomain(cc='US'))
+print("LU:")
+print(c.localizedomain(cc='LU'))
+print("BE:")
+print(c.localizedomain(cc='BE'))
+print("Ranking:")
+print(c.rankdomain())
+print("List of ip addresses:")
+print(c.ipaddress(extended=True))
+print("Include dot.lu:")
+print(c.include(expression=r'\.lu$'))
+print("Exclude dot.lu:")
+print(c.exclude(expression=r'\.lu$'))