mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-11-22 18:07:07 +00:00
commit
04a7a963a8
2 changed files with 27 additions and 21 deletions
|
@ -8,7 +8,14 @@ import re
|
||||||
import dns.resolver
|
import dns.resolver
|
||||||
import IPy
|
import IPy
|
||||||
import socket
|
import socket
|
||||||
import urllib2
|
|
||||||
|
try:
|
||||||
|
#python 3
|
||||||
|
import urllib.request as urllib
|
||||||
|
except:
|
||||||
|
#python 2
|
||||||
|
import urllib2 as urllib
|
||||||
|
|
||||||
|
|
||||||
__author__ = "Alexandre Dulaunoy"
|
__author__ = "Alexandre Dulaunoy"
|
||||||
__copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy"
|
__copyright__ = "Copyright 2012-2017, Alexandre Dulaunoy"
|
||||||
|
@ -49,7 +56,7 @@ class Extract:
|
||||||
if a:
|
if a:
|
||||||
x = str(a[0]).split("|")
|
x = str(a[0]).split("|")
|
||||||
# why so many spaces?
|
# why so many spaces?
|
||||||
x = map(lambda t: t.replace("\"", "").strip(), x)
|
x = list( map(lambda t: t.replace("\"", "").strip(), x) )
|
||||||
return (x[0], x[2], a[0])
|
return (x[0], x[2], a[0])
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
@ -82,9 +89,9 @@ class Extract:
|
||||||
|
|
||||||
def __updatelisttld(self):
|
def __updatelisttld(self):
|
||||||
ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
|
ianatldlist = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
|
||||||
req = urllib2.Request(ianatldlist)
|
req = urllib.Request(ianatldlist)
|
||||||
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0')
|
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0')
|
||||||
tlds = urllib2.urlopen(req).read()
|
tlds = ( urllib.urlopen(req).read() ).decode('utf8')
|
||||||
tlds = tlds.split("\n")
|
tlds = tlds.split("\n")
|
||||||
for tld in tlds:
|
for tld in tlds:
|
||||||
self.listtld.append(tld.lower())
|
self.listtld.append(tld.lower())
|
||||||
|
@ -274,7 +281,6 @@ class Extract:
|
||||||
for dom in domains:
|
for dom in domains:
|
||||||
if type(dom) == tuple:
|
if type(dom) == tuple:
|
||||||
dom = dom[0]
|
dom = dom[0]
|
||||||
|
|
||||||
if includefilter.search(dom):
|
if includefilter.search(dom):
|
||||||
self.cleandomain.append(dom)
|
self.cleandomain.append(dom)
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,19 @@
|
||||||
import domainclassifier
|
import domainclassifier
|
||||||
|
|
||||||
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['149.13.33.69'])
|
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1", nameservers = ['8.8.8.8'])
|
||||||
#print c.potentialdomain()
|
#print c.potentialdomain()
|
||||||
print c.validdomain(extended=True)
|
print(c.validdomain(extended=True))
|
||||||
print "US:"
|
print("US:")
|
||||||
print c.localizedomain(cc='US')
|
print(c.localizedomain(cc='US'))
|
||||||
print "LU:"
|
print("LU:")
|
||||||
print c.localizedomain(cc='LU')
|
print(c.localizedomain(cc='LU'))
|
||||||
print "BE:"
|
print("BE:")
|
||||||
print c.localizedomain(cc='BE')
|
print(c.localizedomain(cc='BE'))
|
||||||
print "Ranking:"
|
print("Ranking:")
|
||||||
print c.rankdomain()
|
print(c.rankdomain())
|
||||||
print "List of ip addresses:"
|
print("List of ip addresses:")
|
||||||
print c.ipaddress(extended=True)
|
print(c.ipaddress(extended=True))
|
||||||
print "Include dot.lu:"
|
print("Include dot.lu:")
|
||||||
print c.include(expression=r'\.lu$')
|
print(c.include(expression=r'\.lu$'))
|
||||||
print "Exclude dot.lu:"
|
print("Exclude dot.lu:")
|
||||||
print c.exclude(expression=r'\.lu$')
|
print(c.exclude(expression=r'\.lu$'))
|
||||||
|
|
Loading…
Reference in a new issue