mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-11-23 02:17:07 +00:00
Domain ranking added to rank your domain
This commit is contained in:
parent
b46192f235
commit
5417fe4911
2 changed files with 54 additions and 5 deletions
|
@ -12,7 +12,7 @@ import socket
|
||||||
__author__ = "Alexandre Dulaunoy"
|
__author__ = "Alexandre Dulaunoy"
|
||||||
__copyright__ = "Copyright 2012, Alexandre Dulaunoy"
|
__copyright__ = "Copyright 2012, Alexandre Dulaunoy"
|
||||||
__license__ = "AGPL version 3"
|
__license__ = "AGPL version 3"
|
||||||
__version__ = "0.0.2"
|
__version__ = "0.0.3"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@ class Extract:
|
||||||
self.rawtext = rawtext
|
self.rawtext = rawtext
|
||||||
self.presolver = dns.resolver.Resolver()
|
self.presolver = dns.resolver.Resolver()
|
||||||
self.presolver.nameservers = ['149.13.33.69']
|
self.presolver.nameservers = ['149.13.33.69']
|
||||||
|
self.bgprankingserver = 'pdns.circl.lu'
|
||||||
self.vdomain = []
|
self.vdomain = []
|
||||||
|
|
||||||
"""__origin is a private function to the ASN lookup for an IP address via
|
"""__origin is a private function to the ASN lookup for an IP address via
|
||||||
|
@ -40,10 +41,30 @@ class Extract:
|
||||||
a = self.presolver.query(clook, 'TXT')
|
a = self.presolver.query(clook, 'TXT')
|
||||||
if a:
|
if a:
|
||||||
x = str(a[0]).split("|")
|
x = str(a[0]).split("|")
|
||||||
x = map (lambda t: t.strip(), x)
|
# why so many spaces?
|
||||||
|
x = map (lambda t: t.replace("\"","").strip(), x)
|
||||||
return (x[0],x[2])
|
return (x[0],x[2])
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
"""__bgpranking returns the ranking of an ASN as a float value.
|
||||||
|
"""
|
||||||
|
def __bgpranking(self, asn=None):
    """Return the ranking of an ASN as a float, or None.

    Connects over TCP to port 43 of ``self.bgprankingserver``, sends the
    ASN followed by CRLF and reads until the peer closes the connection.
    The rank is read from the second comma-separated field of the second
    line of the reply.

    Returns None when ``asn`` is empty, when the reply is empty, or when
    the reply does not have the layout described above.
    """
    if not asn:
        return None

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((self.bgprankingserver, 43))
        # sendall() keeps writing until the whole query is out; send()
        # may return after a partial write.  Encoding explicitly keeps
        # the call valid on both Python 2 and Python 3.
        s.sendall((asn + "\r\n").encode("ascii"))
        chunks = []
        while True:
            d = s.recv(2048)
            if not d:
                # An empty read means the peer closed the connection.
                break
            chunks.append(d)
    finally:
        # Release the socket even when connect/send/recv raised.
        s.close()

    r = b"".join(chunks).decode("ascii", "replace")
    if not r:
        return None
    try:
        rank = r.split("\n")[1].split(",")[1]
        return float(rank)
    except (IndexError, ValueError):
        # Reply too short or rank field not numeric: treat as "no rank",
        # like the empty-reply case.
        return None
|
||||||
|
|
||||||
"""domain method extracts potential domains matching any
|
"""domain method extracts potential domains matching any
|
||||||
string that is a serie of string with maximun 63 character separated by a
|
string that is a serie of string with maximun 63 character separated by a
|
||||||
|
@ -104,6 +125,34 @@ class Extract:
|
||||||
if(orig == cc): self.localdom.append(dom)
|
if(orig == cc): self.localdom.append(dom)
|
||||||
return self.localdom
|
return self.localdom
|
||||||
|
|
||||||
|
"""rankdomain method uses the validdomain list (in extended format) to rank
|
||||||
|
each domain with an IP address. Return a sorted list of tuples (ranking,
|
||||||
|
domain).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def rankdomain(self):
    """Rank every valid domain that has an IP address.

    Walks ``self.validdomain``, whose entries are read as ``dom[0]``
    (domain), ``dom[1]`` (record type) and ``dom[2]`` (record value).
    For 'A' records the value is used as the IP address; for 'CNAME'
    records the value is resolved with ``socket.gethostbyname`` first.
    Other record types are ignored, and so are CNAME targets that do
    not resolve.

    The ASN of the address is looked up with ``__origin`` and ranked
    with ``__bgpranking``.  The rank is None when either lookup gives no
    result.

    Returns a list of (ranking, domain) tuples sorted by ranking, with
    unranked (None) entries first.  The unsorted list is also kept in
    ``self.rankdom``.
    """
    self.rankdom = []

    if not self.validdomain:
        return []

    for dom in self.validdomain:
        rtype = dom[1]
        if rtype == 'A':
            ip = dom[2]
        elif rtype == 'CNAME':
            try:
                ip = socket.gethostbyname(str(dom[2]))
            except socket.error:
                # The alias target has no address, so there is nothing
                # to rank; keep going with the remaining domains.
                continue
        else:
            continue

        # __origin returns None when the lookup has no answer.
        origin = self.__origin(ipaddr=ip)
        rank = self.__bgpranking(origin[0]) if origin else None
        self.rankdom.append((rank, dom[0]))

    # None ranks sort first (as they did with the plain Python 2
    # comparison) without ever comparing None to a float.
    return sorted(self.rankdom, key=lambda d: (d[0] is not None, d[0]))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""exclude domains from a regular expression. If validdomain was called,
|
"""exclude domains from a regular expression. If validdomain was called,
|
||||||
it's only on the valid domain list."""
|
it's only on the valid domain list."""
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import domainclassifier
|
import domainclassifier
|
||||||
|
|
||||||
c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test")
|
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com")
|
||||||
|
|
||||||
print c.domain()
|
print c.domain()
|
||||||
print c.validdomain(extended=True)
|
print c.validdomain(extended=True)
|
||||||
print "US:"
|
print "US:"
|
||||||
|
@ -10,6 +9,7 @@ print "LU:"
|
||||||
print c.localizedomain(cc='LU')
|
print c.localizedomain(cc='LU')
|
||||||
print "BE:"
|
print "BE:"
|
||||||
print c.localizedomain(cc='BE')
|
print c.localizedomain(cc='BE')
|
||||||
|
print "Ranking:"
|
||||||
|
print c.rankdomain()
|
||||||
print c.include(expression=r'\.lu$')
|
print c.include(expression=r'\.lu$')
|
||||||
print c.exclude(expression=r'\.lu$')
|
print c.exclude(expression=r'\.lu$')
|
||||||
|
|
Loading…
Reference in a new issue