mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-12-23 00:55:58 +00:00
Domain ranking added to rank your domain
This commit is contained in:
parent
b46192f235
commit
5417fe4911
2 changed files with 54 additions and 5 deletions
|
@ -12,7 +12,7 @@ import socket
|
|||
__author__ = "Alexandre Dulaunoy"
|
||||
__copyright__ = "Copyright 2012, Alexandre Dulaunoy"
|
||||
__license__ = "AGPL version 3"
|
||||
__version__ = "0.0.2"
|
||||
__version__ = "0.0.3"
|
||||
|
||||
|
||||
|
||||
|
@ -27,6 +27,7 @@ class Extract:
|
|||
self.rawtext = rawtext
|
||||
self.presolver = dns.resolver.Resolver()
|
||||
self.presolver.nameservers = ['149.13.33.69']
|
||||
self.bgprankingserver = 'pdns.circl.lu'
|
||||
self.vdomain = []
|
||||
|
||||
"""__origin is a private function to the ASN lookup for an IP address via
|
||||
|
@ -40,10 +41,30 @@ class Extract:
|
|||
a = self.presolver.query(clook, 'TXT')
|
||||
if a:
|
||||
x = str(a[0]).split("|")
|
||||
x = map (lambda t: t.strip(), x)
|
||||
# why so many spaces?
|
||||
x = map (lambda t: t.replace("\"","").strip(), x)
|
||||
return (x[0],x[2])
|
||||
else:
|
||||
return None
|
||||
"""__bgpanking return the ranking the float value of an ASN.
|
||||
"""
|
||||
def __bgpranking(self, asn=None):
    """Return the ranking of an ASN as a float, or None.

    The ASN followed by CRLF is sent to ``self.bgprankingserver`` on TCP
    port 43 and the reply is read until the peer closes the connection.
    The ranking is taken from the second comma-separated field of the
    second line of the reply.

    asn -- ASN string to look up; a false value skips the lookup.

    Returns None when no ASN is given, when the server sends nothing, or
    when the reply does not contain a numeric value at that position.
    Socket errors are not caught and propagate to the caller.
    """
    if not asn:
        return None

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((self.bgprankingserver, 43))
        # sendall() retries until the whole query is written; send() may
        # write only a part of it.
        s.sendall((asn + "\r\n").encode("ascii"))
        chunks = []
        while True:
            d = s.recv(2048)
            # An empty read means the peer closed the connection.
            if not d:
                break
            chunks.append(d)
    finally:
        # Always release the socket, even when connect/send/recv fails.
        s.close()

    r = b"".join(chunks).decode("ascii", "replace")
    if len(r) == 0:
        return None

    lines = r.split("\n")
    if len(lines) < 2:
        return None
    fields = lines[1].split(",")
    if len(fields) < 2:
        return None
    try:
        return float(fields[1])
    except ValueError:
        # The field is present but is not a number: treat as "no ranking".
        return None
|
||||
|
||||
"""domain method extracts potential domains matching any
|
||||
string that is a series of strings with maximum 63 characters separated by a
|
||||
|
@ -104,6 +125,34 @@ class Extract:
|
|||
if(orig == cc): self.localdom.append(dom)
|
||||
return self.localdom
|
||||
|
||||
"""rankdomain method use the validdomain list (in extended format to rank
|
||||
each domain with an IP address. Return a sorted list of tuples (ranking,
|
||||
domain).
|
||||
"""
|
||||
|
||||
def rankdomain(self):
    """Rank each valid domain that maps to an IP address.

    Every record of ``self.validdomain`` is read as a sequence
    ``(domain, record type, value)``. For 'A' records the value is used as
    the IP address; for 'CNAME' records the value is resolved with
    ``socket.gethostbyname``. The IP address is mapped to an ASN with
    ``__origin`` and the ASN to a ranking with ``__bgpranking``. Other
    record types are ignored.

    The tuples ``(ranking, domain)`` are stored in ``self.rankdom`` and
    returned sorted by ranking. The ranking is None when the CNAME cannot
    be resolved, when no origin is found or when no ranking is available;
    such entries are sorted first.
    """
    self.rankdom = []

    # NOTE(review): this method iterates self.validdomain while callers
    # also invoke validdomain(...) as a method, so presumably that call
    # rebinds the attribute to its result - confirm. While the attribute
    # is still callable there is nothing to rank.
    records = self.validdomain
    if callable(records) or not records:
        return []

    for dom in records:
        rtype = dom[1]
        if rtype == 'A':
            ip = dom[2]
        elif rtype == 'CNAME':
            try:
                ip = socket.gethostbyname(str(dom[2]))
            except socket.error:
                # An unresolved CNAME must not abort the whole ranking.
                ip = None
        else:
            continue

        rank = None
        if ip is not None:
            origin = self.__origin(ipaddr=ip)
            # __origin returns None when the lookup gives no answer.
            if origin is not None:
                rank = self.__bgpranking(origin[0])
        self.rankdom.append((rank, dom[0]))

    # None cannot be ordered against a float on Python 3; the leading
    # boolean keeps the None rankings first without comparing them.
    return sorted(self.rankdom, key=lambda d: (d[0] is not None, d[0]))
|
||||
|
||||
|
||||
|
||||
"""exclude domains from a regular expression. If validdomain was called,
|
||||
it's only on the valid domain list."""
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import domainclassifier
|
||||
|
||||
c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test")
|
||||
|
||||
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com")
|
||||
print c.domain()
|
||||
print c.validdomain(extended=True)
|
||||
print "US:"
|
||||
|
@ -10,6 +9,7 @@ print "LU:"
|
|||
print c.localizedomain(cc='LU')
|
||||
print "BE:"
|
||||
print c.localizedomain(cc='BE')
|
||||
|
||||
print "Ranking:"
|
||||
print c.rankdomain()
|
||||
print c.include(expression=r'\.lu$')
|
||||
print c.exclude(expression=r'\.lu$')
|
||||
|
|
Loading…
Reference in a new issue