mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-11-23 02:17:07 +00:00
Basic pydoc description added
This commit is contained in:
parent
b5bfadb368
commit
20c6c6a023
2 changed files with 27 additions and 1 deletions
|
@ -15,12 +15,23 @@ __license__ = "AGPL version 3"
|
||||||
__version__ = "0.0.1"
|
__version__ = "0.0.1"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Extract:
|
class Extract:
|
||||||
|
|
||||||
|
"""DomainClassifier Extract class is the base class for extracting domains
|
||||||
|
from a rawtext stream. When called, the rawtext parameter is a string
|
||||||
|
containing the raw data to be processed."""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, rawtext = None):
|
def __init__(self, rawtext = None):
|
||||||
self.rawtext = rawtext
|
self.rawtext = rawtext
|
||||||
self.presolver = dns.resolver.Resolver()
|
self.presolver = dns.resolver.Resolver()
|
||||||
self.presolver.nameservers = ['149.13.33.69']
|
self.presolver.nameservers = ['149.13.33.69']
|
||||||
|
|
||||||
|
"""__origin is a private function to the ASN lookup for an IP address via
|
||||||
|
the Team Cymru DNS interface. ipaddr is a string containing the IP address in a
|
||||||
|
decimal form."""
|
||||||
|
|
||||||
def __origin(self, ipaddr=None):
|
def __origin(self, ipaddr=None):
|
||||||
|
|
||||||
if ipaddr:
|
if ipaddr:
|
||||||
|
@ -32,6 +43,11 @@ class Extract:
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
"""domain method extracts potential domains matching any
|
||||||
|
string that is a series of strings of at most 63 characters each, separated by a
|
||||||
|
dot. The method uses the rawtext defined at the instantiation of the class.
|
||||||
|
This returns a list of potential domains."""
|
||||||
|
|
||||||
def domain(self):
|
def domain(self):
|
||||||
self.domain = []
|
self.domain = []
|
||||||
domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b')
|
domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b')
|
||||||
|
@ -41,6 +57,12 @@ class Extract:
|
||||||
|
|
||||||
return self.domain
|
return self.domain
|
||||||
|
|
||||||
|
"""validdomain method used the extracted domains from the domain method to
|
||||||
|
generate a list of valid domains (at least existing in the authoritative DNS
|
||||||
|
server). The record types used are A, AAAA, SOA, MX and CNAME. This
|
||||||
|
returns a list of existing domains. If the extended flag is true, a set is
|
||||||
|
returned with the associated DNS resources found."""
|
||||||
|
|
||||||
def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True):
|
def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True):
|
||||||
if extended is False:
|
if extended is False:
|
||||||
self.validdomain = set()
|
self.validdomain = set()
|
||||||
|
@ -59,6 +81,10 @@ class Extract:
|
||||||
self.validdomain.append((domain,dnstype,answers[0]))
|
self.validdomain.append((domain,dnstype,answers[0]))
|
||||||
return self.validdomain
|
return self.validdomain
|
||||||
|
|
||||||
|
"""localizedomain method use the validdomain list (in extended format) to
|
||||||
|
localize per country code the associated resources. The cc argument specifies the
|
||||||
|
country code in ISO 3166-1 alpha-2 format to check for."""
|
||||||
|
|
||||||
def localizedomain(self, cc=None):
|
def localizedomain(self, cc=None):
|
||||||
self.localdom = []
|
self.localdom = []
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ import domainclassifier
|
||||||
c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test")
|
c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test")
|
||||||
|
|
||||||
print c.domain()
|
print c.domain()
|
||||||
print c.validdomain(extended=False)
|
print c.validdomain(extended=True)
|
||||||
print "US:"
|
print "US:"
|
||||||
print c.localizedomain(cc='US')
|
print c.localizedomain(cc='US')
|
||||||
print "LU:"
|
print "LU:"
|
||||||
|
|
Loading…
Reference in a new issue