Basic pydoc description added

This commit is contained in:
Alexandre Dulaunoy 2012-01-24 22:53:20 +01:00
parent b5bfadb368
commit 20c6c6a023
2 changed files with 27 additions and 1 deletions

View file

@ -15,12 +15,23 @@ __license__ = "AGPL version 3"
__version__ = "0.0.1" __version__ = "0.0.1"
class Extract: class Extract:
"""DomainClassifier Extract class is the base class for extracting domains
from a rawtext stream. When called, the rawtext parameter is a string
containing the raw data to be processed."""
def __init__(self, rawtext = None): def __init__(self, rawtext = None):
self.rawtext = rawtext self.rawtext = rawtext
self.presolver = dns.resolver.Resolver() self.presolver = dns.resolver.Resolver()
self.presolver.nameservers = ['149.13.33.69'] self.presolver.nameservers = ['149.13.33.69']
"""__origin is a private function to the ASN lookup for an IP address via
the Team Cymru DNS interface. ipaddr is a string containing the IP address in
decimal form."""
def __origin(self, ipaddr=None): def __origin(self, ipaddr=None):
if ipaddr: if ipaddr:
@ -32,6 +43,11 @@ class Extract:
else: else:
return None return None
"""domain method extracts potential domains matching any
string that is a series of strings of at most 63 characters each, separated by
dots. The method uses the rawtext defined at the instantiation of the class.
It returns a list of potential domains."""
def domain(self): def domain(self):
self.domain = [] self.domain = []
domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b') domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b')
@ -41,6 +57,12 @@ class Extract:
return self.domain return self.domain
"""validdomain method used the extracted domains from the domain method to
generate a list of valid domains (at least existing in the authoritative DNS
server). The record types used are A, AAAA, SOA, MX and CNAME. This
returns a list of existing domains. If the extended flag is true, a set is
returned with the associated DNS resources found."""
def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True): def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True):
if extended is False: if extended is False:
self.validdomain = set() self.validdomain = set()
@ -59,6 +81,10 @@ class Extract:
self.validdomain.append((domain,dnstype,answers[0])) self.validdomain.append((domain,dnstype,answers[0]))
return self.validdomain return self.validdomain
"""localizedomain method use the validdomain list (in extended format) to
localize per country code the associated resources. The cc argument specifies the
country code in ISO 3166-1 alpha-2 format to check for."""
def localizedomain(self, cc=None): def localizedomain(self, cc=None):
self.localdom = [] self.localdom = []

View file

@ -3,7 +3,7 @@ import domainclassifier
c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test") c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test")
print c.domain() print c.domain()
print c.validdomain(extended=False) print c.validdomain(extended=True)
print "US:" print "US:"
print c.localizedomain(cc='US') print c.localizedomain(cc='US')
print "LU:" print "LU:"