From 20c6c6a023750e0537d4dbb854b33c26af5db11f Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Tue, 24 Jan 2012 22:53:20 +0100 Subject: [PATCH] Basic pydoc description added --- DomainClassifier/domainclassifier.py | 26 ++++++++++++++++++++++++++ DomainClassifier/test.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index d874e74..fe635e0 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -15,12 +15,23 @@ __license__ = "AGPL version 3" __version__ = "0.0.1" + class Extract: + + """DomainClassifier Extract class is the base class for extracting domains + from a rawtext stream. When called, the rawtext parameter is a string + containing the raw data to be processed.""" + + def __init__(self, rawtext = None): self.rawtext = rawtext self.presolver = dns.resolver.Resolver() self.presolver.nameservers = ['149.13.33.69'] + """__origin is a private function to do the ASN lookup for an IP address via + the Team Cymru DNS interface. ipaddr is a string containing the IP address in a + decimal form.""" + def __origin(self, ipaddr=None): if ipaddr: @@ -32,6 +43,11 @@ class Extract: else: return None + """domain method extracts potential domains matching any + string that is a series of strings with maximum 63 characters separated by a + dot. The method uses the rawtext defined at the instantiation of the class. + This returns a list of potential domains.""" + def domain(self): self.domain = [] domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b') @@ -41,6 +57,12 @@ class Extract: return self.domain + """validdomain method uses the extracted domains from the domain method to + generate a list of valid domains (at least existing in the authoritative DNS + server). The record types used are A, AAAA, SOA, MX and CNAME. This + returns a list of existing domains. 
If the extended flag is true, the list is + returned with the associated DNS resources found; if false, a set is returned instead.""" + def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True): if extended is False: self.validdomain = set() @@ -59,6 +81,10 @@ class Extract: self.validdomain.append((domain,dnstype,answers[0])) return self.validdomain + """localizedomain method uses the validdomain list (in extended format) to + localize the associated resources per country code. The cc argument specifies the + country code in ISO 3166-1 alpha-2 format to check for.""" + def localizedomain(self, cc=None): self.localdom = [] diff --git a/DomainClassifier/test.py b/DomainClassifier/test.py index e802349..f89b023 100644 --- a/DomainClassifier/test.py +++ b/DomainClassifier/test.py @@ -3,7 +3,7 @@ import domainclassifier c = domainclassifier.Extract( rawtext = "this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test") print c.domain() -print c.validdomain(extended=False) +print c.validdomain(extended=True) print "US:" print c.localizedomain(cc='US') print "LU:"