def ipaddress(self, extended=False):
    """Extract the IPv4 addresses present in the domain list.

    Every entry of ``self.domain`` is tested; entries that are not
    dotted-quad IPv4 addresses are silently skipped (best effort).
    NOTE(review): assumes ``self.domain`` is already an iterable of
    strings, presumably filled by an earlier call to ``domain()`` --
    verify against the callers.

    Args:
        extended: when exactly ``False`` (the default) the result is a
            list of address strings in the order they were found. For
            any other value the result is a set of
            ``(address, str(origin))`` tuples, where ``origin`` is
            whatever ``self.__origin(ipaddr=address)`` returns.

    Returns:
        The collection described above; it is also stored on the
        instance as ``self.ipaddresses``.
    """
    # Keep the original identity test so that odd values such as 0 or
    # None still select the extended (set) format, exactly as before.
    plain = extended is False
    self.ipaddresses = [] if plain else set()

    for candidate in self.domain:
        try:
            # inet_aton() also accepts shorthand forms such as "1.2"
            # (read as 1.0.0.2); insist on four dot-separated parts so
            # version numbers and similar tokens are not reported as IPs.
            if candidate.count(".") != 3:
                continue
            socket.inet_aton(candidate)
        except (AttributeError, TypeError, ValueError, socket.error):
            # Not a string, or not a valid IPv4 address: skip the entry
            # without hiding unrelated errors such as KeyboardInterrupt.
            continue

        if plain:
            self.ipaddresses.append(candidate)
        else:
            origin = self.__origin(ipaddr=candidate)
            self.ipaddresses.add((candidate, str(origin)))

    return self.ipaddresses
The cc argument specifies the country code in ISO 3166-1 alpha-2 format to check for.""" diff --git a/DomainClassifier/test.py b/DomainClassifier/test.py index 80f34a5..68e5a73 100644 --- a/DomainClassifier/test.py +++ b/DomainClassifier/test.py @@ -1,6 +1,6 @@ import domainclassifier -c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com") +c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1") print c.domain() print c.validdomain(extended=True) print "US:" @@ -11,5 +11,9 @@ print "BE:" print c.localizedomain(cc='BE') print "Ranking:" print c.rankdomain() +print "List of ip addresses:" +print c.ipaddress(extended=True) +print "Include dot.lu:" print c.include(expression=r'\.lu$') +print "Exclude dot.lu:" print c.exclude(expression=r'\.lu$')