From 13c4bf22da1fd37ec0fa8225b3b67babc792cd3f Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Fri, 14 Jun 2013 10:12:37 +0200 Subject: [PATCH] ipaddress() method added This method extracts valid IPv4 addresses from raw text. The validation is done using the standard socket call. The extended parameter adds the origin of the IP address via Cymru IP/ASN service. --- DomainClassifier/domainclassifier.py | 25 ++++++++++++++++++++++++- DomainClassifier/test.py | 6 +++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index 27c7f54..06c4882 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -46,7 +46,7 @@ class Extract: x = str(a[0]).split("|") # why so many spaces? x = map (lambda t: t.replace("\"","").strip(), x) - return (x[0],x[2]) + return (x[0],x[2],a[0]) else: return None """__bgpanking return the ranking the float value of an ASN. @@ -114,6 +114,29 @@ class Extract: self.validdomain.append((domain,dnstype,answers[0])) return self.validdomain + """ipaddress method extracts from the domain list the valid IPv4 addresses""" + + def ipaddress(self, extended=False): + if extended is False: + self.ipaddresses = [] + else: + self.ipaddresses = set() + + for d in self.domain: + try: + socket.inet_aton(d) + except: + pass + else: + if extended is False: + self.ipaddresses.append((d)) + else: + orig = self.__origin(ipaddr=d) + print orig + self.ipaddresses.add((d,str(orig))) + + return self.ipaddresses + """localizedomain method use the validdomain list (in extended format) to localize per country code the associated resources. The cc argument specifies the country code in ISO 3166-1 alpha-2 format to check for.""" diff --git a/DomainClassifier/test.py b/DomainClassifier/test.py index 80f34a5..68e5a73 100644 --- a/DomainClassifier/test.py +++ b/DomainClassifier/test.py @@ -1,6 +1,6 @@ import domainclassifier -c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com") +c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1") print c.domain() print c.validdomain(extended=True) print "US:" @@ -11,5 +11,9 @@ print "BE:" print c.localizedomain(cc='BE') print "Ranking:" print c.rankdomain() +print "List of ip addresses:" +print c.ipaddress(extended=True) +print "Include dot.lu:" print c.include(expression=r'\.lu$') +print "Exclude dot.lu:" print c.exclude(expression=r'\.lu$')