ipaddress() method added

This method extracts valid IPv4 addresses from raw text. The validation
is done using the standard socket call (socket.inet_aton). The extended
parameter adds the origin of the IP address via the Cymru IP/ASN service.
This commit is contained in:
Alexandre Dulaunoy 2013-06-14 10:12:37 +02:00
parent a3f87b5135
commit 13c4bf22da
2 changed files with 29 additions and 2 deletions

View file

@ -46,7 +46,7 @@ class Extract:
x = str(a[0]).split("|")
# why so many spaces?
x = map (lambda t: t.replace("\"","").strip(), x)
return (x[0],x[2])
return (x[0],x[2],a[0])
else:
return None
"""__bgpanking return the ranking the float value of an ASN.
@ -114,6 +114,29 @@ class Extract:
self.validdomain.append((domain,dnstype,answers[0]))
return self.validdomain
"""ipaddress method extracts from the domain list the valid IPv4 addresses"""
def ipaddress(self, extended=False):
    """Collect the entries of self.domain that parse as IPv4 addresses.

    Every entry of self.domain is handed to socket.inet_aton(); entries it
    rejects are skipped. The result is stored in self.ipaddresses and
    returned.

    extended -- when False (the default) the result is a list of address
                strings in the order they were found. Otherwise the
                result is a set of (address, origin) tuples, where origin
                is str() of what self.__origin(ipaddr=address) returns.
    """
    if extended is False:
        self.ipaddresses = []
    else:
        self.ipaddresses = set()
    for d in self.domain:
        # NOTE(review): inet_aton() also accepts shorthand forms such as
        # "1.2" or "127.1" -- confirm whether those should be reported.
        try:
            socket.inet_aton(d)
        except (socket.error, TypeError, ValueError):
            # Not an IPv4 address (or not a usable string at all): skip it.
            # Deliberately narrower than a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue
        if extended is False:
            self.ipaddresses.append(d)
        else:
            orig = self.__origin(ipaddr=d)
            self.ipaddresses.add((d, str(orig)))
    return self.ipaddresses
"""localizedomain method use the validdomain list (in extended format) to
localize per country code the associated resources. The cc argument specifies the
country code in ISO 3166-1 alpha-2 format to check for."""

View file

@ -1,6 +1,6 @@
import domainclassifier
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com")
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1")
print c.domain()
print c.validdomain(extended=True)
print "US:"
@ -11,5 +11,9 @@ print "BE:"
print c.localizedomain(cc='BE')
print "Ranking:"
print c.rankdomain()
print "List of ip addresses:"
print c.ipaddress(extended=True)
print "Include dot.lu:"
print c.include(expression=r'\.lu$')
print "Exclude dot.lu:"
print c.exclude(expression=r'\.lu$')