mirror of
https://github.com/adulau/DomainClassifier.git
synced 2024-11-23 02:17:07 +00:00
ipaddress() method added
This method extracts valid IPv4 addresses from the raw text. Each candidate is validated with the standard socket.inet_aton() call. When the extended parameter is set, the origin of each IP address is looked up via the Cymru IP/ASN service and returned together with the address.
This commit is contained in:
parent
a3f87b5135
commit
13c4bf22da
2 changed files with 29 additions and 2 deletions
|
@ -46,7 +46,7 @@ class Extract:
|
||||||
x = str(a[0]).split("|")
|
x = str(a[0]).split("|")
|
||||||
# why so many spaces?
|
# why so many spaces?
|
||||||
x = map (lambda t: t.replace("\"","").strip(), x)
|
x = map (lambda t: t.replace("\"","").strip(), x)
|
||||||
return (x[0],x[2])
|
return (x[0],x[2],a[0])
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
"""__bgpanking return the ranking the float value of an ASN.
|
"""__bgpanking return the ranking the float value of an ASN.
|
||||||
|
@ -114,6 +114,29 @@ class Extract:
|
||||||
self.validdomain.append((domain,dnstype,answers[0]))
|
self.validdomain.append((domain,dnstype,answers[0]))
|
||||||
return self.validdomain
|
return self.validdomain
|
||||||
|
|
||||||
|
"""ipaddress method extracts from the domain list the valid IPv4 addresses"""
|
||||||
|
|
||||||
|
def ipaddress(self, extended=False):
|
||||||
|
if extended is False:
|
||||||
|
self.ipaddresses = []
|
||||||
|
else:
|
||||||
|
self.ipaddresses = set()
|
||||||
|
|
||||||
|
for d in self.domain:
|
||||||
|
try:
|
||||||
|
socket.inet_aton(d)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if extended is False:
|
||||||
|
self.ipaddresses.append((d))
|
||||||
|
else:
|
||||||
|
orig = self.__origin(ipaddr=d)
|
||||||
|
print orig
|
||||||
|
self.ipaddresses.add((d,str(orig)))
|
||||||
|
|
||||||
|
return self.ipaddresses
|
||||||
|
|
||||||
"""localizedomain method use the validdomain list (in extended format) to
|
"""localizedomain method use the validdomain list (in extended format) to
|
||||||
localize per country code the associated resources. The cc argument specifies the
|
localize per country code the associated resources. The cc argument specifies the
|
||||||
country code in ISO 3166-1 alpha-2 format to check for."""
|
country code in ISO 3166-1 alpha-2 format to check for."""
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import domainclassifier
|
import domainclassifier
|
||||||
|
|
||||||
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com")
|
c = domainclassifier.Extract( rawtext = "www.xxx.com this is a text with a domain called test@foo.lu another test abc.lu something a.b.c.d.e end of 1.2.3.4 foo.be www.belnet.be http://www.cert.be/ www.public.lu www.allo.lu quuxtest www.eurodns.com something-broken-www.google.com www.google.lu trailing test www.facebook.com www.nic.ru www.youporn.com 8.8.8.8 201.1.1.1")
|
||||||
print c.domain()
|
print c.domain()
|
||||||
print c.validdomain(extended=True)
|
print c.validdomain(extended=True)
|
||||||
print "US:"
|
print "US:"
|
||||||
|
@ -11,5 +11,9 @@ print "BE:"
|
||||||
print c.localizedomain(cc='BE')
|
print c.localizedomain(cc='BE')
|
||||||
print "Ranking:"
|
print "Ranking:"
|
||||||
print c.rankdomain()
|
print c.rankdomain()
|
||||||
|
print "List of ip addresses:"
|
||||||
|
print c.ipaddress(extended=True)
|
||||||
|
print "Include dot.lu:"
|
||||||
print c.include(expression=r'\.lu$')
|
print c.include(expression=r'\.lu$')
|
||||||
|
print "Exclude dot.lu:"
|
||||||
print c.exclude(expression=r'\.lu$')
|
print c.exclude(expression=r'\.lu$')
|
||||||
|
|
Loading…
Reference in a new issue