From 542c53dcca13de660a4bc49572f1897e6bf9146c Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Tue, 24 Jan 2012 11:47:04 +0100 Subject: [PATCH] DomainClassifier Class extended The class has been extended to add the localizedomain method to geolocalize DNS records associated with an existing domain. The localization relies on the Team Cymru ip2asn lookup via DNS. --- DomainClassifier/domainclassifier.py | 55 ++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/DomainClassifier/domainclassifier.py b/DomainClassifier/domainclassifier.py index 7ff349e..d874e74 100644 --- a/DomainClassifier/domainclassifier.py +++ b/DomainClassifier/domainclassifier.py @@ -1,9 +1,37 @@ +#!/usr/bin/env python +"""DomainClassifier is a simple Python library to extract and classify Internet +domains from raw text files following their existence, localization or +attributes. +""" + import re import dns.resolver +import IPy +import socket + +__author__ = "Alexandre Dulaunoy" +__copyright__ = "Copyright 2012, Alexandre Dulaunoy" +__license__ = "AGPL version 3" +__version__ = "0.0.1" + class Extract: def __init__(self, rawtext = None): self.rawtext = rawtext + self.presolver = dns.resolver.Resolver() + self.presolver.nameservers = ['149.13.33.69'] + + def __origin(self, ipaddr=None): + + if ipaddr: + clook = IPy.IP(str(ipaddr)).reverseName().replace('.in-addr.arpa.','.origin.asn.cymru.com') + a = self.presolver.query(clook, 'TXT') + if a: + x = str(a[0]).split("|") + return x[2].strip() + else: + return None + def domain(self): self.domain = [] domain = re.compile(r'\b([a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})+)\b') @@ -13,21 +41,40 @@ class Extract: return self.domain - def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=None): - if extended is None: + def validdomain(self, rtype=['A','AAAA','SOA','MX','CNAME'], extended=True): + if extended is False: self.validdomain = set() else: self.validdomain = [] for domain in self.domain: for 
dnstype in rtype: try: - answers = dns.resolver.query(domain, dnstype) + answers = self.presolver.query(domain, dnstype) except: pass else: - if extended is None: + if extended is False: self.validdomain.add((domain)) else: self.validdomain.append((domain,dnstype,answers[0])) return self.validdomain + def localizedomain(self, cc=None): + self.localdom = [] + + for dom in self.validdomain: + if dom[1] == 'A': + ip = dom[2] + orig = self.__origin(ipaddr=dom[2]) + if(orig == cc): self.localdom.append(dom) + elif dom[1] == 'CNAME': + cname = str(dom[2]) + ip = socket.gethostbyname(cname) + orig = self.__origin(ipaddr=ip) + if(orig == cc): self.localdom.append(dom) + return self.localdom + + def filterdomain(self,filter=None): + pass + +