From c620f200533ffe38e91488da778c5b42b0a827ae Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Thu, 29 Apr 2021 11:58:29 +0200 Subject: [PATCH 1/7] initial JSON schema proposal. There are a couple of ways to express it though. --- schema/schema.json | 182 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 schema/schema.json diff --git a/schema/schema.json b/schema/schema.json new file mode 100644 index 0000000..f7c8877 --- /dev/null +++ b/schema/schema.json @@ -0,0 +1,182 @@ +{ + "$id": "https://github.com/adulau/pdns-qof/blob/master/schema/schema.json", + "$schema": "http://json-schema.org/draft-07/schema", + "default": {}, + "description": "This schema helps to validate JSON documents against the passive DNS Common Output Format. See https://www.first.org/global/sigs/passive-dns/", + "examples": [ + { + "count": 962, + "time_first": 1522998917, + "time_last": 1619613241, + "rrname": "westernunion.com.ph.", + "rrtype": "A", + "bailiwick": "westernunion.com.ph.", + "rdata": [ + "66.218.161.27", + "66.218.170.77" + ] + } + ], + "required": [ + "time_first", + "time_last", + "rrname", + "rrtype", + "rdata" + ], + "title": "The passive DNS Common Output Format (COF) schema", + "type": "object", + "properties": { + "count": { + "$id": "#/properties/count", + "default": 0, + "description": "Specifies how many authoritative DNS answers were received at the Passive DNS Server's collectors with exactly the given set of values as answers (i.e. same data in the answer set). The number of requests is expressed as a decimal value. This field is represented as a JSON [RFC4627] number.", + "examples": [ + 962 + ], + "title": "count of authoritative answers", + "type": "number" + }, + "time_first": { + "$id": "#/properties/time_first", + "default": 0, + "description": "This field returns the first time that the record / unique tuple (rrname, rrtype, rdata) has been seen by the passive DNS. The date is expressed in seconds (decimal) since 1st of January 1970 (Unix timestamp). The time zone MUST be UTC. This field is represented as a JSON [RFC4627] number.", + "examples": [ + 1522998917 + ], + "title": "first seen", + "type": "number" + }, + "time_last": { + "$id": "#/properties/time_last", + "default": 0, + "description": "This field returns the last time that the unique tuple (rrname, rrtype, rdata) record has been seen by the passive DNS. The date is expressed in seconds (decimal) since 1st of January 1970 (Unix timestamp). The time zone MUST be UTC. This field is represented as a JSON [RFC4627] number.", + "examples": [ + 1619613241 + ], + "title": "last seen", + "type": "number" + }, + "rrname": { + "$id": "#/properties/rrname", + "default": "", + "description": "This field returns the name of the queried resource.", + "examples": [ + "westernunion.com.ph." + ], + "title": "rrname", + "type": "string" + }, + "rrtype": { + "$id": "#/properties/rrtype", + "default": "", + "description": "This field returns the resource record type as seen by the passive DNS. The key is rrtype and the value is in the interpreted record type represented as a JSON [RFC4627] string. If the value cannot be interpreted the decimal value is returned following the principle of transparency as described in RFC 3597 [RFC3597]. Then the decimal value is represented as a JSON [RFC4627] number. Currently known and supported textual descriptions of rrtypes are: A, AAAA, CNAME, PTR, SOA, TXT, DNAME, NS, SRV, RP, NAPTR, HINFO, A6.", + "examples": [ + "A" + ], + "title": "rrtype", + "type": "string" + }, + "bailiwick": { + "$id": "#/properties/bailiwick", + "default": "", + "description": "The bailiwick is the best estimate of the apex of the zone where this data is authoritative.", + "examples": [ + "google.com." + ], + "title": "bailiwick", + "type": "string" + }, + "rdata": { + "$id": "#/properties/rdata", + "default": [], + "description": "This field returns the resource records of the queried resource. When multiple resource records are returned, rdata MUST be a JSON array containing JSON strings. In the case of a single resource record is returned, rdata MUST be a JSON string or a JSON array containing one JSON string. Each resource record is represented as a JSON string", + "examples": [ + [ + "8.8.8.8", + "9.9.9.9" + ] + ], + "title": "The rdata schema", + "type": "array", + "additionalItems": true, + "items": { + "$id": "#/properties/rdata/items", + "anyOf": [ + { + "$id": "#/properties/rdata/items/anyOf/0", + "type": "string", + "title": "The first anyOf schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + "66.218.161.27", + "66.218.170.77" + ] + } + ] + } + }, + "sensor_id": { + "$id": "#/properties/sensor_id", + "default": 0, + "description": "This field returns the sensor information where the record was seen. It is represented as a JSON [RFC4627] string.", + "examples": [ + "42" + ], + "title": "sensor_id", + "type": "string" + }, + "zone_time_first": { + "$id": "#/properties/zone_time_first", + "default": 0, + "description": "This field returns the first time that the unique tuple (rrname, rrtype, rdata) record has been seen via master file import. The date is expressed in seconds (decimal) since 1st of January 1970 (Unix timestamp). The time zone MUST be UTC. This field is represented as a JSON [RFC4627] number.", + "examples": [ + 1522998917 + ], + "title": "zone time first seen", + "type": "number" + }, + "zone_time_last": { + "$id": "#/properties/zone_time_last", + "default": 0, + "description": "This field returns the last time that the unique tuple (rrname, rrtype, rdata) record has been seen via master file import. The date is expressed in seconds (decimal) since 1st of January 1970 (Unix timestamp). The time zone MUST be UTC. This field is represented as a JSON [RFC4627] number.", + "examples": [ + 1619613241 + ], + "title": "zone time last seen", + "type": "number" + }, + "time_first_ms": { + "$id": "#/properties/time_first_ms", + "default": 0, + "description": "Same meaning as the field 'time_first', with the only difference, that the resolution is in milliseconds since 1st of January 1970 (UTC).", + "examples": [ + 1619690051001 + ], + "title": "time first seen (millisec)", + "type": "number" + }, + "time_last_ms": { + "$id": "#/properties/time_last_ms", + "default": 0, + "description": "Same meaning as the field 'time_last', with the only difference, that the resolution is in milliseconds since 1st of January 1970 (UTC).", + "examples": [ + 1619690211002 + ], + "title": "time last seen (millisec)", + "type": "number" + }, + "origin": { + "$id": "#/properties/origin", + "default": "", + "description": "Specifies the resource origin of the Passive DNS response. This field is represented as a Uniform Resource Identifier [RFC3986] (URI).", + "examples": [ + "http://circl.lu" + ], + "title": "sensor_id", + "type": "string" + } + }, + "additionalProperties": true +} From 7f638b0f15d11c1b68947e1f46831c5a3434a4af Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Thu, 29 Apr 2021 11:59:06 +0200 Subject: [PATCH 2/7] update CHANGELOG --- CHANGELOG | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 0385b71..657ef99 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,11 @@ # Version 0.8 +## Content changes + * added time_first_ms, time_last_ms + * clarified that time_{first,last} OR zone_time_{first,last} can be specified. + +## Other changes + * Added JSON schema * Started tracking in a CHANGELOG file * Change Aaron's Address * Address and find a compromise for issue #17 From c06cad0aa278ea2daa12dc26b42e3c0340e14129 Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Thu, 29 Apr 2021 11:59:48 +0200 Subject: [PATCH 3/7] fix issue #24 --- i-d/pdns-qof.txt | 728 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 728 insertions(+) create mode 100644 i-d/pdns-qof.txt diff --git a/i-d/pdns-qof.txt b/i-d/pdns-qof.txt new file mode 100644 index 0000000..f5c9203 --- /dev/null +++ b/i-d/pdns-qof.txt @@ -0,0 +1,728 @@ + + + + +Domain Name System Operations A. Dulaunoy +Internet-Draft CIRCL +Intended status: Informational A. Kaplan +Expires: December 3, 2020 + P. Vixie + H. Stern + Farsight Security, Inc. + June 1, 2020 + + + Passive DNS - Common Output Format + draft-dulaunoy-dnsop-passive-dns-cof-08 + +Abstract + + This document describes a common output format of Passive DNS Servers + which clients can query. The output format description includes also + in addition a common semantic for each Passive DNS system. By having + multiple Passive DNS Systems adhere to the same output format for + queries, users of multiple Passive DNS servers will be able to + combine result sets easily. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at https://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + This Internet-Draft will expire on December 3, 2020. + +Copyright Notice + + Copyright (c) 2020 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (https://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 1] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 + 1.1. Requirements Language . . . . . . . . . . . . . . . . . . 3 + 2. Limitation . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 3. Common Output Format . . . . . . . . . . . . . . . . . . . . 4 + 3.1. Overview . . . . . . . . . . . . . . . . . . . . . . . . 4 + 3.2. ABNF grammar . . . . . . . . . . . . . . . . . . . . . . 4 + 3.3. Mandatory Fields . . . . . . . . . . . . . . . . . . . . 5 + 3.3.1. rrname . . . . . . . . . . . . . . . . . . . . . . . 5 + 3.3.2. rrtype . . . . . . . . . . . . . . . . . . . . . . . 5 + 3.3.3. rdata . . . . . . . . . . . . . . . . . . . . . . . . 5 + 3.3.4. time_first . . . . . . . . . . . . . . . . . . . . . 6 + 3.3.5. time_last . . . . . . . . . . . . . . . . . . . . . . 6 + 3.4. Optional Fields . . . . . . . . . . . . . . . . . . . . . 6 + 3.4.1. count . . . . . . . . . . . . . . . . . . . . . . . . 6 + 3.4.2. bailiwick . . . . . . . . . . . . . . . . . . . . . . 6 + 3.5. Additional Fields . . . . . . . . . . . . . . . . . . . . 6 + 3.5.1. sensor_id . . . . . . . . . . . . . . . . . . . . . . 6 + 3.5.2. zone_time_first . . . . . . . . . . . . . . . . . . . 7 + 3.5.3. zone_time_last . . . . . . . . . . . . . . . . . . . 7 + 3.5.4. origin . . . . . . . . . . . . . . . . . . . . . . . 7 + 3.5.5. time_first_ms . . . . . . . . . . . . . . . . . . . . 7 + 3.5.6. time_last_ms . . . . . . . . . . . . . . . . . . . . 7 + 3.6. Additional Fields Registry . . . . . . . . . . . . . . . 7 + 3.7. Additional notes . . . . . . . . . . . . . . . . . . . . 8 + 4. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 8 + 5. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 8 + 6. Privacy Considerations . . . . . . . . . . . . . . . . . . . 8 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 9 + 8. References . . . . . . . . . . . . . . . . . . . . . . . . . 9 + 8.1. Normative References . . . . . . . . . . . . . . . . . . 9 + 8.2. References . . . . . . . . . . . . . . . . . . . . . . . 10 + 8.3. Informative References . . . . . . . . . . . . . . . . . 11 + Appendix A. Examples . . . . . . . . . . . . . . . . . . . . . . 11 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 12 + + + + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 2] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + +1. Introduction + + Passive DNS is a technique described by Florian Weimer in 2005 in + Passive DNS replication, F Weimer - 17th Annual FIRST Conference on + Computer Security [WEIMERPDNS]. Since then multiple Passive DNS + implementations were created and evolved over time. Users of these + Passive DNS servers may query a server (often via WHOIS [RFC3912] or + HTTP REST [REST]), parse the results and process them in other + applications. + + There are multiple implementations of Passive DNS software. Users of + passive DNS query each implementation and aggregate the results for + their search. This document describes the output format of four + Passive DNS Systems ([DNSDB], [DNSDBQ], [PDNSCERTAT], [PDNSCIRCL] and + [PDNSCOF]) which are in use today and which already share a nearly + identical output format. As the format and the meaning of output + fields from each Passive DNS need to be consistent, we propose in + this document a solution to commonly name each field along with their + corresponding interpretation. The format follows a simple key-value + structure in JSON [RFC4627] format. The benefit of having a + consistent Passive DNS output format is that multiple client + implementations can query different servers without having to have a + separate parser for each individual server. passivedns-client + [PDNSCLIENT] currently implements multiple parsers due to a lack of + standardization. The document does not describe the protocol (e.g. + WHOIS [RFC3912], HTTP REST [REST]) nor the query format used to query + the Passive DNS. Neither does this document describe "pre-recursor" + Passive DNS Systems. Both of these are separate topics and deserve + their own RFC document. The document describes the current best + practices implemented in various Passive DNS server implementations. + +1.1. Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +2. Limitation + + As a Passive DNS servers can include protection mechanisms for their + operation, results might be different due to those protection + measures. These mechanisms filter out DNS answers if they fail some + criteria. The bailiwick algorithm [BAILIWICK] protects the Passive + DNS Database from cache poisoning attacks [CACHEPOISONING]. Another + limitation that clients querying the database need to be aware of is + that each query simply gets a snapshot-answer of the time of + querying. Clients MUST NOT rely on consistent answers. Nor must + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 3] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + they assume that answers must be identical across multiple Passive + DNS Servers. + +3. Common Output Format + +3.1. Overview + + The formatting of the answer follows the JSON [RFC4627] format. In + fact, it is a subset of the full JSON language. Notable differences + are the modified definition of whitespace ("ws"). The order of the + fields is not significant for the same resource type. + + The intent of this output format is to be easily parsable by scripts. + Each JSON object is expressed on a single line to be processed by the + client line-by-line. Every implementation MUST support the JSON + output format. + + Examples of JSON (Appendix A) output are in the appendix. + +3.2. ABNF grammar + + Formal grammar as defined in ABNF [RFC2234] + + answer = entries + entries = * ( entry CR) + entry = "{" keyvallist "}" + keyvallist = [ member *( value-separator member ) ] + member = qm field qm name-separator value + name-separator = ws %x3A ws ; a ":" colon + value = value ; as defined in the JSON RFC + value-separator = ws %x2C ws ; , comma. As defined in JSON + field = "rrname" | "rrtype" | "rdata" | "time_first" | + "time_last" | "count" | "bailiwick" | "sensor_id" | + "zone_time_first" | "zone_time_last" | "origin" | + futureField + futureField = string + CR = %x0D + qm = %x22 ; " a quotation mark + ws = *( + %x20 | ; Space + %x09 ; Horizontal tab + ) + + + Note that value is defined in JSON [RFC4627] and has the exact same + specification as there. The same goes for the definition of string. + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 4] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + +3.3. Mandatory Fields + + Implementation MUST support all the mandatory fields. + + Uniqueness property: the tuple (rrname,rrtype,rdata) will always be + unique within one answer per server. While rrname and rrtype are + always individual JSON primitive types (strings, numbers, booleans or + null), rdata MAY return multiple resource records or a single record. + When multiple resource records are returned, rdata MUST be a JSON + array. In the case of a single resource record is returned, rdata + MUST be a JSON string or a JSON array containing one JSON string. + Senders SHOULD send an array for rdata, but receivers MUST be able to + accept a single-string result for rdata. + +3.3.1. rrname + + This field returns the name of the queried resource. JSON [RFC4627] + string. + +3.3.2. rrtype + + This field returns the resource record type as seen by the passive + DNS. The key is rrtype and the value is in the interpreted record + type represented as a JSON [RFC4627] string. If the value cannot be + interpreted, the decimal value is returned following the principle of + transparency as described in RFC 3597 [RFC3597]. Then the decimal + value is represented as a JSON [RFC4627] number. The resource record + type can be any values as described by IANA in the DNS parameters + document in the section 'Resource Record (RR) TYPEs' + (http://www.iana.org/assignments/dns-parameters). Supported textual + descriptions of rrtypes include: A, AAAA, CNAME, etc. A client MUST + be able to understand these textual rrtype values represented as a + JSON [RFC4627] string. In addition, a client MUST be able to handle + a decimal value (as mentioned above) answer represented as a JSON + [RFC4627] number. + +3.3.3. rdata + + This field returns the resource records of the queried resource. + When multiple resource records are returned, rdata MUST be a JSON + array containing JSON strings. In the case of a single resource + record is returned, rdata MUST be a JSON string or a JSON array + containing one JSON string. Each resource record is represented as a + JSON [RFC4627] string. Each resource record MUST be escaped as + defined in section 2.6 of RFC4627 [RFC4627]. Depending on the + rrtype, this can be an IPv4 or IPv6 address, a domain name (as in the + case of CNAMEs), an SPF record, etc. A client MUST be able to + interpret any value which is legal as the right hand side in a DNS + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 5] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + master file RFC 1035 [RFC1035] and RFC 1034 [RFC1034]. If the rdata + came from an unknown DNS resource records, the server must follow the + transparency principle as described in RFC 3597 [RFC3597]. + +3.3.4. time_first + + This field returns the first time that the record / unique tuple + (rrname, rrtype, rdata) has been seen by the passive DNS. The date + is expressed in seconds (decimal) since 1st of January 1970 (Unix + timestamp). The time zone MUST be UTC. This field is represented as + a JSON [RFC4627] number. + +3.3.5. time_last + + This field returns the last time that the unique tuple (rrname, + rrtype, rdata) record has been seen by the passive DNS. The date is + expressed in seconds (decimal) since 1st of January 1970 (Unix + timestamp). The time zone MUST be UTC. This field is represented as + a JSON [RFC4627] number. + +3.4. Optional Fields + + Implementations SHOULD support one or more fields. + +3.4.1. count + + Specifies how many authoritative DNS answers were received at the + Passive DNS Server's collectors with exactly the given set of values + as answers (i.e. same data in the answer set - compare with the + uniqueness property in "Mandatory Fields"). The number of requests + is expressed as a decimal value. This field is represented as a JSON + [RFC4627] number. + +3.4.2. bailiwick + + The bailiwick is the best estimate of the apex of the zone where this + data is authoritative. + +3.5. Additional Fields + + Implementations MAY support the following fields: + +3.5.1. sensor_id + + This field returns the sensor information where the record was seen. + It is represented as a JSON [RFC4627] string. + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 6] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + If the data originate from sensors or probes which are part of a + publicly-known gathering or measurement system (e.g. RIPE Atlas), a + JSON [RFC4627] string SHOULD be prefixed. + +3.5.2. zone_time_first + + This field returns the first time that the unique tuple (rrname, + rrtype, rdata) record has been seen via master file import. The date + is expressed in seconds (decimal) since 1st of January 1970 (Unix + timestamp). The time zone MUST be UTC. This field is represented as + a JSON [RFC4627] number. + +3.5.3. zone_time_last + + This field returns the last time that the unique tuple (rrname, + rrtype, rdata) record has been seen via master file import. The date + is expressed in seconds (decimal) since 1st of January 1970 (Unix + timestamp). The time zone MUST be UTC. This field is represented as + a JSON [RFC4627] number. + +3.5.4. origin + + Specifies the resource origin of the Passive DNS response. This + field is represented as a Uniform Resource Identifier [RFC3986] + (URI). + +3.5.5. time_first_ms + + Same meaning as the field "time_first", with the only difference, + that the resolution is in milliseconds since 1st of January 1970 + (UTC). + +3.5.6. time_last_ms + + Same meaning as the field "time_last", with the only difference, that + the resolution is in milliseconds since 1st of January 1970 (UTC). + +3.6. Additional Fields Registry + + In accordance with [RFC6648], designers of new passive DNS + applications that would need additional fields can request and + register new field name at https://github.com/adulau/pdns-qof/wiki/ + Additional-Fields. + + + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 7] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + +3.7. Additional notes + + An implementer of a passive DNS Server MAY chose to either return + time_first and time_last OR return zone_time_first and + zone_time_last. In pseudocode: (time_first AND time_last) OR + (zone_time_first AND zone_time_last). In this case, + zone_time_{first,last} replace the time_{first,last} fields. + However, this is not encouraged since it might be confusing for + parsers who will expect the mandatory fields time_{first,last}. See: + [github_issue_17] + +4. Acknowledgements + + Thanks to the Passive DNS developers who contributed to the document. + +5. IANA Considerations + + This memo includes no request to IANA. + +6. Privacy Considerations + + Passive DNS Servers capture DNS answers from multiple collecting + points ("sensors") which are located on the Internet-facing side of + DNS recursors ("post-recursor passive DNS"). In this process, they + intentionally omit the source IP, source port, destination IP and + destination port from the captured packets. Since the data is + captured "post-recursor", the timing information (who queries what) + is lost, since the recursor will cache the results. Furthermore, + since multiple sensors feed into a passive DNS server, the resulting + data gets mixed together, reducing the likelihood that Passive DNS + Servers are able to find out much about the actual person querying + the DNS records nor who actually sent the query. In this sense, + passive DNS Servers are similar to keeping an archive of all previous + phone books - if public DNS records can be compared to phone numbers + - as they often are. Nevertheless, the authors strongly encourage + Passive DNS implementors to take special care of privacy issues. + bortzmeyer-dnsop-dns-privacy is an excellent starting point for this. + Finally, the overall recommendations in RFC6973 [RFC6973] should be + taken into consideration when designing any application which uses + Passive DNS data. + + In the scope of the General Data Protection Regulation (GDPR - + Directive 95/46/EC), operators of Passive DNS Server needs to ensure + the legal ground and lawfulness of its operation. + + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 8] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + +7. Security Considerations + + In some cases, Passive DNS output might contain confidential + information and its access might be restricted. When a user is + querying multiple Passive DNS and aggregating the data, the + sensitivity of the data must be considered. + +8. References + +8.1. Normative References + + [RFC1034] Mockapetris, P., "Domain names - concepts and facilities", + STD 13, RFC 1034, DOI 10.17487/RFC1034, November 1987, + . + + [RFC1035] Mockapetris, P., "Domain names - implementation and + specification", STD 13, RFC 1035, DOI 10.17487/RFC1035, + November 1987, . + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, + DOI 10.17487/RFC2119, March 1997, + . + + [RFC2234] Crocker, D., Ed. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, DOI 10.17487/RFC2234, + November 1997, . + + [RFC3597] Gustafsson, A., "Handling of Unknown DNS Resource Record + (RR) Types", RFC 3597, DOI 10.17487/RFC3597, September + 2003, . + + [RFC3912] Daigle, L., "WHOIS Protocol Specification", RFC 3912, + DOI 10.17487/RFC3912, September 2004, + . + + [RFC3986] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", STD 66, + RFC 3986, DOI 10.17487/RFC3986, January 2005, + . + + [RFC4627] Crockford, D., "The application/json Media Type for + JavaScript Object Notation (JSON)", RFC 4627, + DOI 10.17487/RFC4627, July 2006, + . + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 9] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + [RFC5001] Austein, R., "DNS Name Server Identifier (NSID) Option", + RFC 5001, DOI 10.17487/RFC5001, August 2007, + . + + [RFC6648] Saint-Andre, P., Crocker, D., and M. Nottingham, + "Deprecating the "X-" Prefix and Similar Constructs in + Application Protocols", BCP 178, RFC 6648, + DOI 10.17487/RFC6648, June 2012, + . + + [RFC6973] Cooper, A., Tschofenig, H., Aboba, B., Peterson, J., + Morris, J., Hansen, M., and R. Smith, "Privacy + Considerations for Internet Protocols", RFC 6973, + DOI 10.17487/RFC6973, July 2013, + . + +8.2. References + + [BAILIWICK] + "Passive DNS Hardening", 2010, + . + + [CACHEPOISONING] + "Black ops 2008: It's the end of the cache as we know + it.", 2008, . + + [DNSDB] "DNSDB API", 2013, . + + [DNSDBQ] "DNSDB API Client, C Version", 2018, + . + + [github_issue_17] + "Discussion on the existing implementations of returning + either zone_time{first,last} OR time_{first,last}", 2020, + . + + [PDNSCERTAT] + "pDNS presentation at 4th Centr R&D workshop Frankfurt Jun + 5th 2012", 2012, + . + + [PDNSCIRCL] + "CIRCL Passive DNS", 2012, + . + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 10] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + [PDNSCLIENT] + "Queries 5 major Passive DNS databases: BFK, CERTEE, + DNSParse, ISC, and VirusTotal.", 2013, + . + + [PDNSCOF] "Passive DNS server interface using the common output + format", 2013, + . + + [REST] "Representational State Transfer (REST)", 2000, + . + + [WEIMERPDNS] + "Passive DNS Replication", 2005, + . + +8.3. Informative References + + [I-D.narten-iana-considerations-rfc2434bis] + Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", draft-narten-iana- + considerations-rfc2434bis-09 (work in progress), March + 2008. + + [RFC3552] Rescorla, E. and B. Korver, "Guidelines for Writing RFC + Text on Security Considerations", BCP 72, RFC 3552, + DOI 10.17487/RFC3552, July 2003, + . + +Appendix A. Examples + + The JSON output are represented on multiple lines for readability but + each JSON object should be on a single line. + + If you query a passive DNS for the rrname www.ietf.org, the passive + dns common output format can be: + + + {"count": 102, "time_first": 1298412391, "rrtype": "AAAA", + "rrname": "www.ietf.org", "rdata": "2001:1890:1112:1::20", + "time_last": 1302506851} + {"count": 59, "time_first": 1384865833, "rrtype": "A", + "rrname": "www.ietf.org", "rdata": "4.31.198.44", + "time_last": 1389022219} + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 11] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + If you query a passive DNS for the rrname ietf.org, the passive dns + common output format can be: + + + {"count": 109877, "time_first": 1298398002, "rrtype": "NS", + "rrname": "ietf.org", "rdata": "ns1.yyz1.afilias-nst.info", + "time_last": 1389095375} + {"count": 4, "time_first": 1298495035, "rrtype": "A", + "rrname": "ietf.org", "rdata": "64.170.98.32", + "time_last": 1298495035} + {"count": 9, "time_first": 1317037550, "rrtype": "AAAA", + "rrname": "ietf.org", "rdata": "2001:1890:123a::1:1e", + "time_last": 1330209752} + + + Please note that the examples imply that a single query returns a + single set of JSON objects. For example, two queries were made; one + query returned a set of two JSON objects and the other query returned + a set of three JSON objects. This specification requires each JSON + object individually MUST conform to the common output format, but + this specification does not require that a query will return a set of + JSON objects. + + Please note that in the examples above, any backslashes "\" can be + ignored and are an artifact of the tools which produced this + document. + +Authors' Addresses + + Alexandre Dulaunoy + CIRCL + 16, bd d'Avranches + Luxembourg L-1160 + Luxembourg + + Phone: (+352) 247 88444 + Email: alexandre.dulaunoy@circl.lu + URI: http://www.circl.lu/ + + + L. Aaron Kaplan + Vienna A-1170 + Austria + + Email: aaron@lo-res.org + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 12] + +Internet-Draft Passive DNS - Common Output Format June 2020 + + + Paul Vixie + Farsight Security, Inc. + 11400 La Honda Road + Woodside, California 94062 + U.S.A. + + Email: paul@redbarn.org + URI: https://www.farsightsecurity.com/ + + + Henry Stern + Farsight Security, Inc. + 11400 La Honda Road + Woodside, California 94062 + U.S.A. + + Phone: +1 650 542-7836 + Email: henry@stern.ca + URI: https://www.farsightsecurity.com/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Dulaunoy, et al. Expires December 3, 2020 [Page 13] From 65a1317cd1bb566aced7416354f3518b5632f8d3 Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Thu, 29 Apr 2021 13:02:24 +0200 Subject: [PATCH 4/7] Add python code to test the JSON schema. Works. Added testdata/ --- .gitignore | 2 ++ example_code/python/README.md | 5 +++ example_code/python/cofparser_jsonschema.py | 37 +++++++++++++++++++++ example_code/testdata/data.json | 2 ++ 4 files changed, 46 insertions(+) create mode 100644 .gitignore create mode 100644 example_code/python/cofparser_jsonschema.py create mode 100644 example_code/testdata/data.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..783cd5b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.swp + diff --git a/example_code/python/README.md b/example_code/python/README.md index 21c1c98..9c7490a 100644 --- a/example_code/python/README.md +++ b/example_code/python/README.md @@ -3,4 +3,9 @@ This little package can parse the Passive DNS Common Output Format (COF) and validate it. It is given as example code. + * cofparser.py ... a manually written parser for the COF format + * cofparser_jsonschema.py .... one which uses the JSON schema to validate. + + + diff --git a/example_code/python/cofparser_jsonschema.py b/example_code/python/cofparser_jsonschema.py new file mode 100644 index 0000000..02d6b7f --- /dev/null +++ b/example_code/python/cofparser_jsonschema.py @@ -0,0 +1,37 @@ +import sys +import json +import jsonschema +from jsonschema import validate + + +def get_schema(filename): + """This function loads the given schema available""" + with open(filename, 'r') as file: + schema = json.load(file) + return schema + + +def validate_json(json_data, schema=None): + """REF: https://json-schema.org/ """ + + try: + validate(instance=json_data, schema=schema) + except jsonschema.exceptions.ValidationError as err: + print(err) + err = "Given JSON data is InValid" + return False, err + + message = "Given JSON data is Valid" + return True, message + + +if __name__ == "__main__": + schema = get_schema("schema/schema.json") + + # Convert json to python object. + with open(sys.argv[1], 'r') as ndjson_file: + for line in ndjson_file: + jsonData = json.loads(line) + # validate it + is_valid, msg = validate_json(jsonData, schema=schema) + print(msg) diff --git a/example_code/testdata/data.json b/example_code/testdata/data.json new file mode 100644 index 0000000..b439f73 --- /dev/null +++ b/example_code/testdata/data.json @@ -0,0 +1,2 @@ +{"count":1909,"rdata":["cpa.circl.lu"],"rrname":"www.circl.lu","rrtype":"CNAME","time_first":1315586409,"time_last":1449566799} +{"count":2560,"rdata":["cpab.circl.lu"],"rrname":"www.circl.lu","rrtype":"CNAME","time_first":1449584660,"time_last":1617676151} From 7fe69103d190dbfc1d86b531199da288f894f2f8 Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Fri, 30 Apr 2021 10:14:32 +0200 Subject: [PATCH 5/7] move it baby --- ...{cofparser_jsonschema.py => cofparser_using_the_jsonschema.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example_code/python/{cofparser_jsonschema.py => cofparser_using_the_jsonschema.py} (100%) diff --git a/example_code/python/cofparser_jsonschema.py b/example_code/python/cofparser_using_the_jsonschema.py similarity index 100% rename from example_code/python/cofparser_jsonschema.py rename to example_code/python/cofparser_using_the_jsonschema.py From f4639cdf0113684f01fe5816d8c8f151fe9fbf41 Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Tue, 4 May 2021 16:03:04 +0200 Subject: [PATCH 6/7] Add a section on MIME types. Closes issue #9. --- i-d/pdns-qof.txt | 74 ++++++++++++++++++++++++------------------------ i-d/pdns-qof.xml | 3 ++ 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/i-d/pdns-qof.txt b/i-d/pdns-qof.txt index f5c9203..a2ff37d 100644 --- a/i-d/pdns-qof.txt +++ b/i-d/pdns-qof.txt @@ -90,6 +90,7 @@ Table of Contents 3.5.6. time_last_ms . . . . . . . . . . . . . . . . . . . . 7 3.6. Additional Fields Registry . . . . . . . . . . . . . . . 7 3.7. Additional notes . . . . . . . . . . . . . . . . . . . . 8 + 3.8. Suggested MIME Types . . . . . . . . . . . . . . . . . . 8 4. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 8 5. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 8 6. Privacy Considerations . . . . . . . . . . . . . . . . . . . 8 @@ -108,7 +109,6 @@ Table of Contents - Dulaunoy, et al. Expires December 3, 2020 [Page 2] Internet-Draft Passive DNS - Common Output Format June 2020 @@ -405,6 +405,12 @@ Internet-Draft Passive DNS - Common Output Format June 2020 parsers who will expect the mandatory fields time_{first,last}. See: [github_issue_17] +3.8. Suggested MIME Types + + An implementer of a passive DNS Server SHOULD server a document in + this Common Output Format with a MIME header of "application/ + x-ndjson". + 4. Acknowledgements Thanks to the Passive DNS developers who contributed to the document. @@ -435,12 +441,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 taken into consideration when designing any application which uses Passive DNS data. - In the scope of the General Data Protection Regulation (GDPR - - Directive 95/46/EC), operators of Passive DNS Server needs to ensure - the legal ground and lawfulness of its operation. - - - @@ -450,6 +450,10 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 8] Internet-Draft Passive DNS - Common Output Format June 2020 + In the scope of the General Data Protection Regulation (GDPR - + Directive 95/46/EC), operators of Passive DNS Server needs to ensure + the legal ground and lawfulness of its operation. + 7. Security Considerations In some cases, Passive DNS output might contain confidential @@ -491,10 +495,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 RFC 3986, DOI 10.17487/RFC3986, January 2005, . - [RFC4627] Crockford, D., "The application/json Media Type for - JavaScript Object Notation (JSON)", RFC 4627, - DOI 10.17487/RFC4627, July 2006, - . @@ -506,6 +506,11 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 9] Internet-Draft Passive DNS - Common Output Format June 2020 + [RFC4627] Crockford, D., "The application/json Media Type for + JavaScript Object Notation (JSON)", RFC 4627, + DOI 10.17487/RFC4627, July 2006, + . + [RFC5001] Austein, R., "DNS Name Server Identifier (NSID) Option", RFC 5001, DOI 10.17487/RFC5001, August 2007, . @@ -549,11 +554,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 . - [PDNSCIRCL] - "CIRCL Passive DNS", 2012, - . - - @@ -562,6 +562,10 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 10] Internet-Draft Passive DNS - Common Output Format June 2020 + [PDNSCIRCL] + "CIRCL Passive DNS", 2012, + . + [PDNSCLIENT] "Queries 5 major Passive DNS databases: BFK, CERTEE, DNSParse, ISC, and VirusTotal.", 2013, @@ -602,12 +606,8 @@ Appendix A. Examples dns common output format can be: - {"count": 102, "time_first": 1298412391, "rrtype": "AAAA", - "rrname": "www.ietf.org", "rdata": "2001:1890:1112:1::20", - "time_last": 1302506851} - {"count": 59, "time_first": 1384865833, "rrtype": "A", - "rrname": "www.ietf.org", "rdata": "4.31.198.44", - "time_last": 1389022219} + + @@ -618,6 +618,14 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 11] Internet-Draft Passive DNS - Common Output Format June 2020 + {"count": 102, "time_first": 1298412391, "rrtype": "AAAA", + "rrname": "www.ietf.org", "rdata": "2001:1890:1112:1::20", + "time_last": 1302506851} + {"count": 59, "time_first": 1384865833, "rrtype": "A", + "rrname": "www.ietf.org", "rdata": "4.31.198.44", + "time_last": 1389022219} + + If you query a passive DNS for the rrname ietf.org, the passive dns common output format can be: @@ -658,14 +666,6 @@ Authors' Addresses URI: http://www.circl.lu/ - L. Aaron Kaplan - Vienna A-1170 - Austria - - Email: aaron@lo-res.org - - - @@ -674,6 +674,13 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 12] Internet-Draft Passive DNS - Common Output Format June 2020 + L. Aaron Kaplan + Vienna A-1170 + Austria + + Email: aaron@lo-res.org + + Paul Vixie Farsight Security, Inc. 11400 La Honda Road @@ -710,13 +717,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 - - - - - - - diff --git a/i-d/pdns-qof.xml b/i-d/pdns-qof.xml index d79971f..069f44a 100644 --- a/i-d/pdns-qof.xml +++ b/i-d/pdns-qof.xml @@ -263,6 +263,9 @@ ws = *(
An implementer of a passive DNS Server MAY chose to either return time_first and time_last OR return zone_time_first and zone_time_last. In pseudocode: (time_first AND time_last) OR (zone_time_first AND zone_time_last). In this case, zone_time_{first,last} replace the time_{first,last} fields. However, this is not encouraged since it might be confusing for parsers who will expect the mandatory fields time_{first,last}. See: +
+
+ An implementer of a passive DNS Server SHOULD server a document in this Common Output Format with a MIME header of "application/x-ndjson".
From be9de2de887c216fe9b67007b644ff026e3474eb Mon Sep 17 00:00:00 2001 From: aaronkaplan Date: Tue, 4 May 2021 16:03:04 +0200 Subject: [PATCH 7/7] Add a section on MIME types. Closes issue #9. --- CHANGELOG | 1 + i-d/pdns-qof.txt | 74 ++++++++++++++++++++++++------------------------ i-d/pdns-qof.xml | 3 ++ 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 657ef99..2ac6d07 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ ## Content changes * added time_first_ms, time_last_ms * clarified that time_{first,last} OR zone_time_{first,last} can be specified. + * Added MIME type SHOULD be "application/x-ndjson". As discussed in #9. ## Other changes * Added JSON schema diff --git a/i-d/pdns-qof.txt b/i-d/pdns-qof.txt index f5c9203..a2ff37d 100644 --- a/i-d/pdns-qof.txt +++ b/i-d/pdns-qof.txt @@ -90,6 +90,7 @@ Table of Contents 3.5.6. time_last_ms . . . . . . . . . . . . . . . . . . . . 7 3.6. Additional Fields Registry . . . . . . . . . . . . . . . 7 3.7. Additional notes . . . . . . . . . . . . . . . . . . . . 8 + 3.8. Suggested MIME Types . . . . . . . . . . . . . . . . . . 8 4. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 8 5. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 8 6. Privacy Considerations . . . . . . . . . . . . . . . . . . . 8 @@ -108,7 +109,6 @@ Table of Contents - Dulaunoy, et al. Expires December 3, 2020 [Page 2] Internet-Draft Passive DNS - Common Output Format June 2020 @@ -405,6 +405,12 @@ Internet-Draft Passive DNS - Common Output Format June 2020 parsers who will expect the mandatory fields time_{first,last}. See: [github_issue_17] +3.8. Suggested MIME Types + + An implementer of a passive DNS Server SHOULD server a document in + this Common Output Format with a MIME header of "application/ + x-ndjson". + 4. Acknowledgements Thanks to the Passive DNS developers who contributed to the document. @@ -435,12 +441,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 taken into consideration when designing any application which uses Passive DNS data. - In the scope of the General Data Protection Regulation (GDPR - - Directive 95/46/EC), operators of Passive DNS Server needs to ensure - the legal ground and lawfulness of its operation. - - - @@ -450,6 +450,10 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 8] Internet-Draft Passive DNS - Common Output Format June 2020 + In the scope of the General Data Protection Regulation (GDPR - + Directive 95/46/EC), operators of Passive DNS Server needs to ensure + the legal ground and lawfulness of its operation. + 7. Security Considerations In some cases, Passive DNS output might contain confidential @@ -491,10 +495,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 RFC 3986, DOI 10.17487/RFC3986, January 2005, . - [RFC4627] Crockford, D., "The application/json Media Type for - JavaScript Object Notation (JSON)", RFC 4627, - DOI 10.17487/RFC4627, July 2006, - . @@ -506,6 +506,11 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 9] Internet-Draft Passive DNS - Common Output Format June 2020 + [RFC4627] Crockford, D., "The application/json Media Type for + JavaScript Object Notation (JSON)", RFC 4627, + DOI 10.17487/RFC4627, July 2006, + . + [RFC5001] Austein, R., "DNS Name Server Identifier (NSID) Option", RFC 5001, DOI 10.17487/RFC5001, August 2007, . @@ -549,11 +554,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 . - [PDNSCIRCL] - "CIRCL Passive DNS", 2012, - . - - @@ -562,6 +562,10 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 10] Internet-Draft Passive DNS - Common Output Format June 2020 + [PDNSCIRCL] + "CIRCL Passive DNS", 2012, + . + [PDNSCLIENT] "Queries 5 major Passive DNS databases: BFK, CERTEE, DNSParse, ISC, and VirusTotal.", 2013, @@ -602,12 +606,8 @@ Appendix A. Examples dns common output format can be: - {"count": 102, "time_first": 1298412391, "rrtype": "AAAA", - "rrname": "www.ietf.org", "rdata": "2001:1890:1112:1::20", - "time_last": 1302506851} - {"count": 59, "time_first": 1384865833, "rrtype": "A", - "rrname": "www.ietf.org", "rdata": "4.31.198.44", - "time_last": 1389022219} + + @@ -618,6 +618,14 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 11] Internet-Draft Passive DNS - Common Output Format June 2020 + {"count": 102, "time_first": 1298412391, "rrtype": "AAAA", + "rrname": "www.ietf.org", "rdata": "2001:1890:1112:1::20", + "time_last": 1302506851} + {"count": 59, "time_first": 1384865833, "rrtype": "A", + "rrname": "www.ietf.org", "rdata": "4.31.198.44", + "time_last": 1389022219} + + If you query a passive DNS for the rrname ietf.org, the passive dns common output format can be: @@ -658,14 +666,6 @@ Authors' Addresses URI: http://www.circl.lu/ - L. Aaron Kaplan - Vienna A-1170 - Austria - - Email: aaron@lo-res.org - - - @@ -674,6 +674,13 @@ Dulaunoy, et al. Expires December 3, 2020 [Page 12] Internet-Draft Passive DNS - Common Output Format June 2020 + L. Aaron Kaplan + Vienna A-1170 + Austria + + Email: aaron@lo-res.org + + Paul Vixie Farsight Security, Inc. 11400 La Honda Road @@ -710,13 +717,6 @@ Internet-Draft Passive DNS - Common Output Format June 2020 - - - - - - - diff --git a/i-d/pdns-qof.xml b/i-d/pdns-qof.xml index d79971f..069f44a 100644 --- a/i-d/pdns-qof.xml +++ b/i-d/pdns-qof.xml @@ -263,6 +263,9 @@ ws = *(
An implementer of a passive DNS Server MAY chose to either return time_first and time_last OR return zone_time_first and zone_time_last. In pseudocode: (time_first AND time_last) OR (zone_time_first AND zone_time_last). In this case, zone_time_{first,last} replace the time_{first,last} fields. However, this is not encouraged since it might be confusing for parsers who will expect the mandatory fields time_{first,last}. See: +
+
+ An implementer of a passive DNS Server SHOULD server a document in this Common Output Format with a MIME header of "application/x-ndjson".