Merge pull request #33 from adulau/AddressEliotsInitialComments

Address Eliots initial comments
This commit is contained in:
Alexandre Dulaunoy 2024-08-29 11:28:36 +02:00 committed by GitHub
commit de636a7937
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 181 additions and 134 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
*.swp *.swp
i-d/pdns-qof.txt

View file

@ -1,30 +1,38 @@
<?xml version="1.0" encoding="US-ASCII"?> <?xml version="1.0" encoding="utf-8"?>
<?xml-model href="rfc7991bis.rnc"?> <!-- Required for schema validation and schema-aware editing -->
<?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ <!DOCTYPE rfc [
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC1035 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1035.xml">
<!ENTITY RFC1034 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1034.xml"> <!ENTITY RFC1034 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1034.xml">
<!ENTITY RFC4627 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4627.xml"> <!ENTITY RFC1035 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1035.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2234 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2234.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3597 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3597.xml"> <!ENTITY RFC3597 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3597.xml">
<!ENTITY RFC3912 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3912.xml"> <!ENTITY RFC3912 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3912.xml">
<!ENTITY RFC6648 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6648.xml">
<!ENTITY RFC2234 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2234.xml">
<!ENTITY RFC6973 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6973.xml">
<!ENTITY RFC3986 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3986.xml"> <!ENTITY RFC3986 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3986.xml">
<!ENTITY RFC4627 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4627.xml">
<!ENTITY RFC6648 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6648.xml">
<!ENTITY RFC6973 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6973.xml">
<!ENTITY RFC7258 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7258.xml">
<!ENTITY RFC7871 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7871.xml">
<!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml"> <!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml">
<!ENTITY I-D.draft-bortzmeyer-dnsop-dns-privacy SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-bortzmeyer-dnsop-dns-privacy"> <!ENTITY I-D.draft-bortzmeyer-dnsop-dns-privacy SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-bortzmeyer-dnsop-dns-privacy">
]> ]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt'?>
<?rfc strict="yes"?>
<?rfc toc="yes"?> <rfc
<?rfc tocdepth="4"?> xmlns:xi="http://www.w3.org/2001/XInclude"
<?rfc symrefs="yes"?> category="info"
<?rfc sortrefs="yes"?> docName="draft-dulaunoy-dnsop-passive-dns-cof-12"
<?rfc compact="yes"?> ipr="trust200902"
<?rfc subcompact="no"?> obsoletes=""
<rfc category="info" docName="draft-dulaunoy-dnsop-passive-dns-cof-12" ipr="trust200902"> updates=""
submissionType="IETF"
xml:lang="en"
version="3">
<!-- ***** FRONT MATTER ***** --> <!-- ***** FRONT MATTER ***** -->
<front> <front>
<title abbrev="Passive DNS - Common Output Format">Passive DNS - Common Output Format</title> <title abbrev="Passive DNS - Common Output Format">Passive DNS - Common Output Format</title>
@ -97,12 +105,12 @@
</address> </address>
</author> </author>
<date day="5" month="June" year="2024" /> <date day="27" month="August" year="2024" />
<area>General</area> <area>General</area>
<workgroup>Domain Name System Operations</workgroup> <workgroup>Domain Name System Operations</workgroup>
<keyword>dns</keyword> <keyword>dns</keyword>
<abstract> <abstract>
<t>This document describes a common output format of Passive DNS Servers that clients can <t>This document describes a common output format of Passive DNS servers that clients can
query. The output format description also includes a common semantic for each Passive DNS query. The output format description also includes a common semantic for each Passive DNS
system. By having multiple Passive DNS Systems adhere to the same output format for queries, system. By having multiple Passive DNS Systems adhere to the same output format for queries,
users of multiple Passive DNS servers will be able to combine result sets easily.</t> users of multiple Passive DNS servers will be able to combine result sets easily.</t>
@ -111,29 +119,63 @@
<middle> <middle>
<section title="Introduction"> <section title="Introduction">
<t>Passive DNS is a technique described by Florian Weimer in 2005 in <xref target="WEIMERPDNS">Passive <t>Passive DNS is a technique described by Florian Weimer in 2005 in <xref target="WEIMERPDNS">Passive
DNS replication, F Weimer - 17th Annual FIRST Conference on Computer Security</xref>. Since DNS replication, F Weimer - 17th Annual FIRST Conference on Computer Security</xref>.
then, multiple Passive DNS implementations were created and have evolved over time. Users of It is a mechanism for
these Passive DNS servers may query a server (often via <xref target="RFC3912">WHOIS</xref> logging DNS answers in a manner intended to minimize the privacy
or HTTP <xref target="REST">REST</xref>), parse the results, and process them in other implications to users, and is widely used by security researchers to investigate
applications.</t> malware (for example to discover command and control servers), and other
security threats. By capturing only the "cache fill" DNS responses
(responses from authoritative servers in response to queries performed by a
recursive resolver when iteratively resolving a name), Passive DNS does
not have access to the client's (user's) source IP, source port, destination
IP, or destination port.</t>
<t> There are multiple implementations of Passive DNS software. Users of Passive DNS query <t>As these answers are served in response to queries originally
each implementation and aggregate the results for their search. This document describes the initiated by user devices, the Passive DNS data can be used to detect if
output format of four Passive DNS Systems (<xref target="DNSDB" />, <xref target="DNSDBQ" /> devices using the resolver are connecting to known malicious domains,
, <xref target="PDNSCERTAT" />, <xref target="PDNSCIRCL" /> and <xref target="PDNSCOF" />) without identifying the individual users / devices. In addition, as
that are in use today and that already share a nearly identical output format. As the format answers are responses to queries made by the recursive server itself,
and the meaning of output fields from each Passive DNS need to be consistent, this document Passive DNS records the answers which are ultimately served to users.
proposes a solution to commonly name each field along with its corresponding interpretation. This is important as authoritative servers may serve different answers to
The format follows a simple key-value structure in <xref target="RFC4627">JSON</xref> different query addresses, for example to increase performance (e.g. <xref
format. The benefit of having a consistent Passive DNS output format is that multiple client target="RFC7871">Client Subnet in DNS Queries</xref>) or to hide
implementations can query different servers without having to have a separate parser for malicious behavior when queried from addresses known to be associated
each individual server. <xref target="PDNSCLIENT">passivedns-client</xref> currently with security researchers.</t>
implements multiple parsers due to a lack of standardization. The document does not describe
the protocol (e.g. <xref target="RFC3912">WHOIS</xref>, HTTP <xref target="REST">REST</xref>) <t>Passive DNS is usually implemented either by capturing DNS response
nor the query format used to query the Passive DNS. Neither does this document describe packets themselves (i.e. packets with a destination address of the
"pre-recursor" Passive DNS Systems. Each of these are separate topics and deserve their own recursive resolver, a source port of 53, and the QR bit set to 1) or
RFC documents. This document describes the current best practices implemented in various by having the DNS software itself log these responses. The latter method
Passive DNS server implementations. </t> is likely to become more common as recursive to authoritative DNS
communication becomes encrypted.
</t>
<t>Multiple Passive DNS implementations and services exist. Users of
these Passive DNS services may query a server (often via <xref
target="RFC3912">WHOIS</xref>
or HTTP <xref target="REST">REST</xref>), parse the results, and process
them in other applications. Users of Passive DNS query each
implementation and aggregate the results for their search. This document
describes the output format of four Passive DNS Systems (<xref
target="DNSDB" />, <xref target="DNSDBQ" /> , <xref target="PDNSCERTAT"
/>, <xref target="PDNSCIRCL" /> and <xref target="PDNSCOF" />) that are
in use today and that already share a nearly identical output format. As
the format and the meaning of output fields from each Passive DNS need to
be consistent, this document proposes a solution to commonly name each
field along with its corresponding interpretation. The format follows a
simple key-value structure in <xref target="RFC4627">JSON</xref>
format. The benefit of having a consistent Passive DNS output format is
that multiple client implementations can query different servers
without having to have a separate parser for each individual server.
<xref target="PDNSCLIENT">passivedns-client</xref> currently implements
multiple parsers due to a lack of standardization. The document does
not describe the protocol (e.g. <xref target="RFC3912">WHOIS</xref>,
HTTP <xref target="REST">REST</xref>) nor the query format used to
query the Passive DNS. Neither does this document describe
"pre-recursor" Passive DNS Systems. Each of these are separate topics
and deserve their own RFC documents. This document describes the
current best practices implemented in various Passive DNS server
implementations. </t>
<section title="Requirements Language"> <section title="Requirements Language">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD
@ -142,15 +184,15 @@
</section> </section>
</section> </section>
<section title="Limitation"> <section title="Limitations">
<t> As Passive DNS servers can include protection mechanisms for their operation, results <t> As Passive DNS servers can include protection mechanisms for their operation, results
might be different due to those protection measures. These mechanisms filter out DNS answers might be different due to those protection measures. These mechanisms filter out DNS answers
if they fail some criteria. The <xref target="BAILIWICK">bailiwick algorithm</xref> protects if they fail some criteria. The <xref target="BAILIWICK">bailiwick algorithm</xref> protects
the Passive DNS Database from <xref target="CACHEPOISONING">cache poisoning attacks</xref>. the Passive DNS Database from cache poisoning attacks.
Another limitation that clients querying the database need to be aware of is that each query Another limitation that clients querying the database need to be aware of is that each query
simply gets a snapshot-in-time answer at the time of querying. Clients MUST NOT rely on simply gets a snapshot-in-time answer at the time of querying. Clients MUST NOT rely on
existing answers from different Passive DNS databases. Nor should they assume that answers existing answers from different Passive DNS databases. Nor should they assume that answers
will be identical across multiple Passive DNS Servers. </t> will be identical across multiple Passive DNS servers. </t>
</section> </section>
<section title="Common Output Format"> <section title="Common Output Format">
@ -168,8 +210,9 @@
</section> </section>
<section title="ABNF grammar"> <section title="ABNF grammar">
<!-- "preamble" is deprecated in V3 -->
<t>Formal grammar as defined in <xref target="RFC2234">ABNF</xref></t>
<figure> <figure>
<preamble>Formal grammar as defined in <xref target="RFC2234">ABNF</xref></preamble>
<artwork><![CDATA[ <artwork><![CDATA[
answer = entries answer = entries
entries = * ( entry newline ) entries = * ( entry newline )
@ -266,7 +309,7 @@ ws = *(
<section title="Optional Fields"> <section title="Optional Fields">
<t>Implementations SHOULD support one or more fields.</t> <t>Implementations SHOULD support one or more fields.</t>
<section title="count"> <section title="count">
<t>Specifies how many authoritative DNS answers were received at the Passive DNS Server's <t>Specifies how many authoritative DNS answers were received at the Passive DNS server's
collectors with exactly the given set of values as answers (i.e. same data in the answer collectors with exactly the given set of values as answers (i.e. same data in the answer
set - compare with the uniqueness property in "Mandatory Fields"). The number of set - compare with the uniqueness property in "Mandatory Fields"). The number of
requests is expressed as a decimal value. This field is represented as a <xref requests is expressed as a decimal value. This field is represented as a <xref
@ -328,7 +371,7 @@ ws = *(
</section> </section>
<section title="Additional notes"> <section title="Additional notes">
<t>An implementer of a passive DNS Server MAY choose to either return time_first and <t>An implementer of a passive DNS server MAY choose to either return time_first and
time_last OR return zone_time_first and zone_time_last. In pseudocode: (time_first AND time_last OR return zone_time_first and zone_time_last. In pseudocode: (time_first AND
time_last) OR (zone_time_first AND zone_time_last). In this case, zone_time_{first,last} time_last) OR (zone_time_first AND zone_time_last). In this case, zone_time_{first,last}
replace the time_{first,last} fields. However, this is not encouraged since it might be replace the time_{first,last} fields. However, this is not encouraged since it might be
@ -337,13 +380,12 @@ ws = *(
</section> </section>
<section title="Suggested MIME Types"> <section title="Suggested MIME Types">
<t>An implementer of a passive DNS Server SHOULD serve a document in this Common Output <t>An implementer of a passive DNS server SHOULD serve a document in this Common Output
Format with a MIME header of "application/x-ndjson".</t> Format with a MIME header of "application/x-ndjson".</t>
</section> </section>
</section> </section>
<!-- This PI places the pagebreak correctly (before the section title) in the text output. --> <!-- This PI places the pagebreak correctly (before the section title) in the text output. -->
<?rfc needLines="8"?> <?rfc needLines="8"?>
<section anchor="Acknowledgements" title="Acknowledgements"> <section anchor="Acknowledgements" title="Acknowledgements">
@ -355,39 +397,53 @@ ws = *(
</section> </section>
<section anchor="Privacy" title="Privacy Considerations"> <section anchor="Privacy" title="Privacy Considerations">
<t>Passive DNS Servers capture DNS answers from multiple collection points ("sensors") which <t>Passive DNS servers capture DNS answers from multiple collection points ("sensors") which
are located on the Internet-facing side of DNS recursors ("post-recursor passive DNS"). In are located on the Internet-facing side of DNS recursors ("post-recursor passive DNS"). In
this process, they intentionally omit the source IP, source port, destination IP and this process, they intentionally omit the source IP, source port, destination IP and
destination port from the captured packets. Since the data is captured "post-recursor", the destination port from the captured packets. Since the data is captured "post-recursor", the
timing information (who queries what) is lost, since the recursor will cache the results. timing information (who queries what) is lost, since the recursor will cache the results.
Furthermore, since multiple sensors feed into a passive DNS server, the resulting data gets Furthermore, since multiple sensors feed into a passive DNS system, the resulting data gets
mixed together, reducing the likelihood that Passive DNS Servers are able to find out much mixed together, reducing the likelihood that Passive DNS systems are able to find out much
about the actual person querying the DNS records. In this sense, passive DNS Servers are about the actual person querying the DNS records. In this sense, passive DNS systems are
similar to keeping an archive of all previous phone books - if public DNS records can be similar to keeping an archive of all previous phone books - if public DNS records can be
compared to phone numbers - as they often are. Nevertheless, the authors strongly encourage compared to phone numbers - as they often are. Nevertheless, the authors strongly encourage
Passive DNS implementors to take special care of privacy issues. Passive DNS implementors to take special care of privacy issues. Finally, the overall
bortzmeyer-dnsop-dns-privacy is an excellent starting point for this. Finally, the overall
recommendations in <xref target="RFC6973">RFC6973</xref> should be taken into consideration recommendations in <xref target="RFC6973">RFC6973</xref> should be taken into consideration
when designing any application which uses Passive DNS data.</t> when designing any application which uses Passive DNS data.</t>
<t>Passive DNS attempts to collect information necessary for security (such as malware protection)
in as privacy protecting a manner as possible, and is intended to be
used instead of more invasive methods. It does this by only collecting
DNS cache-fill answers, and not any information associated with who caused the
name to be resolved, nor why the name was resolved. Nevertheless, it is possible that
this may still lead to privacy concerns - for example, if Passive DNS records show that
a recursive resolver resolved the name the-mary-and-john-smith-family.example.com, it may be
possible to infer that the Smith family is using that resolver. Operators of Passive DNS
servers should be aware of this and take appropriate steps to limit access to the data.</t>
<t>Passive DNS operators are encouraged to read and understand
<xref target="RFC7258">RFC7258</xref>.</t>
<t>In the scope of the General Data Protection Regulation (GDPR - Regulation (EU) 2016/679), <t>In the scope of the General Data Protection Regulation (GDPR - Regulation (EU) 2016/679),
operators of Passive DNS Servers need to ensure the legal ground and lawfulness of their operators of Passive DNS servers need to ensure the legal ground and lawfulness of their
operation.</t> operation.</t>
</section> </section>
<section anchor="Security" title="Security Considerations"> <section anchor="Security" title="Security Considerations">
<t>In some cases, Passive DNS output might contain confidential information and its access <t>In some cases, Passive DNS output might contain confidential information and its access
might be restricted. When a user is querying multiple Passive DNS and aggregating the data, should be restricted. When a user is querying multiple Passive DNS and aggregating the data,
the sensitivity of the data must be considered.</t> the sensitivity of the data must be considered.</t>
</section> </section>
</middle> </middle>
<!-- *****BACK MATTER ***** --> <!-- *****BACK MATTER ***** -->
<back> <back>
<references title="Normative References"><!--?rfc
include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?--> &RFC2119; &RFC1035; &RFC1034; &RFC3912; &RFC4627;
&RFC3597; &RFC6648; &RFC2234; &RFC6973; &RFC3986; </references>
<references> <references>
<name>Normative References</name>
&RFC2119; &RFC1035; &RFC1034; &RFC3912; &RFC4627;
&RFC3597; &RFC6648; &RFC2234; &RFC6973; &RFC3986;
&RFC7258;
<reference anchor="WEIMERPDNS" <reference anchor="WEIMERPDNS"
target="http://www.enyo.de/fw/software/dnslogger/first2005-paper.pdf"> target="http://www.enyo.de/fw/software/dnslogger/first2005-paper.pdf">
<front> <front>
@ -397,14 +453,28 @@ ws = *(
</front> </front>
</reference> </reference>
<reference anchor="CACHEPOISONING" target="http://kurser.lobner.dk/dDist/DMK_BO2K8.pdf"> <reference anchor="PDNSCOF" target="https://github.com/D4-project/analyzer-d4-passivedns/">
<front> <front>
<title>Black ops 2008: It's the end of the cache as we know it.</title> <title>Passive DNS server interface using the common output format</title>
<author fullname="Dan Kaminsky" /> <author fullname="D4 Project, Alexandre Dulaunoy" />
<date year="2008" /> <date year="2019" />
</front> </front>
</reference> </reference>
<reference anchor="github_issue_17" target="https://github.com/adulau/pdns-qof/issues/17">
<front>
<title>Discussion on the existing implementations of returning either
zone_time{first,last} OR time_{first,last}</title>
<author fullname="Paul Vixie, Weizman, April, Kaplan, et.al" />
<date year="2020" />
</front>
</reference>
</references>
<references>
<name>Informative References</name>
&RFC7871;
<reference anchor="BAILIWICK" <reference anchor="BAILIWICK"
target="https://archive.farsightsecurity.com/Passive_DNS/passive_dns_hardening_handout.pdf"> target="https://archive.farsightsecurity.com/Passive_DNS/passive_dns_hardening_handout.pdf">
<front> <front>
@ -456,14 +526,6 @@ ws = *(
</front> </front>
</reference> </reference>
<reference anchor="PDNSCOF" target="https://github.com/D4-project/analyzer-d4-passivedns/">
<front>
<title>Passive DNS server interface using the common output format</title>
<author fullname="D4 Project, Alexandre Dulaunoy" />
<date year="2019" />
</front>
</reference>
<reference anchor="DNSDBQ" target="https://github.com/dnsdb/dnsdbq"> <reference anchor="DNSDBQ" target="https://github.com/dnsdb/dnsdbq">
<front> <front>
<title>DNSDB API Client, C Version</title> <title>DNSDB API Client, C Version</title>
@ -471,22 +533,6 @@ ws = *(
<date year="2018" /> <date year="2018" />
</front> </front>
</reference> </reference>
<reference anchor="github_issue_17" target="https://github.com/adulau/pdns-qof/issues/17">
<front>
<title>Discussion on the existing implementations of returning either
zone_time{first,last} OR time_{first,last}</title>
<author fullname="Paul Vixie, Weizman, April, Kaplan, et.al" />
<date year="2020" />
</front>
</reference>
</references>
<references title="Informative References">
<!-- Here we use entities that we defined at the beginning. -->
<!-- &I-D.narten-iana-considerations-rfc2434bis; -->
<!-- &I-D.draft-bortzmeyer-dnsop-dns-privacy; -->
</references> </references>
<section anchor="app-additional" title="Examples"> <section anchor="app-additional" title="Examples">