From 2333b3284918d6c3f29bd6846a59a341260cf7ac Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Sun, 3 Nov 2019 15:26:43 +0100 Subject: [PATCH] new: [git-vuln-finder] Initial release of git-vuln-finder Finding potential software vulnerabilities from git commit messages. First release based on a set of regexp patterns (to be improved). --- README.md | 80 ++++++++++++++++++++++++++++++++++++++++++++++ REQUIREMENTS | 1 + bin/finder.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+) create mode 100644 README.md create mode 100644 REQUIREMENTS create mode 100644 bin/finder.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..a0f9553 --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +# git-vuln-finder + +Finding potential software vulnerabilities from git commit messages. + +# Requirements + +- Python 3 +- GitPython + +# Usage + +~~~bash +usage: finder.py [-h] [-v] [-r R] [-o O] + +Finding potential software vulnerabilities from git commit messages. + +optional arguments: + -h, --help show this help message and exit + -v increase output verbosity + -r R git repository to analyse + -o O Output format: [json] + +More info: https://github.com/cve-search/git-vuln-finder +~~~ + +~~~bash +python3 finder.py -r /home/adulau/git/curl | jq . +... 
+ "6df916d751e72fc9a1febc07bb59c4ddd886c043": { + "message": "loadlibrary: Only load system DLLs from the system directory\n\nInspiration provided by: Daniel Stenberg and Ray Satiro\n\nBug: https://curl.haxx.se/docs/adv_20160530.html\n\nRef: Windows DLL hijacking with curl, CVE-2016-4802\n", + "commit-id": "6df916d751e72fc9a1febc07bb59c4ddd886c043", + "summary": "loadlibrary: Only load system DLLs from the system directory", + "stats": { + "insertions": 180, + "deletions": 8, + "lines": 188, + "files": 7 + }, + "author": "Steve Holme", + "author-email": "steve_holme@hotmail.com", + "authored_date": 1464555460, + "committed_date": 1464588867, + "branches": [ + "master" + ], + "pattern-matches": "hijack" + }, + "c2b3f264cb5210f82bdc84a3b89250a611b68dd3": { + "message": "CONNECT_ONLY: don't close connection on GSS 401/407 reponses\n\nPreviously, connections were closed immediately before the user had a\nchance to extract the socket when the proxy required Negotiate\nauthentication.\n\nThis regression was brought in with the security fix in commit\n79b9d5f1a42578f\n\nCloses #655\n", + "commit-id": "c2b3f264cb5210f82bdc84a3b89250a611b68dd3", + "summary": "CONNECT_ONLY: don't close connection on GSS 401/407 reponses", + "stats": { + "insertions": 4, + "deletions": 2, + "lines": 6, + "files": 1 + }, + "author": "Marcel Raad", + "author-email": "raad@teamviewer.com", + "authored_date": 1455523116, + "committed_date": 1461704516, + "branches": [ + "master" + ], + "pattern-matches": "security " + }, +... +~~~ + +# License + +This software is free software and licensed under the AGPL version 3. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
#
# Software is free software released under the "GNU Affero General Public License v3.0"
#
# This software is part of cve-search.org
#
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
"""Find potential software vulnerabilities from git commit messages.

Every commit reachable from each local branch head of a repository is
checked against a set of security-related keywords.  Matching commits are
printed to stdout as one JSON object keyed by commit id; diagnostics go
to stderr.

Requirements: Python 3 and GitPython (``gitpython``).

References:

- Notes:
  https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md
- https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf
  (mainly using CVE referenced in the commit message)
  - archive (http://archive.is/xep9o)
- https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf
  (2 main regexps)
"""

import argparse
import json
import re
import sys

# Keyword alternatives, tried in this order at each position of the message.
#
# They MUST be raw strings: in a normal string literal "\b" is the backspace
# character (0x08), not the regex word-boundary assertion, so none of the
# anchored alternatives (CVE, XSS, RCE, ...) could ever match.
#
# The stray spaces, U+2212 minus signs and the Cyrillic letter that crept in
# when the expression was copied out of a PDF have been removed, as have the
# trailing spaces that prevented e.g. "CVE-2016-4802" or "security." from
# matching.  "." is kept as "any single character" so that "denial of
# service", "denial-of-service" and "denial_of_service" are all caught.
_VULN_KEYWORDS = (
    r"denial.of.service",
    r"\bXXE\b",
    r"remote.code.execution",
    r"\bopen.redirect",
    r"OSVDB",
    r"\bvuln",
    r"\bCVE\b",
    r"\bXSS\b",
    r"\bReDoS\b",
    r"\bNVD\b",
    r"malicious",
    r"x-frame-options",
    r"attack",
    r"cross.site",
    r"exploit",
    r"directory.traversal",
    r"\bRCE\b",
    r"\bdos\b",
    r"\bXSRF\b",
    r"clickjack",
    r"session.fixation",
    r"hijack",
    r"\badvisory",
    r"\binsecure",
    r"security",
    r"\bcross-origin\b",
    r"unauthori[zs]ed",
    r"infinite.loop",
)

# Case-insensitive; the single capturing group holds the matched keyword.
vulnpatterns = re.compile("(?i)(" + "|".join(_VULN_KEYWORDS) + ")")


def find_vuln(commit, verbose=False):
    """Check one commit message against the vulnerability keywords.

    Args:
        commit: object exposing the commit message as a ``message`` string
            attribute (a GitPython commit in normal use).
        verbose: when true, print the match and the full message to stderr.

    Returns:
        ``{'commit': commit, 'match': <matched text>}`` for the first
        keyword found in the message, or ``None`` when nothing matches.
    """
    m = vulnpatterns.search(commit.message)
    if m is None:
        return None
    if verbose:
        print("Match found: {}".format(m.group(0)), file=sys.stderr)
        print(commit.message, file=sys.stderr)
        print("---", file=sys.stderr)
    return {'commit': commit, 'match': m.group(1)}


def _commit_record(commit, branch, match):
    """Build the JSON-serialisable description of a matching commit."""
    return {
        'message': commit.message,
        'commit-id': commit.hexsha,
        'summary': commit.summary,
        'stats': commit.stats.total,
        'author': commit.author.name,
        'author-email': commit.author.email,
        'authored_date': commit.authored_date,
        'committed_date': commit.committed_date,
        'branches': [branch],
        'pattern-matches': match,
    }


def _scan_repository(repo, branches, verbose=False):
    """Return ``{hexsha: record}`` for matching commits on *branches*."""
    potential_vulnerabilities = {}
    for branch in branches:
        # Iterate lazily; there is no need to hold a branch's whole history.
        for commit in repo.iter_commits(branch):
            ret = find_vuln(commit, verbose=verbose)
            if not ret:
                continue
            rcommit = ret['commit']
            known = potential_vulnerabilities.get(rcommit.hexsha)
            if known is not None:
                # Deduplication of commits on different branches
                known['branches'].append(branch)
            else:
                potential_vulnerabilities[rcommit.hexsha] = _commit_record(
                    rcommit, branch, ret['match'])
    return potential_vulnerabilities


def _build_parser():
    """Create the command-line parser (options -v, -r and -o)."""
    parser = argparse.ArgumentParser(
        description="Finding potential software vulnerabilities from git commit messages.",
        epilog="More info: https://github.com/cve-search/git-vuln-finder")
    parser.add_argument("-v", help="increase output verbosity", action="store_true")
    parser.add_argument("-r", type=str, help="git repository to analyse")
    # Accepted for forward compatibility; JSON is the only format produced.
    parser.add_argument("-o", type=str, help="Output format: [json]", default="json")
    return parser


def main(argv=None):
    """Run the finder.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if not args.r:
        parser.print_usage()
        parser.exit()

    # Imported here rather than at module level so that the usage/help
    # output and the pattern matching work without GitPython installed.
    import git

    repo = git.Repo(args.r)
    repo_heads_names = [h.name for h in repo.heads]
    print(repo_heads_names, file=sys.stderr)

    potential_vulnerabilities = _scan_repository(
        repo, repo_heads_names, verbose=args.v)

    print(json.dumps(potential_vulnerabilities))
    # One entry per unique commit, however many branches contain it.
    print("Total potential vulnerability found in {} commit(s)".format(
        len(potential_vulnerabilities)), file=sys.stderr)


if __name__ == "__main__":
    main()