mirror of
https://github.com/adulau/git-vuln-finder.git
synced 2024-11-22 10:07:11 +00:00
wip, but seems to be working as a cli
This commit is contained in:
parent
01cedee91d
commit
dcb0fcc0a5
13 changed files with 55 additions and 238 deletions
26
README.md
26
README.md
|
@ -2,7 +2,11 @@
|
||||||
|
|
||||||
![git-vuln-finder logo](https://raw.githubusercontent.com/cve-search/git-vuln-finder/f22077452c37e110bff0564e1f7b34637dc726c3/doc/logos/git-vuln-finder-small.png)
|
![git-vuln-finder logo](https://raw.githubusercontent.com/cve-search/git-vuln-finder/f22077452c37e110bff0564e1f7b34637dc726c3/doc/logos/git-vuln-finder-small.png)
|
||||||
|
|
||||||
Finding potential software vulnerabilities from git commit messages. The output format is a JSON with the associated commit which could contain a fix regarding a software vulnerability. The search is based on a set of regular expressions against the commit messages only. If CVE IDs are present, those are added automatically in the output.
|
Finding potential software vulnerabilities from git commit messages.
|
||||||
|
The output is a JSON document listing each commit that could contain a
|
||||||
|
fix for a software vulnerability. The search is based on a set of regular
|
||||||
|
expressions applied to the commit messages only. If CVE IDs are present, they are
|
||||||
|
added automatically to the output.
|
||||||
|
|
||||||
# Requirements
|
# Requirements
|
||||||
|
|
||||||
|
@ -10,6 +14,24 @@ Finding potential software vulnerabilities from git commit messages. The output
|
||||||
- GitPython
|
- GitPython
|
||||||
- langdetect
|
- langdetect
|
||||||
|
|
||||||
|
|
||||||
|
# Installation
|
||||||
|
|
||||||
|
## Use in your Python software
|
||||||
|
|
||||||
|
~~~bash
|
||||||
|
$ poetry add git-vuln-finder
|
||||||
|
$ poetry shell
|
||||||
|
~~~
|
||||||
|
|
||||||
|
## Use as a command line tool
|
||||||
|
|
||||||
|
~~~bash
|
||||||
|
$ pipx install git-vuln-finder
|
||||||
|
$ finder --help
|
||||||
|
~~~
|
||||||
|
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
~~~bash
|
~~~bash
|
||||||
|
@ -166,5 +188,3 @@ collectively own this open source software. The contributors acknowledge the [De
|
||||||
- [Notes](https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md)
|
- [Notes](https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md)
|
||||||
- https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf (mainly using CVE referenced in the commit message) - archive (http://archive.is/xep9o)
|
- https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf (mainly using CVE referenced in the commit message) - archive (http://archive.is/xep9o)
|
||||||
- https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf (2 main regexps)
|
- https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf (2 main regexps)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
gitpython
|
|
||||||
langdetect
|
|
|
@ -1 +0,0 @@
|
||||||
theme: jekyll-theme-minimal
|
|
169
bin/finder.py
169
bin/finder.py
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# Finding potential software vulnerabilities from git commit messages
|
# Finding potential software vulnerabilities from git commit messages
|
||||||
|
@ -10,16 +10,21 @@
|
||||||
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
|
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
|
||||||
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import git
|
import git
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import typing
|
import typing
|
||||||
from langdetect import detect as langdetect
|
|
||||||
|
|
||||||
PATTERNS_PATH="../patterns"
|
from git_vuln_finder import (
|
||||||
|
build_pattern,
|
||||||
|
get_patterns,
|
||||||
|
find_vuln,
|
||||||
|
summary,
|
||||||
|
extract_cve
|
||||||
|
)
|
||||||
|
|
||||||
|
PATTERNS_PATH="./git_vuln_finder/patterns"
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder")
|
parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder")
|
||||||
parser.add_argument("-v", help="increase output verbosity", action="store_true")
|
parser.add_argument("-v", help="increase output verbosity", action="store_true")
|
||||||
|
@ -32,63 +37,6 @@ parser.add_argument("-t", help="Include tags matching a specific commit", action
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def build_pattern(pattern_file):
|
|
||||||
fp = open(pattern_file, "r")
|
|
||||||
rex = ""
|
|
||||||
try:
|
|
||||||
prefix_fp = open(pattern_file + ".prefix", "r")
|
|
||||||
rex += prefix_fp.read()
|
|
||||||
prefix_fp.close()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
for line in fp.readlines():
|
|
||||||
rex += line.rstrip() + "|"
|
|
||||||
rex = rex[:-1] # We remove the extra '|
|
|
||||||
fp.close()
|
|
||||||
|
|
||||||
try:
|
|
||||||
suffix_fp = open(pattern_file + ".suffix", "r")
|
|
||||||
rex += suffix_fp.read()
|
|
||||||
suffix_fp.close()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return rex
|
|
||||||
|
|
||||||
def get_patterns(patterns_path=PATTERNS_PATH):
|
|
||||||
patterns = {}
|
|
||||||
for root, dirs, files in os.walk(patterns_path):
|
|
||||||
path = root.split(os.sep)
|
|
||||||
for f in files:
|
|
||||||
if f.endswith(".prefix") or f.endswith(".suffix"):
|
|
||||||
continue
|
|
||||||
npath = root[len(patterns_path):].split(os.sep)
|
|
||||||
try:
|
|
||||||
npath.remove('')
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
lang = npath[0]
|
|
||||||
severity = npath[1]
|
|
||||||
pattern_category = f
|
|
||||||
|
|
||||||
try: # FIXME: Is there a better way?
|
|
||||||
a = patterns[lang]
|
|
||||||
except KeyError:
|
|
||||||
patterns[lang] = {}
|
|
||||||
try:
|
|
||||||
a = patterns[lang][severity]
|
|
||||||
except KeyError:
|
|
||||||
patterns[lang][severity] = {}
|
|
||||||
try:
|
|
||||||
a = patterns[lang][severity][pattern_category]
|
|
||||||
except KeyError:
|
|
||||||
rex = build_pattern(root + os.sep + f)
|
|
||||||
patterns[lang][severity][pattern_category] = re.compile(rex)
|
|
||||||
|
|
||||||
return patterns
|
|
||||||
|
|
||||||
patterns = get_patterns()
|
patterns = get_patterns()
|
||||||
vulnpatterns = patterns["en"]["medium"]["vuln"]
|
vulnpatterns = patterns["en"]["medium"]["vuln"]
|
||||||
cryptopatterns = patterns["en"]["medium"]["crypto"]
|
cryptopatterns = patterns["en"]["medium"]["crypto"]
|
||||||
|
@ -114,79 +62,12 @@ else:
|
||||||
|
|
||||||
|
|
||||||
found = 0
|
found = 0
|
||||||
potential_vulnerabilities = {}
|
all_potential_vulnerabilities = {}
|
||||||
cve_found = set()
|
cve_found = set()
|
||||||
|
|
||||||
def find_vuln(commit, pattern=vulnpatterns):
|
def main():
|
||||||
m = pattern.search(commit.message)
|
pass
|
||||||
if m:
|
|
||||||
if args.v:
|
|
||||||
print("Match found: {}".format(m.group(0)), file=sys.stderr)
|
|
||||||
print(commit.message, file=sys.stderr)
|
|
||||||
print("---", file=sys.stderr)
|
|
||||||
ret = {}
|
|
||||||
ret['commit'] = commit
|
|
||||||
ret['match'] = m.groups()
|
|
||||||
return ret
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def summary(commit, branch, pattern, origin=None):
|
|
||||||
rcommit = commit
|
|
||||||
cve = extract_cve(rcommit.message)
|
|
||||||
if origin is not None:
|
|
||||||
origin = origin
|
|
||||||
if origin.find('github.com'):
|
|
||||||
origin_github_api = origin.split(':')[1]
|
|
||||||
(org_name, repo_name) = origin_github_api.split('/', 1)
|
|
||||||
if repo_name.find('.git$'):
|
|
||||||
repo_name = re.sub(r".git$","", repo_name)
|
|
||||||
origin_github_api = 'https://api.github.com/repos/{}/{}/commits/{}'.format(org_name, repo_name, rcommit.hexsha)
|
|
||||||
|
|
||||||
else:
|
|
||||||
origin = 'git origin unknown'
|
|
||||||
# deduplication if similar commits on different branches
|
|
||||||
if rcommit.hexsha in potential_vulnerabilities:
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
|
|
||||||
else:
|
|
||||||
potential_vulnerabilities[rcommit.hexsha] = {}
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['message'] = rcommit.message
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['language'] = langdetect(rcommit.message)
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['commit-id'] = rcommit.hexsha
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['summary'] = rcommit.summary
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['stats'] = rcommit.stats.total
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['author'] = rcommit.author.name
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['author-email'] = rcommit.author.email
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['authored_date'] = rcommit.authored_date
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['committed_date'] = rcommit.committed_date
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['branches'] = []
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['pattern-selected'] = pattern.pattern
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['pattern-matches'] = ret['match']
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['origin'] = origin
|
|
||||||
if origin_github_api:
|
|
||||||
potential_vulnerabilities[commit.hexsha]['origin-github-api'] = origin_github_api
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['tags'] = []
|
|
||||||
if args.t:
|
|
||||||
if repo.commit(rcommit).hexsha in tagmap:
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['tags'] = tagmap[repo.commit(rcommit).hexsha]
|
|
||||||
if cve: potential_vulnerabilities[rcommit.hexsha]['cve'] = cve
|
|
||||||
if cve:
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['state'] = "cve-assigned"
|
|
||||||
else:
|
|
||||||
potential_vulnerabilities[rcommit.hexsha]['state'] = args.s
|
|
||||||
|
|
||||||
return rcommit.hexsha
|
|
||||||
|
|
||||||
def extract_cve(commit):
|
|
||||||
cve_find = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}', re.IGNORECASE)
|
|
||||||
m = cve_find.findall(commit)
|
|
||||||
if m:
|
|
||||||
for v in m:
|
|
||||||
cve_found.add(v)
|
|
||||||
return m
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
repo_heads = repo.heads
|
repo_heads = repo.heads
|
||||||
repo_heads_names = [h.name for h in repo_heads]
|
repo_heads_names = [h.name for h in repo_heads]
|
||||||
|
@ -202,20 +83,34 @@ for branch in repo_heads_names:
|
||||||
defaultpattern
|
defaultpattern
|
||||||
for commit in commits:
|
for commit in commits:
|
||||||
if isinstance(defaultpattern, typing.Pattern):
|
if isinstance(defaultpattern, typing.Pattern):
|
||||||
ret = find_vuln(commit, pattern=defaultpattern)
|
ret = find_vuln(commit, pattern=defaultpattern, versbose=args.v)
|
||||||
if ret:
|
if ret:
|
||||||
rcommit = ret['commit']
|
rcommit = ret['commit']
|
||||||
summary(rcommit, branch, defaultpattern, origin=origin)
|
_, potential_vulnerabilities = summary(rcommit,
|
||||||
|
branch,
|
||||||
|
defaultpattern,
|
||||||
|
origin=origin,
|
||||||
|
vuln_match=ret['match'],
|
||||||
|
tags_matching=args.t,
|
||||||
|
commit_state=args.s)
|
||||||
|
all_potential_vulnerabilities.update(potential_vulnerabilities)
|
||||||
found += 1
|
found += 1
|
||||||
elif isinstance(defaultpattern, list):
|
elif isinstance(defaultpattern, list):
|
||||||
for p in defaultpattern:
|
for p in defaultpattern:
|
||||||
ret = find_vuln(commit, pattern=p)
|
ret = find_vuln(commit, pattern=p, versbose=args.v)
|
||||||
if ret:
|
if ret:
|
||||||
rcommit = ret['commit']
|
rcommit = ret['commit']
|
||||||
summary(rcommit, branch, p, origin=origin)
|
_, potential_vulnerabilities = summary(rcommit,
|
||||||
|
branch,
|
||||||
|
p,
|
||||||
|
origin=origin,
|
||||||
|
vuln_match=ret['match'],
|
||||||
|
tags_matching=args.t,
|
||||||
|
commit_state=args.s)
|
||||||
|
all_potential_vulnerabilities.update(potential_vulnerabilities)
|
||||||
found += 1
|
found += 1
|
||||||
if not args.c:
|
if not args.c:
|
||||||
print(json.dumps(potential_vulnerabilities))
|
print(json.dumps(all_potential_vulnerabilities))
|
||||||
elif args.c:
|
elif args.c:
|
||||||
print(json.dumps(list(cve_found)))
|
print(json.dumps(list(cve_found)))
|
||||||
|
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
double[-| ]free
|
|
||||||
buffer overflow
|
|
||||||
double free
|
|
||||||
race[-| ]condition
|
|
|
@ -1 +0,0 @@
|
||||||
(?i)(
|
|
|
@ -1 +0,0 @@
|
||||||
)
|
|
|
@ -1,55 +0,0 @@
|
||||||
assessment
|
|
||||||
lack of
|
|
||||||
bad
|
|
||||||
vulnerable
|
|
||||||
missing
|
|
||||||
unproper
|
|
||||||
unsuitable
|
|
||||||
breakable
|
|
||||||
broken
|
|
||||||
weak
|
|
||||||
incorrect
|
|
||||||
replace
|
|
||||||
assessment
|
|
||||||
pen([\s-]?)test
|
|
||||||
pentest
|
|
||||||
penetration([\s-]?)test
|
|
||||||
report
|
|
||||||
vulnerablity
|
|
||||||
replace
|
|
||||||
fix
|
|
||||||
issue
|
|
||||||
fixes
|
|
||||||
add
|
|
||||||
remove
|
|
||||||
check){s1,}
|
|
||||||
(crypto
|
|
||||||
cryptographic
|
|
||||||
cryptography
|
|
||||||
encipherement
|
|
||||||
encryption
|
|
||||||
ciphers
|
|
||||||
cipher
|
|
||||||
AES
|
|
||||||
DES
|
|
||||||
3DES
|
|
||||||
cipher
|
|
||||||
GPG
|
|
||||||
PGP
|
|
||||||
OpenSSL
|
|
||||||
SSH
|
|
||||||
wireguard
|
|
||||||
VPN
|
|
||||||
CBC
|
|
||||||
ECB
|
|
||||||
CTR
|
|
||||||
key[.|,|\s]
|
|
||||||
private([\s-]?)key
|
|
||||||
public([\s-]?)key size
|
|
||||||
length
|
|
||||||
strenght
|
|
||||||
generation
|
|
||||||
randomness
|
|
||||||
entropy
|
|
||||||
prng
|
|
||||||
rng
|
|
|
@ -1 +0,0 @@
|
||||||
.*(
|
|
|
@ -1 +0,0 @@
|
||||||
){1,}
|
|
|
@ -1,30 +0,0 @@
|
||||||
denial of service
|
|
||||||
\bXXE\b
|
|
||||||
remote code execution
|
|
||||||
\bopen redirect
|
|
||||||
OSVDB
|
|
||||||
\bvuln
|
|
||||||
\bCVE\b
|
|
||||||
\bXSS\b
|
|
||||||
\bReDoS\b
|
|
||||||
\bNVD\b
|
|
||||||
malicious
|
|
||||||
x−frame−options
|
|
||||||
attack
|
|
||||||
cross site
|
|
||||||
exploit
|
|
||||||
malicious
|
|
||||||
directory traversal
|
|
||||||
\bRCE\b
|
|
||||||
\bdos\b
|
|
||||||
\bXSRF \b
|
|
||||||
\bXSS\b
|
|
||||||
clickjack
|
|
||||||
session.fixation
|
|
||||||
hijack
|
|
||||||
\badvisory
|
|
||||||
\binsecure
|
|
||||||
security
|
|
||||||
\bcross−origin\b
|
|
||||||
unauthori[z|s]ed
|
|
||||||
infinite loop
|
|
|
@ -1 +0,0 @@
|
||||||
(?i)(
|
|
|
@ -1 +0,0 @@
|
||||||
)
|
|
Loading…
Reference in a new issue