diff --git a/bin/finder.py b/bin/finder.py
index 40f6b28..560931f 100644
--- a/bin/finder.py
+++ b/bin/finder.py
@@ -15,13 +15,13 @@ import git
 import json
 import sys
 import argparse
 
 parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder")
 parser.add_argument("-v", help="increase output verbosity", action="store_true")
 parser.add_argument("-r", type=str, help="git repository to analyse")
 parser.add_argument("-o", type=str, help="Output format: [json]", default="json")
 parser.add_argument("-s", type=str, help="State of the commit found", default="under-review")
-parser.add_argument("-p", type=str, help="Matching pattern to use: [vulnpatterns, cryptopatterns, cpatterns]", default="vulnpatterns")
+parser.add_argument("-p", type=str, help="Matching pattern to use: [vulnpatterns, cryptopatterns, cpatterns] - the pattern 'all' is used to match all the patterns at once.", default="vulnpatterns")
 args = parser.parse_args()
 
 vulnpatterns = re.compile("(?i)(denial of service |\bXXE\b|remote code execution|\bopen redirect|OSVDB|\bvuln|\bCVE\b |\bXSS\b|\bReDoS\b|\bNVD\b|malicious|x−frame−options|attack|cross site |exploit|malicious|directory traversal |\bRCE\b|\bdos\b|\bXSRF \b|\bXSS\b|clickjack|session.fixation|hijack|\badvisory|\binsecure |security |\bcross−origin\b|unauthori[z|s]ed |infinite loop)")
@@ -36,6 +36,8 @@ elif args.p == "cryptopatterns":
     defaultpattern = cryptopatterns
 elif args.p == "cpatterns":
     defaultpattern = cpatterns
+elif args.p == "all":
+    defaultpattern = [vulnpatterns, cryptopatterns, cpatterns]
 else:
     parser.print_usage()
     parser.exit()
@@ -63,11 +65,31 @@ def find_vuln(commit, pattern=vulnpatterns):
         ret['match'] = m.group(1)
         return ret
     else:
-        # print(commit.message)
         return None
-    # print("Nothing match")
 
 
+def summary(commit, branch, pattern, match):
+    """Record a matching commit in potential_vulnerabilities and return its id."""
+    entry = potential_vulnerabilities.get(commit.hexsha)
+    if entry is not None:
+        # Deduplication of commits on different branches
+        entry['branches'].append(branch)
+        return commit.hexsha
+    potential_vulnerabilities[commit.hexsha] = {
+        'message': commit.message,
+        'commit-id': commit.hexsha,
+        'summary': commit.summary,
+        'stats': commit.stats.total,
+        'author': commit.author.name,
+        'author-email': commit.author.email,
+        'authored_date': commit.authored_date,
+        'committed_date': commit.committed_date,
+        'branches': [branch],
+        'pattern-selected': pattern.pattern,
+        'pattern-matches': match,
+        'state': args.s,
+    }
+    return commit.hexsha
 
 repo_heads = repo.heads
 repo_heads_names = [h.name for h in repo_heads]
@@ -76,31 +98,17 @@ print(repo_heads_names, file=sys.stderr)
 
+# "-p all" selects a list of patterns; handle the single-pattern case the same way
+patterns = defaultpattern if isinstance(defaultpattern, list) else [defaultpattern]
 for branch in repo_heads_names:
     commits = list(repo.iter_commits(branch))
     for commit in commits:
-        ret = find_vuln(commit, pattern=defaultpattern)
-        if ret:
-            #print("Vulnerability found: {}".format(ret))
-            #print(ret.hexsha)
-            rcommit = ret['commit']
-            # Deduplication of commits on different branches
-            if rcommit.hexsha in potential_vulnerabilities:
-                potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
-            else:
-                potential_vulnerabilities[rcommit.hexsha] = {}
-                potential_vulnerabilities[rcommit.hexsha]['message'] = rcommit.message
-                potential_vulnerabilities[rcommit.hexsha]['commit-id'] = rcommit.hexsha
-                potential_vulnerabilities[rcommit.hexsha]['summary'] = rcommit.summary
-                potential_vulnerabilities[rcommit.hexsha]['stats'] = rcommit.stats.total
-                potential_vulnerabilities[rcommit.hexsha]['author'] = rcommit.author.name
-                potential_vulnerabilities[rcommit.hexsha]['author-email'] = rcommit.author.email
-                potential_vulnerabilities[rcommit.hexsha]['authored_date'] = rcommit.authored_date
-                potential_vulnerabilities[rcommit.hexsha]['committed_date'] = rcommit.committed_date
-                potential_vulnerabilities[rcommit.hexsha]['branches'] = []
-                potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
-                potential_vulnerabilities[rcommit.hexsha]['pattern-selected'] = args.p
-                potential_vulnerabilities[rcommit.hexsha]['pattern-matches'] = ret['match']
-                potential_vulnerabilities[rcommit.hexsha]['state'] = args.s
-            found += 1
+        for p in patterns:
+            ret = find_vuln(commit, pattern=p)
+            if ret:
+                summary(ret['commit'], branch, p, ret['match'])
+                found += 1
+                # Only the first matching pattern is recorded per commit, so stop here
+                break
 
 
 print(json.dumps(potential_vulnerabilities))
+print("Total potential vulnerability found in {} commit(s)".format(found), file=sys.stderr)