From dcb0fcc0a512ccc12902ce3831dd3f419093f0b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bonhomme?= Date: Fri, 3 Jan 2020 16:06:33 +0100 Subject: [PATCH] wip, but seems to be working as a cli --- README.md | 26 ++++- REQUIREMENTS | 2 - _config.yml | 1 - bin/finder.py | 169 ++++++------------------------- patterns/en/medium/c | 4 - patterns/en/medium/c.prefix | 1 - patterns/en/medium/c.suffix | 1 - patterns/en/medium/crypto | 55 ---------- patterns/en/medium/crypto.prefix | 1 - patterns/en/medium/crypto.suffix | 1 - patterns/en/medium/vuln | 30 ------ patterns/en/medium/vuln.prefix | 1 - patterns/en/medium/vuln.suffix | 1 - 13 files changed, 55 insertions(+), 238 deletions(-) delete mode 100644 REQUIREMENTS delete mode 100644 _config.yml delete mode 100644 patterns/en/medium/c delete mode 100644 patterns/en/medium/c.prefix delete mode 100644 patterns/en/medium/c.suffix delete mode 100644 patterns/en/medium/crypto delete mode 100644 patterns/en/medium/crypto.prefix delete mode 100644 patterns/en/medium/crypto.suffix delete mode 100644 patterns/en/medium/vuln delete mode 100644 patterns/en/medium/vuln.prefix delete mode 100644 patterns/en/medium/vuln.suffix diff --git a/README.md b/README.md index 5487ab7..e5cbd90 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,11 @@ ![git-vuln-finder logo](https://raw.githubusercontent.com/cve-search/git-vuln-finder/f22077452c37e110bff0564e1f7b34637dc726c3/doc/logos/git-vuln-finder-small.png) -Finding potential software vulnerabilities from git commit messages. The output format is a JSON with the associated commit which could contain a fix regarding a software vulnerability. The search is based on a set of regular expressions against the commit messages only. If CVE IDs are present, those are added automatically in the output. +Finding potential software vulnerabilities from git commit messages. +The output format is a JSON with the associated commit which could contain a +fix regarding a software vulnerability. The search is based on a set of regular +expressions against the commit messages only. If CVE IDs are present, those are +added automatically in the output. # Requirements @@ -10,6 +14,24 @@ Finding potential software vulnerabilities from git commit messages. The output - GitPython - langdetect + +# Installation + +## Use in your Python software + +~~~bash +$ poetry install git-vuln-finder +$ poetry shell +~~~ + +## Use as a command line tool + +~~~bash +$ pipx install git-vuln-finder +$ finder --help +~~~ + + # Usage ~~~bash @@ -166,5 +188,3 @@ collectively own this open source software. The contributors acknowledge the [De - [Notes](https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md) - https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf (mainly using CVE referenced in the commit message) - archive (http://archive.is/xep9o) - https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf (2 main regexps) - - diff --git a/REQUIREMENTS b/REQUIREMENTS deleted file mode 100644 index be616c3..0000000 --- a/REQUIREMENTS +++ /dev/null @@ -1,2 +0,0 @@ -gitpython -langdetect diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 2f7efbe..0000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-minimal \ No newline at end of file diff --git a/bin/finder.py b/bin/finder.py index f7d79ab..9e26600 100644 --- a/bin/finder.py +++ b/bin/finder.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- # # Finding potential software vulnerabilities from git commit messages @@ -10,16 +10,21 @@ # Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be -import os -import re import git import json import sys import argparse import typing -from langdetect import detect as langdetect -PATTERNS_PATH="../patterns" +from git_vuln_finder import ( + build_pattern, + get_patterns, + find_vuln, + summary, + extract_cve +) + +PATTERNS_PATH="./git_vuln_finder/patterns" parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder") parser.add_argument("-v", help="increase output verbosity", action="store_true") @@ -32,63 +37,6 @@ parser.add_argument("-t", help="Include tags matching a specific commit", action args = parser.parse_args() -def build_pattern(pattern_file): - fp = open(pattern_file, "r") - rex = "" - try: - prefix_fp = open(pattern_file + ".prefix", "r") - rex += prefix_fp.read() - prefix_fp.close() - except: - pass - - for line in fp.readlines(): - rex += line.rstrip() + "|" - rex = rex[:-1] # We remove the extra '| - fp.close() - - try: - suffix_fp = open(pattern_file + ".suffix", "r") - rex += suffix_fp.read() - suffix_fp.close() - except: - pass - - return rex - -def get_patterns(patterns_path=PATTERNS_PATH): - patterns = {} - for root, dirs, files in os.walk(patterns_path): - path = root.split(os.sep) - for f in files: - if f.endswith(".prefix") or f.endswith(".suffix"): - continue - npath = root[len(patterns_path):].split(os.sep) - try: - npath.remove('') - except ValueError: - pass - - lang = npath[0] - severity = npath[1] - pattern_category = f - - try: # FIXME: Is there a better way? - a = patterns[lang] - except KeyError: - patterns[lang] = {} - try: - a = patterns[lang][severity] - except KeyError: - patterns[lang][severity] = {} - try: - a = patterns[lang][severity][pattern_category] - except KeyError: - rex = build_pattern(root + os.sep + f) - patterns[lang][severity][pattern_category] = re.compile(rex) - - return patterns - patterns = get_patterns() vulnpatterns = patterns["en"]["medium"]["vuln"] cryptopatterns = patterns["en"]["medium"]["crypto"] @@ -114,79 +62,12 @@ else: found = 0 -potential_vulnerabilities = {} +all_potential_vulnerabilities = {} cve_found = set() -def find_vuln(commit, pattern=vulnpatterns): - m = pattern.search(commit.message) - if m: - if args.v: - print("Match found: {}".format(m.group(0)), file=sys.stderr) - print(commit.message, file=sys.stderr) - print("---", file=sys.stderr) - ret = {} - ret['commit'] = commit - ret['match'] = m.groups() - return ret - else: - return None +def main(): + pass -def summary(commit, branch, pattern, origin=None): - rcommit = commit - cve = extract_cve(rcommit.message) - if origin is not None: - origin = origin - if origin.find('github.com'): - origin_github_api = origin.split(':')[1] - (org_name, repo_name) = origin_github_api.split('/', 1) - if repo_name.find('.git$'): - repo_name = re.sub(r".git$","", repo_name) - origin_github_api = 'https://api.github.com/repos/{}/{}/commits/{}'.format(org_name, repo_name, rcommit.hexsha) - - else: - origin = 'git origin unknown' - # deduplication if similar commits on different branches - if rcommit.hexsha in potential_vulnerabilities: - potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch) - else: - potential_vulnerabilities[rcommit.hexsha] = {} - potential_vulnerabilities[rcommit.hexsha]['message'] = rcommit.message - potential_vulnerabilities[rcommit.hexsha]['language'] = langdetect(rcommit.message) - potential_vulnerabilities[rcommit.hexsha]['commit-id'] = rcommit.hexsha - potential_vulnerabilities[rcommit.hexsha]['summary'] = rcommit.summary - potential_vulnerabilities[rcommit.hexsha]['stats'] = rcommit.stats.total - potential_vulnerabilities[rcommit.hexsha]['author'] = rcommit.author.name - potential_vulnerabilities[rcommit.hexsha]['author-email'] = rcommit.author.email - potential_vulnerabilities[rcommit.hexsha]['authored_date'] = rcommit.authored_date - potential_vulnerabilities[rcommit.hexsha]['committed_date'] = rcommit.committed_date - potential_vulnerabilities[rcommit.hexsha]['branches'] = [] - potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch) - potential_vulnerabilities[rcommit.hexsha]['pattern-selected'] = pattern.pattern - potential_vulnerabilities[rcommit.hexsha]['pattern-matches'] = ret['match'] - potential_vulnerabilities[rcommit.hexsha]['origin'] = origin - if origin_github_api: - potential_vulnerabilities[commit.hexsha]['origin-github-api'] = origin_github_api - potential_vulnerabilities[rcommit.hexsha]['tags'] = [] - if args.t: - if repo.commit(rcommit).hexsha in tagmap: - potential_vulnerabilities[rcommit.hexsha]['tags'] = tagmap[repo.commit(rcommit).hexsha] - if cve: potential_vulnerabilities[rcommit.hexsha]['cve'] = cve - if cve: - potential_vulnerabilities[rcommit.hexsha]['state'] = "cve-assigned" - else: - potential_vulnerabilities[rcommit.hexsha]['state'] = args.s - - return rcommit.hexsha - -def extract_cve(commit): - cve_find = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}', re.IGNORECASE) - m = cve_find.findall(commit) - if m: - for v in m: - cve_found.add(v) - return m - else: - return None repo_heads = repo.heads repo_heads_names = [h.name for h in repo_heads] @@ -202,20 +83,34 @@ for branch in repo_heads_names: defaultpattern for commit in commits: if isinstance(defaultpattern, typing.Pattern): - ret = find_vuln(commit, pattern=defaultpattern) + ret = find_vuln(commit, pattern=defaultpattern, versbose=args.v) if ret: rcommit = ret['commit'] - summary(rcommit, branch, defaultpattern, origin=origin) + _, potential_vulnerabilities = summary(rcommit, + branch, + defaultpattern, + origin=origin, + vuln_match=ret['match'], + tags_matching=args.t, + commit_state=args.s) + all_potential_vulnerabilities.update(potential_vulnerabilities) found += 1 elif isinstance(defaultpattern, list): for p in defaultpattern: - ret = find_vuln(commit, pattern=p) + ret = find_vuln(commit, pattern=p, versbose=args.v) if ret: rcommit = ret['commit'] - summary(rcommit, branch, p, origin=origin) + _, potential_vulnerabilities = summary(rcommit, + branch, + p, + origin=origin, + vuln_match=ret['match'], + tags_matching=args.t, + commit_state=args.s) + all_potential_vulnerabilities.update(potential_vulnerabilities) found += 1 if not args.c: - print(json.dumps(potential_vulnerabilities)) + print(json.dumps(all_potential_vulnerabilities)) elif args.c: print(json.dumps(list(cve_found))) diff --git a/patterns/en/medium/c b/patterns/en/medium/c deleted file mode 100644 index 42d4a36..0000000 --- a/patterns/en/medium/c +++ /dev/null @@ -1,4 +0,0 @@ -double[-| ]free -buffer overflow -double free -race[-| ]condition diff --git a/patterns/en/medium/c.prefix b/patterns/en/medium/c.prefix deleted file mode 100644 index 4a45df8..0000000 --- a/patterns/en/medium/c.prefix +++ /dev/null @@ -1 +0,0 @@ -(?i)( \ No newline at end of file diff --git a/patterns/en/medium/c.suffix b/patterns/en/medium/c.suffix deleted file mode 100644 index e8a0f87..0000000 --- a/patterns/en/medium/c.suffix +++ /dev/null @@ -1 +0,0 @@ -) \ No newline at end of file diff --git a/patterns/en/medium/crypto b/patterns/en/medium/crypto deleted file mode 100644 index b24e6c2..0000000 --- a/patterns/en/medium/crypto +++ /dev/null @@ -1,55 +0,0 @@ -assessment -lack of -bad -vulnerable -missing -unproper -unsuitable -breakable -broken -weak -incorrect -replace -assessment -pen([\s-]?)test -pentest -penetration([\s-]?)test -report -vulnerablity -replace -fix -issue -fixes -add -remove -check){s1,} - (crypto -cryptographic -cryptography -encipherement -encryption -ciphers -cipher -AES -DES -3DES -cipher -GPG -PGP -OpenSSL -SSH -wireguard -VPN -CBC -ECB -CTR -key[.|,|\s] -private([\s-]?)key -public([\s-]?)key size -length -strenght -generation -randomness -entropy -prng -rng diff --git a/patterns/en/medium/crypto.prefix b/patterns/en/medium/crypto.prefix deleted file mode 100644 index f5b5f62..0000000 --- a/patterns/en/medium/crypto.prefix +++ /dev/null @@ -1 +0,0 @@ -.*( \ No newline at end of file diff --git a/patterns/en/medium/crypto.suffix b/patterns/en/medium/crypto.suffix deleted file mode 100644 index 12953cd..0000000 --- a/patterns/en/medium/crypto.suffix +++ /dev/null @@ -1 +0,0 @@ -){1,} \ No newline at end of file diff --git a/patterns/en/medium/vuln b/patterns/en/medium/vuln deleted file mode 100644 index db3ca53..0000000 --- a/patterns/en/medium/vuln +++ /dev/null @@ -1,30 +0,0 @@ -denial of service -\bXXE\b -remote code execution -\bopen redirect -OSVDB -\bvuln -\bCVE\b -\bXSS\b -\bReDoS\b -\bNVD\b -malicious -x−frame−options -attack -cross site -exploit -malicious -directory traversal -\bRCE\b -\bdos\b -\bXSRF \b -\bXSS\b -clickjack -session.fixation -hijack -\badvisory -\binsecure -security -\bcross−origin\b -unauthori[z|s]ed -infinite loop \ No newline at end of file diff --git a/patterns/en/medium/vuln.prefix b/patterns/en/medium/vuln.prefix deleted file mode 100644 index 4a45df8..0000000 --- a/patterns/en/medium/vuln.prefix +++ /dev/null @@ -1 +0,0 @@ -(?i)( \ No newline at end of file diff --git a/patterns/en/medium/vuln.suffix b/patterns/en/medium/vuln.suffix deleted file mode 100644 index e8a0f87..0000000 --- a/patterns/en/medium/vuln.suffix +++ /dev/null @@ -1 +0,0 @@ -) \ No newline at end of file