mirror of
https://github.com/adulau/git-vuln-finder.git
synced 2024-11-22 18:17:08 +00:00
165 lines
5.5 KiB
Python
165 lines
5.5 KiB
Python
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
#
|
||
|
# Finding potential software vulnerabilities from git commit messages
|
||
|
#
|
||
|
# Software is free software released under the "GNU Affero General Public License v3.0"
|
||
|
#
|
||
|
# This software is part of cve-search.org
|
||
|
#
|
||
|
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
|
||
|
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
import git
|
||
|
import json
|
||
|
import sys
|
||
|
import typing
|
||
|
from langdetect import detect as langdetect
|
||
|
|
||
|
|
||
|
PATTERNS_PATH="./git_vuln_finder/patterns"
|
||
|
|
||
|
|
||
|
def build_pattern(pattern_file):
|
||
|
fp = open(pattern_file, "r")
|
||
|
rex = ""
|
||
|
try:
|
||
|
prefix_fp = open(pattern_file + ".prefix", "r")
|
||
|
rex += prefix_fp.read()
|
||
|
prefix_fp.close()
|
||
|
except:
|
||
|
pass
|
||
|
|
||
|
for line in fp.readlines():
|
||
|
rex += line.rstrip() + "|"
|
||
|
rex = rex[:-1] # We remove the extra '|
|
||
|
fp.close()
|
||
|
|
||
|
try:
|
||
|
suffix_fp = open(pattern_file + ".suffix", "r")
|
||
|
rex += suffix_fp.read()
|
||
|
suffix_fp.close()
|
||
|
except:
|
||
|
pass
|
||
|
|
||
|
return rex
|
||
|
|
||
|
|
||
|
def get_patterns(patterns_path=PATTERNS_PATH):
|
||
|
patterns = {}
|
||
|
for root, dirs, files in os.walk(patterns_path):
|
||
|
path = root.split(os.sep)
|
||
|
for f in files:
|
||
|
if f.endswith(".prefix") or f.endswith(".suffix"):
|
||
|
continue
|
||
|
npath = root[len(patterns_path):].split(os.sep)
|
||
|
try:
|
||
|
npath.remove('')
|
||
|
except ValueError:
|
||
|
pass
|
||
|
|
||
|
lang = npath[0]
|
||
|
severity = npath[1]
|
||
|
pattern_category = f
|
||
|
|
||
|
try: # FIXME: Is there a better way?
|
||
|
a = patterns[lang]
|
||
|
except KeyError:
|
||
|
patterns[lang] = {}
|
||
|
try:
|
||
|
a = patterns[lang][severity]
|
||
|
except KeyError:
|
||
|
patterns[lang][severity] = {}
|
||
|
try:
|
||
|
a = patterns[lang][severity][pattern_category]
|
||
|
except KeyError:
|
||
|
rex = build_pattern(root + os.sep + f)
|
||
|
patterns[lang][severity][pattern_category] = re.compile(rex)
|
||
|
|
||
|
return patterns
|
||
|
|
||
|
|
||
|
def find_vuln(commit, pattern, versbose=False):
|
||
|
m = pattern.search(commit.message)
|
||
|
if m:
|
||
|
if versbose:
|
||
|
print("Match found: {}".format(m.group(0)), file=sys.stderr)
|
||
|
print(commit.message, file=sys.stderr)
|
||
|
print("---", file=sys.stderr)
|
||
|
ret = {}
|
||
|
ret['commit'] = commit
|
||
|
ret['match'] = m.groups()
|
||
|
return ret
|
||
|
else:
|
||
|
return None
|
||
|
|
||
|
|
||
|
def summary(commit,
|
||
|
branch,
|
||
|
pattern,
|
||
|
origin=None,
|
||
|
vuln_match=None,
|
||
|
tags_matching=False,
|
||
|
commit_state="under-review"
|
||
|
):
|
||
|
potential_vulnerabilities = {}
|
||
|
rcommit = commit
|
||
|
cve = extract_cve(rcommit.message)
|
||
|
if origin is not None:
|
||
|
origin = origin
|
||
|
if origin.find('github.com'):
|
||
|
origin_github_api = origin.split(':')[1]
|
||
|
(org_name, repo_name) = origin_github_api.split('/', 1)
|
||
|
if repo_name.find('.git$'):
|
||
|
repo_name = re.sub(r".git$","", repo_name)
|
||
|
origin_github_api = 'https://api.github.com/repos/{}/{}/commits/{}'.format(org_name, repo_name, rcommit.hexsha)
|
||
|
|
||
|
else:
|
||
|
origin = 'git origin unknown'
|
||
|
# deduplication if similar commits on different branches
|
||
|
if rcommit.hexsha in potential_vulnerabilities:
|
||
|
potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
|
||
|
else:
|
||
|
potential_vulnerabilities[rcommit.hexsha] = {}
|
||
|
potential_vulnerabilities[rcommit.hexsha]['message'] = rcommit.message
|
||
|
potential_vulnerabilities[rcommit.hexsha]['language'] = langdetect(rcommit.message)
|
||
|
potential_vulnerabilities[rcommit.hexsha]['commit-id'] = rcommit.hexsha
|
||
|
potential_vulnerabilities[rcommit.hexsha]['summary'] = rcommit.summary
|
||
|
potential_vulnerabilities[rcommit.hexsha]['stats'] = rcommit.stats.total
|
||
|
potential_vulnerabilities[rcommit.hexsha]['author'] = rcommit.author.name
|
||
|
potential_vulnerabilities[rcommit.hexsha]['author-email'] = rcommit.author.email
|
||
|
potential_vulnerabilities[rcommit.hexsha]['authored_date'] = rcommit.authored_date
|
||
|
potential_vulnerabilities[rcommit.hexsha]['committed_date'] = rcommit.committed_date
|
||
|
potential_vulnerabilities[rcommit.hexsha]['branches'] = []
|
||
|
potential_vulnerabilities[rcommit.hexsha]['branches'].append(branch)
|
||
|
potential_vulnerabilities[rcommit.hexsha]['pattern-selected'] = pattern.pattern
|
||
|
potential_vulnerabilities[rcommit.hexsha]['pattern-matches'] = vuln_match
|
||
|
potential_vulnerabilities[rcommit.hexsha]['origin'] = origin
|
||
|
if origin_github_api:
|
||
|
potential_vulnerabilities[commit.hexsha]['origin-github-api'] = origin_github_api
|
||
|
potential_vulnerabilities[rcommit.hexsha]['tags'] = []
|
||
|
if tags_matching:
|
||
|
if repo.commit(rcommit).hexsha in tagmap:
|
||
|
potential_vulnerabilities[rcommit.hexsha]['tags'] = tagmap[repo.commit(rcommit).hexsha]
|
||
|
if cve: potential_vulnerabilities[rcommit.hexsha]['cve'] = cve
|
||
|
if cve:
|
||
|
potential_vulnerabilities[rcommit.hexsha]['state'] = "cve-assigned"
|
||
|
else:
|
||
|
potential_vulnerabilities[rcommit.hexsha]['state'] = commit_state
|
||
|
|
||
|
return rcommit.hexsha, potential_vulnerabilities
|
||
|
|
||
|
|
||
|
def extract_cve(commit):
|
||
|
cve_found = set()
|
||
|
cve_find = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}', re.IGNORECASE)
|
||
|
m = cve_find.findall(commit)
|
||
|
if m:
|
||
|
for v in m:
|
||
|
cve_found.add(v)
|
||
|
return m
|
||
|
else:
|
||
|
return None
|