mirror of
https://github.com/adulau/git-vuln-finder.git
synced 2024-11-22 10:07:11 +00:00
195 lines
7.1 KiB
Python
195 lines
7.1 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Finding potential software vulnerabilities from git commit messages
|
|
#
|
|
# Software is free software released under the "GNU Affero General Public License v3.0"
|
|
#
|
|
# This software is part of cve-search.org
|
|
#
|
|
# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be
|
|
|
|
import git
|
|
import re
|
|
import sys
|
|
import typing
|
|
from langdetect import detect as langdetect
|
|
|
|
from git_vuln_finder import get_patterns
|
|
|
|
|
|
def find(repo, tags_matching=False, commit_state="under-review", verbose=False, defaultpattern="all"):
    """Scan every local branch of a git repository for suspicious commits.

    Each commit message of each local head is tested against the selected
    regex pattern(s); every hit is described through ``summary``.

    Args:
        repo: Value handed to ``git.Repo`` to open the repository.
        tags_matching: When true, build a commit-sha -> tag-names map and
            pass it to ``summary`` so matching commits list their tags.
        commit_state: State forwarded to ``summary`` for commits without a
            CVE identifier.
        verbose: Forwarded to ``find_vuln`` (prints matches on stderr).
        defaultpattern: One of the names ``"vulnpatterns"``,
            ``"cryptopatterns"``, ``"cpatterns"`` or ``"all"``; or an already
            compiled pattern; or a list of compiled patterns. Any other value
            selects no pattern and therefore yields no result.

    Returns:
        A tuple ``(all_potential_vulnerabilities, all_cve_found, found)``:
        a dict keyed by commit sha, the set of CVE identifiers seen, and the
        number of (commit, branch, pattern) matches.
    """
    # Initialization of the variables for the results
    repo = git.Repo(repo)
    found = 0
    all_potential_vulnerabilities = {}
    all_cve_found = set()

    # Initialization of the patterns
    patterns = get_patterns()
    vulnpatterns = patterns["en"]["medium"]["vuln"]
    cryptopatterns = patterns["en"]["medium"]["crypto"]
    cpatterns = patterns["en"]["medium"]["c"]

    named_patterns = {
        "vulnpatterns": vulnpatterns,
        "cryptopatterns": cryptopatterns,
        "cpatterns": cpatterns,
        "all": [vulnpatterns, cryptopatterns, cpatterns],
    }
    # Only strings are looked up by name: a list argument is unhashable and a
    # compiled pattern must be used as-is.
    if isinstance(defaultpattern, str):
        defaultpattern = named_patterns.get(defaultpattern, defaultpattern)

    # Normalise to a list so a single pattern and several patterns share the
    # same matching loop below.
    if isinstance(defaultpattern, typing.Pattern):
        selected_patterns = [defaultpattern]
    elif isinstance(defaultpattern, list):
        selected_patterns = defaultpattern
    else:
        selected_patterns = []

    repo_heads_names = [h.name for h in repo.heads]
    print(repo_heads_names, file=sys.stderr)
    origin = repo.remotes.origin.url

    tagmap = {}
    if tags_matching:
        for t in repo.tags:
            tagmap.setdefault(repo.commit(t).hexsha, []).append(str(t))

    for branch in repo_heads_names:
        for commit in repo.iter_commits(branch):
            for p in selected_patterns:
                ret = find_vuln(commit, pattern=p, verbose=verbose)
                if not ret:
                    continue
                _, potential_vulnerabilities, cve_found = summary(
                    repo,
                    ret["commit"],
                    branch,
                    tagmap,
                    p,
                    origin=origin,
                    vuln_match=ret["match"],
                    tags_matching=tags_matching,
                    commit_state=commit_state,
                )
                # summary() describes one commit on one branch. Carry over
                # the branches recorded by earlier matches of the same commit
                # instead of letting the new entry overwrite them.
                for sha, entry in potential_vulnerabilities.items():
                    previous = all_potential_vulnerabilities.get(sha)
                    if previous is not None:
                        merged_branches = list(previous["branches"])
                        for name in entry["branches"]:
                            if name not in merged_branches:
                                merged_branches.append(name)
                        entry["branches"] = merged_branches
                    all_potential_vulnerabilities[sha] = entry
                all_cve_found.update(cve_found)
                found += 1

    return all_potential_vulnerabilities, all_cve_found, found
|
|
|
|
|
|
def find_vuln(commit, pattern, verbose=False):
    """Search a commit message for a potential vulnerability.

    Args:
        commit: Object exposing the commit text as ``commit.message``.
        pattern: Compiled regex applied with ``search`` to the message.
        verbose: When true, print the matched text, the full message and a
            ``---`` separator on stderr.

    Returns:
        ``None`` when the pattern does not match; otherwise a dict with the
        commit under ``"commit"`` and the regex capture groups under
        ``"match"``.
    """
    hit = pattern.search(commit.message)
    if hit is None:
        return None

    if verbose:
        report = (
            "Match found: {}".format(hit.group(0)),
            commit.message,
            "---",
        )
        for line in report:
            print(line, file=sys.stderr)

    return {"commit": commit, "match": hit.groups()}
|
|
|
|
|
|
def _github_commit_api_url(origin, hexsha):
    """Return the GitHub API URL of commit *hexsha*, or None.

    *origin* is a remote URL. Both the scp-like form
    (``git@github.com:org/repo.git``) and URL forms
    (``https://github.com/org/repo.git``) are recognised; a trailing
    ``.git`` is dropped. None is returned when no ``github.com`` org/repo
    pair can be extracted.
    """
    m = re.search(
        r"(?<![\w-])github\.com[:/](?P<org>[^/:]+)/(?P<repo>[^/]+?)(?:\.git)?/?$",
        origin,
    )
    if m is None:
        return None
    return "https://api.github.com/repos/{}/{}/commits/{}".format(
        m.group("org"), m.group("repo"), hexsha
    )


def summary(
    repo,
    commit,
    branch,
    tagmap,
    pattern,
    origin=None,
    vuln_match=None,
    tags_matching=False,
    commit_state="under-review",
):
    """Describe one matching commit as a single-entry result dict.

    Args:
        repo: Repository the commit belongs to. Kept for interface
            compatibility; the commit sha is read from *commit* directly.
        commit: Matching commit; its ``hexsha``, ``message``, ``summary``,
            ``stats.total``, ``author`` and date attributes are read.
        branch: Name of the branch on which the commit was found.
        tagmap: Mapping of commit sha to a list of tag names.
        pattern: Compiled regex that matched; its ``pattern`` text is stored.
        origin: URL of the remote, or None when unknown.
        vuln_match: Regex groups captured by the match.
        tags_matching: When true, tags are looked up in *tagmap*.
        commit_state: State recorded when the message holds no CVE id.

    Returns:
        A tuple ``(hexsha, potential_vulnerabilities, cve_found)`` where
        ``potential_vulnerabilities`` is a fresh dict holding exactly one
        entry keyed by the commit sha, and ``cve_found`` is the set of CVE
        identifiers found in the commit message.
    """
    rcommit = commit
    sha = rcommit.hexsha
    cve, cve_found = extract_cve(rcommit.message)

    if origin is not None:
        # None for non-GitHub or unparsable remotes, so the
        # "origin-github-api" key is simply left out below.
        origin_github_api = _github_commit_api_url(origin, sha)
    else:
        origin = "git origin unknown"
        origin_github_api = None

    # Keys are inserted in a fixed order so serialised output stays stable.
    entry = {
        "message": rcommit.message,
        "language": langdetect(rcommit.message),
        "commit-id": sha,
        "summary": rcommit.summary,
        "stats": rcommit.stats.total,
        "author": rcommit.author.name,
        "author-email": rcommit.author.email,
        "authored_date": rcommit.authored_date,
        "committed_date": rcommit.committed_date,
        "branches": [branch],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin": origin,
    }
    if origin_github_api:
        entry["origin-github-api"] = origin_github_api

    entry["tags"] = tagmap.get(sha, []) if tags_matching else []

    if cve:
        entry["cve"] = cve
        entry["state"] = "cve-assigned"
    else:
        entry["state"] = commit_state

    # The dict is new on every call, so the caller is responsible for merging
    # entries of the same commit seen on several branches.
    potential_vulnerabilities = {sha: entry}
    return sha, potential_vulnerabilities, cve_found
|
|
|
|
|
|
def extract_cve(commit):
    """Collect the CVE identifiers mentioned in a commit message.

    Args:
        commit: Text of the commit message to scan.

    Returns:
        ``(None, set())`` when nothing matches; otherwise a tuple of the list
        of every match in order of appearance (duplicates kept, case as
        written) and the set of distinct matches.
    """
    matches = re.findall(r"CVE-[1-2]\d{1,4}-\d{1,7}", commit, re.IGNORECASE)
    if not matches:
        return None, set()
    return matches, set(matches)
|