git-vuln-finder/git_vuln_finder/vulnerability.py

196 lines
7.1 KiB
Python
Raw Normal View History

2020-01-03 15:07:40 +00:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
#
# Software is free software released under the "GNU Affero General Public License v3.0"
#
# This software is part of cve-search.org
#
2020-01-03 16:51:55 +00:00
# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be
2020-01-03 15:07:40 +00:00
import git
2020-01-03 15:07:40 +00:00
import re
import sys
import typing
2020-01-03 15:07:40 +00:00
from langdetect import detect as langdetect
from git_vuln_finder import get_patterns
def find(repo, tags_matching=False, commit_state="under-review", verbose=False, defaultpattern="all"):
    """Scan the commit messages of every local branch for vulnerability hints.

    Parameters:
        repo: value handed to ``git.Repo`` to open the repository.
        tags_matching: when true, a map of commit sha -> tag names is built
            from ``repo.tags`` and forwarded to ``summary``.
        commit_state: forwarded unchanged to ``summary``.
        verbose: forwarded unchanged to ``find_vuln``.
        defaultpattern: one of the selectors ``"vulnpatterns"``,
            ``"cryptopatterns"``, ``"cpatterns"`` or ``"all"``.  A compiled
            pattern or a list of compiled patterns is used as given.  Any
            other value selects no pattern and yields an empty result.

    Returns:
        A tuple ``(all_potential_vulnerabilities, all_cve_found, found)``:
        the per-commit summaries keyed by commit sha, the set of CVE ids
        reported by ``summary``, and the number of (commit, branch, pattern)
        matches.

    Raises:
        AttributeError: raised by GitPython when the repository has no remote
            named ``origin`` (behaviour unchanged from the original code).
    """
    # Initialization of the variables for the results
    repo = git.Repo(repo)
    found = 0
    all_potential_vulnerabilities = {}
    all_cve_found = set()

    # Initialization of the patterns
    patterns = get_patterns()
    vulnpatterns = patterns["en"]["medium"]["vuln"]
    cryptopatterns = patterns["en"]["medium"]["crypto"]
    cpatterns = patterns["en"]["medium"]["c"]
    named_patterns = {
        "vulnpatterns": vulnpatterns,
        "cryptopatterns": cryptopatterns,
        "cpatterns": cpatterns,
        "all": [vulnpatterns, cryptopatterns, cpatterns],
    }
    # Only strings are looked up: a list would be unhashable and a compiled
    # pattern must be kept as is.
    if isinstance(defaultpattern, str):
        defaultpattern = named_patterns.get(defaultpattern, defaultpattern)

    # Normalise to a list so that a single loop handles both the
    # one-pattern and the many-patterns case.
    if isinstance(defaultpattern, typing.Pattern):
        selected_patterns = [defaultpattern]
    elif isinstance(defaultpattern, list):
        selected_patterns = defaultpattern
    else:
        selected_patterns = []

    repo_heads_names = [h.name for h in repo.heads]
    print(repo_heads_names, file=sys.stderr)
    origin = repo.remotes.origin.url

    tagmap = {}
    if tags_matching:
        for t in repo.tags:
            tagmap.setdefault(repo.commit(t).hexsha, []).append(str(t))

    for branch in repo_heads_names:
        # Iterate lazily: the full history of a branch does not need to be
        # held in memory at once.
        for commit in repo.iter_commits(branch):
            for pattern in selected_patterns:
                ret = find_vuln(commit, pattern=pattern, verbose=verbose)
                if not ret:
                    continue
                _, potential_vulnerabilities, cve_found = summary(
                    repo,
                    ret["commit"],
                    branch,
                    tagmap,
                    pattern,
                    origin=origin,
                    vuln_match=ret["match"],
                    tags_matching=tags_matching,
                    commit_state=commit_state,
                )
                for commit_id, entry in potential_vulnerabilities.items():
                    previous = all_potential_vulnerabilities.get(commit_id)
                    if previous is not None:
                        # The commit was already reported (other branch or
                        # other pattern).  The newest entry still wins for
                        # every other field, but the branches seen so far
                        # are carried over instead of being discarded.
                        known = previous.get("branches", [])
                        entry["branches"] = known + [
                            b for b in entry.get("branches", []) if b not in known
                        ]
                    all_potential_vulnerabilities[commit_id] = entry
                all_cve_found.update(cve_found)
                found += 1

    return all_potential_vulnerabilities, all_cve_found, found
2020-01-03 15:07:40 +00:00
2020-01-05 12:08:57 +00:00
def find_vuln(commit, pattern, verbose=False):
    """Search a commit message for a potential vulnerability using a regex
    pattern.

    Returns a dict holding the commit (key ``"commit"``) and the captured
    groups of the match (key ``"match"``), or ``None`` when the pattern does
    not match.  With ``verbose`` set, the matched text, the full message and
    a separator line are written to stderr.
    """
    hit = pattern.search(commit.message)
    if hit is None:
        return None

    if verbose:
        report = (
            "Match found: {}".format(hit.group(0)),
            commit.message,
            "---",
        )
        for line in report:
            print(line, file=sys.stderr)

    return {"commit": commit, "match": hit.groups()}
2020-01-06 06:50:21 +00:00
def summary(
    repo,
    commit,
    branch,
    tagmap,
    pattern,
    origin=None,
    vuln_match=None,
    tags_matching=False,
    commit_state="under-review",
):
    """Build the report entry describing one matching commit.

    Parameters:
        repo: unused here; kept so existing callers keep working.
        commit: commit object; its ``message``, ``hexsha``, ``summary``,
            ``stats``, ``author``, ``authored_date`` and ``committed_date``
            attributes are read.
        branch: name of the branch the commit was found on.
        tagmap: mapping of commit sha -> list of tag names.
        pattern: compiled pattern that matched; its ``pattern`` attribute is
            stored in the entry.
        origin: URL of the origin remote, or ``None`` when unknown.
        vuln_match: the groups captured by the match.
        tags_matching: when true, the tags of the commit are looked up in
            ``tagmap``.
        commit_state: value stored under ``"state"`` when the message holds
            no CVE id.

    Returns:
        A tuple ``(hexsha, potential_vulnerabilities, cve_found)`` where
        ``potential_vulnerabilities`` is a dict with exactly one entry keyed
        by the commit sha, and ``cve_found`` is the set of CVE ids found in
        the commit message.
    """
    rcommit = commit
    commit_id = rcommit.hexsha
    cve, cve_found = extract_cve(rcommit.message)

    # Only set when the origin really is a GitHub remote.
    origin_github_api = None
    if origin is None:
        origin = "git origin unknown"
    else:
        # Accepts scp-like (git@github.com:org/repo.git) as well as URL style
        # (https://github.com/org/repo(.git), ssh://git@github.com[:port]/...)
        # origins; a trailing ".git" and a trailing slash are dropped.
        github = re.search(
            r"github\.com(?::\d+)?[:/]+([^/]+)/(.+?)(?:\.git)?/?$", origin
        )
        if github:
            origin_github_api = "https://api.github.com/repos/{}/{}/commits/{}".format(
                github.group(1), github.group(2), commit_id
            )

    entry = {
        "message": rcommit.message,
        "language": langdetect(rcommit.message),
        "commit-id": commit_id,
        "summary": rcommit.summary,
        "stats": rcommit.stats.total,
        "author": rcommit.author.name,
        "author-email": rcommit.author.email,
        "authored_date": rcommit.authored_date,
        "committed_date": rcommit.committed_date,
        "branches": [branch],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin": origin,
    }
    if origin_github_api:
        entry["origin-github-api"] = origin_github_api

    entry["tags"] = []
    if tags_matching:
        # The sha of the commit is used directly as the lookup key; there is
        # no need to resolve the commit through the repository again.
        entry["tags"] = tagmap.get(commit_id, [])

    if cve:
        entry["cve"] = cve
        entry["state"] = "cve-assigned"
    else:
        entry["state"] = commit_state

    return commit_id, {commit_id: entry}, cve_found
2020-01-03 15:07:40 +00:00
def extract_cve(commit):
    """Collect the CVE identifiers mentioned in a commit message.

    ``commit`` is the message text.  Returns ``(matches, unique)`` where
    ``matches`` is the list of identifiers in order of appearance and
    ``unique`` the same identifiers as a set; when nothing is found the
    result is ``(None, set())``.  Matching ignores case and the identifiers
    are returned exactly as written in the message.
    """
    matches = re.findall(r"CVE-[1-2]\d{1,4}-\d{1,7}", commit, re.IGNORECASE)
    if not matches:
        return None, set()
    return matches, set(matches)