git-vuln-finder/git_vuln_finder/vulnerability.py
Alexandre Dulaunoy 68f273ee54
Fix #17 [main] - set the language to "unknown" when langdetect cannot
detect the language for some reason, such as an empty commit message or
an unknown language.

Notes: langdetect exception handler seems to be crap and do not use
a Base Exception handler. That's why the catch-all ;-)
2022-11-27 10:28:01 +01:00

166 lines
6.1 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
#
# Software is free software released under the "GNU Affero General Public License v3.0"
#
# This software is part of cve-search.org
#
# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be
import re
import sys
from langdetect import detect as langdetect
def find_vuln(commit, pattern, verbose=False):
    """Look for a potential vulnerability in a commit's message.

    ``pattern.search`` is applied to ``commit.message``. When it matches and
    ``verbose`` is true, the matched text, the full message and a ``---``
    separator are written to stderr.

    Returns a dict holding the ``commit`` object and the ``match`` groups, or
    ``None`` when the pattern does not match.
    """
    found = pattern.search(commit.message)
    if not found:
        return None

    if verbose:
        report = (
            "Match found: {}".format(found.group(0)),
            commit.message,
            "---",
        )
        for line in report:
            print(line, file=sys.stderr)

    return {"commit": commit, "match": found.groups()}
def find_vuln_event(commit_msg, pattern, verbose=False):
    """Look for a potential vulnerability in a raw commit message string.

    ``pattern.search`` is applied to ``commit_msg``. When it matches and
    ``verbose`` is true, the matched text, the full message and a ``---``
    separator are written to stderr.

    Returns a dict holding the message under ``commit`` and the ``match``
    groups, or ``None`` when the pattern does not match.
    """
    found = pattern.search(commit_msg)
    if not found:
        return None

    if verbose:
        report = (
            "Match found: {}".format(found.group(0)),
            commit_msg,
            "---",
        )
        for line in report:
            print(line, file=sys.stderr)

    return {"commit": commit_msg, "match": found.groups()}
# Recognises a GitHub remote written either scp-style
# (git@github.com:org/repo.git) or as a URL (https://github.com/org/repo.git,
# ssh://git@github.com:22/org/repo) and captures the organisation and the
# repository name without a trailing ".git" or "/".
_GITHUB_ORIGIN_RE = re.compile(
    r"(?:^|[/@.])github\.com(?::\d+)?[:/]([^/:]+)/(.+?)(?:\.git)?/?$",
    re.IGNORECASE,
)


def _github_api_commit_url(origin, hexsha):
    """Return the GitHub API URL of commit ``hexsha``, or None if ``origin`` is not a GitHub remote."""
    found = _GITHUB_ORIGIN_RE.search(origin)
    if found is None:
        return None
    org_name, repo_name = found.groups()
    return "https://api.github.com/repos/{}/{}/commits/{}".format(
        org_name, repo_name, hexsha
    )


def summary(
    repo,
    commit,
    branch,
    tagmap,
    pattern,
    origin=None,
    vuln_match=None,
    tags_matching=False,
    commit_state="under-review",
):
    """Build the finding record for one commit.

    Parameters:
        repo: object whose ``commit(x).hexsha`` is used to look the commit up
            in ``tagmap`` (only when ``tags_matching`` is true).
        commit: commit object; ``message``, ``hexsha``, ``summary``,
            ``stats.total``, ``author.name``, ``author.email``,
            ``authored_date`` and ``committed_date`` are read from it.
        branch: branch recorded as the single element of ``branches``.
        tagmap: mapping from commit hexsha to the value stored under ``tags``.
        pattern: object whose ``pattern`` attribute is stored as
            ``pattern-selected``.
        origin: remote URL of the repository, or None. When it points to
            github.com, an ``origin-github-api`` entry is added.
        vuln_match: stored unchanged as ``pattern-matches``.
        tags_matching: when true, ``tags`` is filled from ``tagmap``.
        commit_state: ``state`` value used when the message names no CVE.

    Returns:
        A tuple ``(hexsha, {hexsha: record}, cve_found)`` where ``cve_found``
        is the set returned by ``extract_cve`` for the commit message.

    The mapping is created fresh on every call and therefore always holds
    exactly one record; this function cannot merge branches of a commit seen
    several times.
    """
    rcommit = commit
    hexsha = rcommit.hexsha
    cve, cve_found = extract_cve(rcommit.message)

    # Always bind origin_github_api so the check below cannot hit an unbound
    # local when there is no origin or the origin is not hosted on GitHub.
    origin_github_api = None
    if origin is None:
        origin = "git origin unknown"
    else:
        origin_github_api = _github_api_commit_url(origin, hexsha)

    # Language detection fails on empty or undetectable messages; fall back
    # to "unknown" without swallowing KeyboardInterrupt/SystemExit.
    try:
        lang = langdetect(rcommit.message)
    except Exception:
        lang = "unknown"

    # Keys are inserted in a fixed order so serialised output stays stable.
    record = {
        "message": rcommit.message,
        "language": lang,
        "commit-id": hexsha,
        "summary": rcommit.summary,
        "stats": rcommit.stats.total,
        "author": rcommit.author.name,
        "author-email": rcommit.author.email,
        "authored_date": rcommit.authored_date,
        "committed_date": rcommit.committed_date,
        "branches": [branch],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin": origin,
    }
    if origin_github_api:
        record["origin-github-api"] = origin_github_api

    record["tags"] = []
    if tags_matching:
        resolved_sha = repo.commit(rcommit).hexsha
        if resolved_sha in tagmap:
            record["tags"] = tagmap[resolved_sha]

    if cve:
        record["cve"] = cve
        record["state"] = "cve-assigned"
    else:
        record["state"] = commit_state

    return hexsha, {hexsha: record}, cve_found
def summary_event(
    commit,
    pattern,
    element,
    vuln_match=None,
    commit_state="under-review"
):
    """Build the finding record for one commit taken from an event payload.

    Parameters:
        commit: mapping with ``sha``, ``message``, ``url`` and an ``author``
            mapping providing ``name`` and ``email``.
        pattern: object whose ``pattern`` attribute is stored as
            ``pattern-selected``.
        element: event mapping; ``repo.name``, ``created_at`` and
            ``payload.ref`` are read from it.
        vuln_match: stored unchanged as ``pattern-matches``.
        commit_state: ``state`` value used when the message names no CVE.

    Returns:
        A tuple ``({sha: record}, cve_found)`` where ``cve_found`` is the set
        returned by ``extract_cve`` for the commit message.
    """
    message = commit["message"]
    cve, cve_found = extract_cve(message)
    sha = commit["sha"]

    # Language detection fails on empty or undetectable messages; fall back
    # to "unknown" (as summary() does) instead of aborting the whole event.
    try:
        lang = langdetect(message)
    except Exception:
        lang = "unknown"

    # Keys are inserted in a fixed order so serialised output stays stable.
    record = {
        "repo_name": element["repo"]["name"],
        "message": message,
        "language": lang,
        "commit-id": sha,
        "author": commit["author"]["name"],
        "author-email": commit["author"]["email"],
        "authored_date": element["created_at"],
        "branches": element["payload"]["ref"],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin-github-api": commit["url"],
    }

    if cve:
        record["cve"] = cve
        record["state"] = "cve-assigned"
    else:
        record["state"] = commit_state

    return {sha: record}, cve_found
def extract_cve(commit):
    """Collect CVE identifiers mentioned in a commit message.

    ``commit`` is the message text. The search is case-insensitive and the
    identifiers are returned exactly as written in the message.

    Returns ``(matches, unique)`` where ``matches`` is the list of every
    occurrence in order and ``unique`` is the set of distinct ones, or
    ``(None, set())`` when the message contains no identifier.
    """
    matches = re.findall(r"CVE-[1-2]\d{1,4}-\d{1,7}", commit, re.IGNORECASE)
    if not matches:
        return None, set()
    return matches, set(matches)