#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
#
# Software is free software released under the "GNU Affero General Public License v3.0"
#
# This software is part of cve-search.org
#
# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be

import re
import sys

from langdetect import detect as langdetect

# Compiled once at import time; matched case-insensitively against commit text.
_CVE_PATTERN = re.compile(r"CVE-[1-2]\d{1,4}-\d{1,7}", re.IGNORECASE)

# Recognises both SSH-style ("git@github.com:org/repo.git") and URL-style
# ("https://github.com/org/repo.git") GitHub remotes. The optional ".git"
# suffix and a trailing slash are excluded from the captured repository name.
_GITHUB_ORIGIN_PATTERN = re.compile(
    r"(?:^|[@/])(?:www\.)?github\.com[:/]+(?P<org>[^/]+)/(?P<repo>.+?)(?:\.git)?/?$",
    re.IGNORECASE,
)


def find_vuln(commit, pattern, verbose=False):
    """Find a potential vulnerability from a commit message thanks to a regex pattern.

    :param commit: object exposing the commit text as ``commit.message``
    :param pattern: compiled regular expression searched in the message
    :param verbose: when true, print the match and the full message to stderr
    :return: ``{"commit": commit, "match": <tuple of regex groups>}`` when the
        pattern matches, ``None`` otherwise
    """
    m = pattern.search(commit.message)
    if not m:
        return None

    if verbose:
        print("Match found: {}".format(m.group(0)), file=sys.stderr)
        print(commit.message, file=sys.stderr)
        print("---", file=sys.stderr)

    return {"commit": commit, "match": m.groups()}


def _github_api_url(origin, hexsha):
    """Return the GitHub API commit URL for *origin*, or None if not a GitHub remote."""
    m = _GITHUB_ORIGIN_PATTERN.search(origin)
    if m is None:
        return None
    return "https://api.github.com/repos/{}/{}/commits/{}".format(
        m.group("org"), m.group("repo"), hexsha
    )


def summary(
    repo,
    commit,
    branch,
    tagmap,
    pattern,
    origin=None,
    vuln_match=None,
    tags_matching=False,
    commit_state="under-review",
):
    """Build the report entry describing one potentially vulnerable commit.

    :param repo: repository object; only ``repo.commit(...)`` is used, and only
        when ``tags_matching`` is true
    :param commit: commit object (``message``, ``hexsha``, ``summary``,
        ``stats.total``, ``author``, ``authored_date``, ``committed_date``
        are read)
    :param branch: branch name recorded in the entry's ``branches`` list
    :param tagmap: mapping of commit hexsha to tags, consulted when
        ``tags_matching`` is true
    :param pattern: compiled regex that selected the commit; its source string
        is stored under ``pattern-selected``
    :param origin: remote URL of the repository, or ``None`` when unknown
    :param vuln_match: match data stored verbatim under ``pattern-matches``
    :param tags_matching: when true, attach the tags found in ``tagmap``
    :param commit_state: state recorded when the message contains no CVE id
    :return: tuple ``(hexsha, {hexsha: entry}, cve_found)`` where ``cve_found``
        is the set of CVE ids found in the commit message
    """
    rcommit = commit
    hexsha = rcommit.hexsha
    cve, cve_found = extract_cve(rcommit.message)

    # Always bind the API URL so the check below cannot hit an unbound name,
    # and only build it for remotes that really point at github.com.
    origin_github_api = None
    if origin is not None:
        origin_github_api = _github_api_url(origin, hexsha)
    else:
        origin = "git origin unknown"

    entry = {
        "message": rcommit.message,
        # NOTE(review): langdetect may raise on text without detectable
        # language features (e.g. an empty message) - confirm whether callers
        # guard against this.
        "language": langdetect(rcommit.message),
        "commit-id": hexsha,
        "summary": rcommit.summary,
        "stats": rcommit.stats.total,
        "author": rcommit.author.name,
        "author-email": rcommit.author.email,
        "authored_date": rcommit.authored_date,
        "committed_date": rcommit.committed_date,
        "branches": [branch],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin": origin,
    }
    if origin_github_api:
        entry["origin-github-api"] = origin_github_api

    entry["tags"] = []
    if tags_matching:
        resolved_sha = repo.commit(rcommit).hexsha
        if resolved_sha in tagmap:
            entry["tags"] = tagmap[resolved_sha]

    if cve:
        entry["cve"] = cve
        entry["state"] = "cve-assigned"
    else:
        entry["state"] = commit_state

    potential_vulnerabilities = {hexsha: entry}
    return hexsha, potential_vulnerabilities, cve_found


def extract_cve(commit):
    """Extract CVE identifiers from a commit message.

    :param commit: text to scan (the commit message string)
    :return: ``(matches, unique)`` where ``matches`` is the list of every CVE
        id found, in order and with duplicates, and ``unique`` is the set of
        those ids; ``(None, set())`` when the text contains no CVE id
    """
    matches = _CVE_PATTERN.findall(commit)
    if not matches:
        return None, set()
    return matches, set(matches)