[Buildroot] [PATCH 1/2] support/scripts/pkg-stats: add support for CVE reporting
Arnout Vandecappelle
arnout at mind.be
Wed Feb 5 10:48:07 UTC 2020
On 04/02/2020 22:52, Thomas Petazzoni wrote:
> This commit extends the pkg-stats script to grab information about the
> CVEs affecting the Buildroot packages.
>
> To do so, it downloads the NVD database from
> https://nvd.nist.gov/vuln/data-feeds in JSON format, and processes the
> JSON file to determine which of our packages is affected by which
> CVE. The information is then displayed in both the HTML output and the
> JSON output of pkg-stats.
>
> To use this feature, you have to pass the new --nvd-path option,
> pointing to a writable directory where pkg-stats will store the NVD
> database. If the local database is less than 24 hours old, it will not
> re-download it. If it is more than 24 hours old, it will re-download
> only the files that have really been updated by upstream NVD.
>
> Packages can use the newly introduced <pkg>_IGNORE_CVES variable to
> tell pkg-stats that some CVEs should be ignored: it can be because a
> patch we have is fixing the CVE, or because the CVE doesn't apply in
> our case.
>
> From an implementation point of view:
>
> - The download_nvd() and download_nvd_year() function implement the
> NVD database downloading.
>
> - The check_package_cves() function will go through all the CVE
> reports of the NVD database, which has one JSON file per year. For
> each CVE report it will check if we have a package with the same
> name in Buildroot. If we do, then
> check_package_cve_version_affected() verifies if the version in
> Buildroot is affected by the CVE.
>
> - The statistics are extended with the total number of CVEs, and the
> total number of packages that have at least one CVE pending.
>
> - The HTML output is extended with these new details. There are no
> changes to the code generating the JSON output because the existing
> code is smart enough to automatically expose the new information.
>
> This development is a collective effort with Titouan Christophe
> <titouan.christophe at railnova.eu> and Thomas De Schampheleire
> <thomas.de_schampheleire at nokia.com>.
>
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni at bootlin.com>
> ---
> support/scripts/pkg-stats | 157 +++++++++++++++++++++++++++++++++++++-
> 1 file changed, 156 insertions(+), 1 deletion(-)
>
> diff --git a/support/scripts/pkg-stats b/support/scripts/pkg-stats
> index e477828f7b..23e630363b 100755
> --- a/support/scripts/pkg-stats
> +++ b/support/scripts/pkg-stats
> @@ -26,10 +26,18 @@ import subprocess
> import requests # URL checking
> import json
> import certifi
> +import distutils.version
> +import time
> +import gzip
> +import glob
> from urllib3 import HTTPSConnectionPool
> from urllib3.exceptions import HTTPError
> from multiprocessing import Pool
>
> +NVD_START_YEAR = 2002
> +NVD_JSON_VERSION = "1.0"
> +NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/" + NVD_JSON_VERSION
> +
> INFRA_RE = re.compile(r"\$\(eval \$\(([a-z-]*)-package\)\)")
> URL_RE = re.compile(r"\s*https?://\S*\s*$")
>
> @@ -47,6 +55,7 @@ class Package:
> all_licenses = list()
> all_license_files = list()
> all_versions = dict()
> + all_ignored_cves = dict()
>
> def __init__(self, name, path):
> self.name = name
> @@ -61,6 +70,7 @@ class Package:
> self.url = None
> self.url_status = None
> self.url_worker = None
> + self.cves = list()
> self.latest_version = (RM_API_STATUS_ERROR, None, None)
>
> def pkgvar(self):
> @@ -152,6 +162,13 @@ class Package:
> self.warnings = int(m.group(1))
> return
>
> + def is_cve_ignored(self, cve):
> + """
> + Tells is the CVE is ignored by the package
Tells *if* ?
> + """
> + return self.pkgvar() in self.all_ignored_cves and \
> + cve in self.all_ignored_cves[self.pkgvar()]
A more pythonic way is:
return cve in self.all_ignored_cves.get(self.pkgvar(), [])
> +
> def __eq__(self, other):
> return self.path == other.path
>
> @@ -227,7 +244,7 @@ def get_pkglist(npackages, package_list):
> def package_init_make_info():
> # Fetch all variables at once
> variables = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y", "-s", "printvars",
> - "VARS=%_LICENSE %_LICENSE_FILES %_VERSION"])
> + "VARS=%_LICENSE %_LICENSE_FILES %_VERSION %_IGNORE_CVES"])
> variable_list = variables.splitlines()
>
> # We process first the host package VERSION, and then the target
> @@ -261,6 +278,10 @@ def package_init_make_info():
> pkgvar = pkgvar[:-8]
> Package.all_versions[pkgvar] = value
>
> + elif pkgvar.endswith("_IGNORE_CVES"):
> + pkgvar = pkgvar[:-12]
> + Package.all_ignored_cves[pkgvar] = value.split(" ")
> +
>
> def check_url_status_worker(url, url_status):
> if url_status != "Missing" and url_status != "No Config.in":
> @@ -355,6 +376,49 @@ def check_package_latest_version(packages):
> del http_pool
>
>
> +def check_package_cve_version_affected(pkg, versions):
This could use some documentation, in particular of what the pkg and versions
arguments are.
> + for v in versions:
> + if v["version_affected"] == "=":
> + return pkg.current_version == v["version_value"]
> + elif v["version_affected"] == "<=":
Isn't there a >= we need to check as well?
> + pkg_version = distutils.version.LooseVersion(pkg.current_version)
> + if not hasattr(pkg_version, "version"):
> + print("Cannot parse package '%s' version '%s'" % (pkg.name, pkg.current_version))
> + continue
> + cve_affected_version = distutils.version.LooseVersion(v["version_value"])
> + if not hasattr(cve_affected_version, "version"):
> + print("Cannot parse CVE affected version '%s'" % v["version_value"])
> + continue
> + return pkg_version < cve_affected_version
Maybe use packaging.version instead? [1]
[1]
https://stackoverflow.com/questions/11887762/how-do-i-compare-version-numbers-in-python
Hm, but the packaging module is not installed on my system (which does have
setuptools), so maybe not...
> +
> + return False
> +
> +
> +def check_package_cve_filter(packages, cve):
> + for vendor_datum in cve['cve']['affects']['vendor']['vendor_data']:
> + for product_datum in vendor_datum['product']['product_data']:
> + cve_pkg_name = product_datum['product_name']
> + for pkg in packages:
> + if cve_pkg_name == pkg.name:
I think it would be useful (as in, speed things up) to convert the package list
into a dict at the top level, because then here we can just do
pkg = packages.get(cve_pkg_name)
if pkg:
if check_package_cve_version_affected(pkg, ...):
The dictionary would be created like this:
cve_packages = {pkg.name: pkg for pkg in packages}
And if we ever support a _CVENAME in the .mk file to improve the mapping, it
would become:
cve_packages = {pkg.cvename or pkg.name: pkg for pkg in packages}
> + if check_package_cve_version_affected(pkg, product_datum["version"]["version_data"]):
> + cveid = cve["cve"]["CVE_data_meta"]["ID"]
> + if not pkg.is_cve_ignored(cveid):
> + pkg.cves.append(cveid)
> +
> +
> +def check_package_cves_year(packages, nvd_file):
AFAIU, the "year" part here is not really relevant. It just happens to be that
the json files are organised per year. So I'd just call it check_package_cves_file.
> + cves = json.load(open(nvd_file))
> + for cve in cves["CVE_Items"]:
> + check_package_cve_filter(packages, cve)
> +
> +
> +def check_package_cves(nvd_path, packages):
> + nvd_files = sorted(glob.glob(os.path.join(nvd_path, "nvdcve-%s-*.json" %
Maybe it would be better if the nvd_download step would make an explicit list
of json files.
> + NVD_JSON_VERSION)))
> + for nvd_file in nvd_files:
> + check_package_cves_year(packages, nvd_file)
> +
> +
> def calculate_stats(packages):
> stats = defaultdict(int)
> for pkg in packages:
> @@ -390,6 +454,9 @@ def calculate_stats(packages):
> else:
> stats["version-not-uptodate"] += 1
> stats["patches"] += pkg.patch_count
> + stats["total-cves"] += len(pkg.cves)
> + if len(pkg.cves) != 0:
> + stats["pkg-cves"] += 1
> return stats
>
>
> @@ -601,6 +668,17 @@ def dump_html_pkg(f, pkg):
> f.write(" <td class=\"%s\">%s</td>\n" %
> (" ".join(td_class), url_str))
>
> + # CVEs
> + td_class = ["centered"]
> + if len(pkg.cves) == 0:
> + td_class.append("correct")
> + else:
> + td_class.append("wrong")
> + f.write(" <td class=\"%s\">\n" % " ".join(td_class))
> + for cve in pkg.cves:
> + f.write(" <a href=\"https://security-tracker.debian.org/tracker/%s\">%s<br/>\n" % (cve, cve))
> + f.write(" </td>\n")
> +
> f.write(" </tr>\n")
>
>
> @@ -618,6 +696,7 @@ def dump_html_all_pkgs(f, packages):
> <td class=\"centered\">Latest version</td>
> <td class=\"centered\">Warnings</td>
> <td class=\"centered\">Upstream URL</td>
> +<td class=\"centered\">CVEs</td>
> </tr>
> """)
> for pkg in sorted(packages):
> @@ -656,6 +735,10 @@ def dump_html_stats(f, stats):
> stats["version-not-uptodate"])
> f.write("<tr><td>Packages with no known upstream version</td><td>%s</td></tr>\n" %
> stats["version-unknown"])
> + f.write("<tr><td>Packages affected by CVEs</td><td>%s</td></tr>\n" %
> + stats["pkg-cves"])
> + f.write("<tr><td>Total number of CVEs affecting all packages</td><td>%s</td></tr>\n" %
> + stats["total-cves"])
Shouldn't this be conditional on whether we were checking CVEs? Same for the title.
> f.write("</table>\n")
>
>
> @@ -702,6 +785,72 @@ def dump_json(packages, stats, date, commit, output):
> f.write('\n')
>
>
> +def download_nvd_year(nvd_path, year):
> + metaf = "nvdcve-%s-%s.meta" % (NVD_JSON_VERSION, year)
> + path_metaf = os.path.join(nvd_path, metaf)
> +
> + # If the meta file is less than a day old, we assume the NVD data
> + # locally available is recent enough.
> + if os.path.exists(path_metaf) and os.stat(path_metaf).st_mtime >= time.time() - 86400:
> + return
> +
> + # If not, we download the meta file
> + url = "%s/%s" % (NVD_BASE_URL, metaf)
> + print("Getting %s" % url)
> + r = requests.get(url)
> + meta_new = dict(x.strip().split(':', 1) for x in r.text.splitlines())
> + if os.path.exists(path_metaf):
> + # If the meta file already existed, we compare the existing
> + # one with the data newly downloaded. If they are different,
> + # we need to re-download the database.
> + with open(path_metaf, "r") as f:
> + meta_old = dict(x.strip().split(':', 1) for x in f)
> + needs_download = meta_new != meta_old
Since the meta is only used for comparing new and old, why not simply
meta_old = f.read()
needs_download = r.text != meta_old
?
> + else:
> + # If the meta file does not exist locally, of course we need
> + # to download the database
> + needs_download = True
> +
> + # If we don't need to download the database, bail out
> + if not needs_download:
> + return
> +
> + # Write the meta file, possibly overwriting the existing one
> + with open(path_metaf, "w") as f:
> + f.write(r.text)
> +
> + # Grab the compressed JSON NVD database
> + jsonf = "nvdcve-%s-%s.json" % (NVD_JSON_VERSION, year)
> + jsonf_gz = jsonf + ".gz"
> + path_jsonf = os.path.join(nvd_path, jsonf)
> + path_jsonf_gz = os.path.join(nvd_path, jsonf_gz)
> + url = "%s/%s" % (NVD_BASE_URL, jsonf_gz)
> + print("Getting %s" % url)
> + r = requests.get(url)
> + with open(path_jsonf_gz, "wb") as f:
> + f.write(r.content)
> +
> + # Uncompress and write it
> + gz = gzip.GzipFile(path_jsonf_gz)
> + with open(path_jsonf, "w") as f:
> + f.write(gz.read())
> +
> + # Remove the temporary compressed file
> + os.unlink(path_jsonf_gz)
Like Peter, I think it's better to store the gzipped file and unzip when
reading the json.
> +
> +
> +def download_nvd(nvd_path):
> + if nvd_path is None:
> + print("NVD database path not provided, not checking CVEs")
> + return
> +
> + if not os.path.isdir(nvd_path):
> + raise RuntimeError("NVD database path doesn't exist")
> +
> + for year in range(NVD_START_YEAR, datetime.date.today().year + 1):
> + download_nvd_year(nvd_path, year)
> +
> +
> def parse_args():
> parser = argparse.ArgumentParser()
> output = parser.add_argument_group('output', 'Output file(s)')
> @@ -714,6 +863,8 @@ def parse_args():
> help='Number of packages')
> packages.add_argument('-p', dest='packages', action='store',
> help='List of packages (comma separated)')
> + parser.add_argument('--nvd-path', dest='nvd_path',
> + help='Path to the local NVD database')
> args = parser.parse_args()
> if not args.html and not args.json:
> parser.error('at least one of --html or --json (or both) is required')
> @@ -729,6 +880,7 @@ def __main__():
> date = datetime.datetime.utcnow()
> commit = subprocess.check_output(['git', 'rev-parse',
> 'HEAD']).splitlines()[0]
> + download_nvd(args.nvd_path)
> print("Build package list ...")
> packages = get_pkglist(args.npackages, package_list)
> print("Getting package make info ...")
> @@ -746,6 +898,9 @@ def __main__():
> check_package_urls(packages)
> print("Getting latest versions ...")
> check_package_latest_version(packages)
> + if args.nvd_path:
> + print("CVE")
> + check_package_cves(args.nvd_path, packages)
> print("Calculate stats")
> stats = calculate_stats(packages)
> if args.html:
>
More information about the buildroot
mailing list