From 88807c7062788254f654ea8c03427adc859321f0 Mon Sep 17 00:00:00 2001 From: Jason R. Coombs Date: Mon Apr 29 20:01:38 2024 -0400 Subject: [PATCH] Merge pull request #4332 from pypa/debt/package-index-vcs Modernize package_index VCS handling Source: https://git.launchpad.net/ubuntu/+source/setuptools/tree/debian/patches/CVE-2024-6345.patch?h=applied/ubuntu/jammy-devel CVE: CVE-2024-6345 Upstream-Status: Backport [https://github.com/pypa/setuptools/commit/88807c7062788254f654ea8c03427adc859321f0] Note: Cannot do exact upstream patch backport as the code changed. Signed-off-by: Soumya Sambu --- setup.cfg | 1 + setuptools/package_index.py | 145 +++++++++++++++----------- setuptools/tests/test_packageindex.py | 78 +++++++------- 3 files changed, 123 insertions(+), 101 deletions(-) diff --git a/setup.cfg b/setup.cfg index 0bc0101..b8585d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,6 +56,7 @@ testing = jaraco.envs>=2.2 pytest-xdist sphinx + pytest-subprocess jaraco.path>=3.2.0 docs = sphinx diff --git a/setuptools/package_index.py b/setuptools/package_index.py index e93fcc6..3a893df 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -1,5 +1,6 @@ """PyPI and direct package downloading""" import sys +import subprocess import os import re import io @@ -566,7 +567,7 @@ class PackageIndex(Environment): scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir - found = self._download_url(scheme.group(1), spec, tmpdir) + found = self._download_url(spec, tmpdir) base, fragment = egg_info_for_url(spec) if base.endswith('.py'): found = self.gen_setup(found, fragment, tmpdir) @@ -785,7 +786,7 @@ class PackageIndex(Environment): raise DistutilsError("Download error for %s: %s" % (url, v)) from v - def _download_url(self, scheme, url, tmpdir): + def _download_url(self, url, tmpdir): # Determine download filename # name, fragment = egg_info_for_url(url) @@ -800,19 +801,57 @@ class PackageIndex(Environment): filename = os.path.join(tmpdir, name) - # Download the file - # - if scheme == 'svn' or scheme.startswith('svn+'): - return self._download_svn(url, filename) - elif scheme == 'git' or scheme.startswith('git+'): - return self._download_git(url, filename) - elif scheme.startswith('hg+'): - return self._download_hg(url, filename) - elif scheme == 'file': - return urllib.request.url2pathname(urllib.parse.urlparse(url)[2]) - else: - self.url_ok(url, True) # raises error if not allowed - return self._attempt_download(url, filename) + return self._download_vcs(url, filename) or self._download_other(url, filename) + + @staticmethod + def _resolve_vcs(url): + """ + >>> rvcs = PackageIndex._resolve_vcs + >>> rvcs('git+http://foo/bar') + 'git' + >>> rvcs('hg+https://foo/bar') + 'hg' + >>> rvcs('git:myhost') + 'git' + >>> rvcs('hg:myhost') + >>> rvcs('http://foo/bar') + """ + scheme = urllib.parse.urlsplit(url).scheme + pre, sep, post = scheme.partition('+') + # svn and git have their own protocol; hg does not + allowed = set(['svn', 'git'] + ['hg'] * bool(sep)) + return next(iter({pre} & allowed), None) + + def _download_vcs(self, url, spec_filename): + vcs = self._resolve_vcs(url) + if not vcs: + return + if vcs == 'svn': + return self._download_svn(url, spec_filename) + + filename, _, _ = spec_filename.partition('#') + url, rev = self._vcs_split_rev_from_url(url) + + self.info(f"Doing {vcs} clone from {url} to {filename}") + subprocess.check_call([vcs, 'clone', '--quiet', url, filename]) + + co_commands = dict( + git=[vcs, '-C', filename, 'checkout', '--quiet', rev], + hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'], + ) + if rev is not None: + self.info(f"Checking out {rev}") + subprocess.check_call(co_commands[vcs]) + + return filename + + def _download_other(self, url, filename): + scheme = urllib.parse.urlsplit(url).scheme + if scheme == 'file': # pragma: no cover + return urllib.request.url2pathname(urllib.parse.urlparse(url).path) + # raise error if not allowed + self.url_ok(url, True) + return self._attempt_download(url, filename) def scan_url(self, url): self.process_url(url, True) @@ -842,7 +881,7 @@ class PackageIndex(Environment): def _download_svn(self, url, filename): warnings.warn("SVN download support is deprecated", UserWarning) url = url.split('#', 1)[0] # remove any fragment for svn's sake - creds = '' + creds = [] if url.lower().startswith('svn:') and '@' in url: scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) if not netloc and path.startswith('//') and '/' in path[2:]: @@ -851,65 +890,49 @@ class PackageIndex(Environment): if auth: if ':' in auth: user, pw = auth.split(':', 1) - creds = " --username=%s --password=%s" % (user, pw) + creds.extend(["--username", user, "--password", pw]) else: - creds = " --username=" + auth + creds.extend(["--username", auth]) netloc = host parts = scheme, netloc, url, p, q, f url = urllib.parse.urlunparse(parts) self.info("Doing subversion checkout from %s to %s", url, filename) - os.system("svn checkout%s -q %s %s" % (creds, url, filename)) + cmd = ["svn", "checkout", "-q"] + creds + [url, filename] + subprocess.check_call(cmd) + return filename @staticmethod - def _vcs_split_rev_from_url(url, pop_prefix=False): - scheme, netloc, path, query, frag = urllib.parse.urlsplit(url) + def _vcs_split_rev_from_url(url): + """ + Given a possible VCS URL, return a clean URL and resolved revision if any. + + >>> vsrfu = PackageIndex._vcs_split_rev_from_url + >>> vsrfu('git+https://github.com/pypa/setuptools@v69.0.0#egg-info=setuptools') + ('https://github.com/pypa/setuptools', 'v69.0.0') + >>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools') + ('https://github.com/pypa/setuptools', None) + >>> vsrfu('http://foo/bar') + ('http://foo/bar', None) + """ + parts = urllib.parse.urlsplit(url) - scheme = scheme.split('+', 1)[-1] + clean_scheme = parts.scheme.split('+', 1)[-1] # Some fragment identification fails - path = path.split('#', 1)[0] - - rev = None - if '@' in path: - path, rev = path.rsplit('@', 1) + no_fragment_path, _, _ = parts.path.partition('#') - # Also, discard fragment - url = urllib.parse.urlunsplit((scheme, netloc, path, query, '')) + pre, sep, post = no_fragment_path.rpartition('@') + clean_path, rev = (pre, post) if sep else (post, None) - return url, rev - - def _download_git(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) - - self.info("Doing git clone from %s to %s", url, filename) - os.system("git clone --quiet %s %s" % (url, filename)) - - if rev is not None: - self.info("Checking out %s", rev) - os.system("git -C %s checkout --quiet %s" % ( - filename, - rev, - )) + resolved = parts._replace( + scheme=clean_scheme, + path=clean_path, + # discard the fragment + fragment='', + ).geturl() - return filename - - def _download_hg(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) - - self.info("Doing hg clone from %s to %s", url, filename) - os.system("hg clone --quiet %s %s" % (url, filename)) - - if rev is not None: - self.info("Updating to %s", rev) - os.system("hg --cwd %s up -C -r %s -q" % ( - filename, - rev, - )) - - return filename + return resolved, rev def debug(self, msg, *args): log.debug(msg, *args) diff --git a/setuptools/tests/test_packageindex.py b/setuptools/tests/test_packageindex.py index 8e9435e..cc7e86c 100644 --- a/setuptools/tests/test_packageindex.py +++ b/setuptools/tests/test_packageindex.py @@ -6,7 +6,6 @@ import urllib.request import urllib.error import http.client -import mock import pytest import setuptools.package_index @@ -193,61 +192,60 @@ class TestPackageIndex: assert dists[0].version == '' assert dists[1].version == vc - def test_download_git_with_rev(self, tmpdir): + def test_download_git_with_rev(self, tmp_path, fp): url = 'git+https://github.example/group/project@master#egg=foo' index = setuptools.package_index.PackageIndex() - with mock.patch("os.system") as os_system_mock: - result = index.download(url, str(tmpdir)) + expected_dir = tmp_path / 'project@master' + fp.register([ + 'git', + 'clone', + '--quiet', + 'https://github.example/group/project', + expected_dir, + ]) + fp.register(['git', '-C', expected_dir, 'checkout', '--quiet', 'master']) - os_system_mock.assert_called() + result = index.download(url, tmp_path) - expected_dir = str(tmpdir / 'project@master') - expected = ( - 'git clone --quiet ' - 'https://github.example/group/project {expected_dir}' - ).format(**locals()) - first_call_args = os_system_mock.call_args_list[0][0] - assert first_call_args == (expected,) + assert result == str(expected_dir) + assert len(fp.calls) == 2 - tmpl = 'git -C {expected_dir} checkout --quiet master' - expected = tmpl.format(**locals()) - assert os_system_mock.call_args_list[1][0] == (expected,) - assert result == expected_dir - - def test_download_git_no_rev(self, tmpdir): + def test_download_git_no_rev(self, tmp_path, fp): url = 'git+https://github.example/group/project#egg=foo' index = setuptools.package_index.PackageIndex() - with mock.patch("os.system") as os_system_mock: - result = index.download(url, str(tmpdir)) - - os_system_mock.assert_called() + expected_dir = tmp_path / 'project' + fp.register([ + 'git', + 'clone', + '--quiet', + 'https://github.example/group/project', + expected_dir, + ]) + result = index.download(url, tmp_path) - expected_dir = str(tmpdir / 'project') - expected = ( - 'git clone --quiet ' - 'https://github.example/group/project {expected_dir}' - ).format(**locals()) - os_system_mock.assert_called_once_with(expected) + assert result == str(expected_dir) + assert len(fp.calls) == 1 - def test_download_svn(self, tmpdir): + def test_download_svn(self, tmp_path, fp): url = 'svn+https://svn.example/project#egg=foo' index = setuptools.package_index.PackageIndex() - with pytest.warns(UserWarning): - with mock.patch("os.system") as os_system_mock: - result = index.download(url, str(tmpdir)) - - os_system_mock.assert_called() + expected_dir = tmp_path / 'project' + fp.register([ + 'svn', + 'checkout', + '-q', + 'svn+https://svn.example/project', + expected_dir, + ]) - expected_dir = str(tmpdir / 'project') - expected = ( - 'svn checkout -q ' - 'svn+https://svn.example/project {expected_dir}' - ).format(**locals()) - os_system_mock.assert_called_once_with(expected) + with pytest.warns(UserWarning, match="SVN download support is deprecated"): + result = index.download(url, tmp_path) + assert result == str(expected_dir) + assert len(fp.calls) == 1 class TestContentCheckers: def test_md5(self): -- 2.40.0