Skip to content

Commit ecf55d7

Browse files
committed
GPU case: refactor Ubuntu and Debian installation
1 parent 1e70265 commit ecf55d7

File tree

4 files changed

+79
-90
lines changed

4 files changed

+79
-90
lines changed

lisa/microsoft/testsuites/cvm/cvm_attestation_tool.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ def _install(self) -> bool:
8989

9090
git.clone(self.repo, root_path)
9191

92-
posix_os._install_package_from_url(
92+
posix_os.install_package_from_url(
9393
self.deb_file, package_name="azguestattestation1.deb"
9494
)
9595
self.node.execute(

lisa/microsoft/testsuites/gpu/gpusuite.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
Debian,
2424
Linux,
2525
Oracle,
26-
Suse,
2726
Ubuntu,
2827
Windows,
2928
)
@@ -33,6 +32,7 @@
3332
from lisa.tools.python import PythonVenv
3433
from lisa.util import UnsupportedOperationException, get_matched_str
3534

35+
3636
_cudnn_location = (
3737
"https://developer.download.nvidia.com/compute/redist/cudnn/"
3838
"v7.5.0/cudnn-10.0-linux-x64-v7.5.0.56.tgz"
@@ -77,7 +77,7 @@ def before_case(self, log: Logger, **kwargs: Any) -> None:
7777
timeout=TIMEOUT,
7878
requirement=simple_requirement(
7979
supported_features=[GpuEnabled(), SerialConsole, AzureExtension],
80-
unsupported_os=[Suse, AlmaLinux, Oracle],
80+
unsupported_os=[AlmaLinux, Oracle],
8181
),
8282
priority=1,
8383
)
@@ -143,7 +143,7 @@ def verify_max_gpu_provision(self, node: Node, log: Logger) -> None:
143143
timeout=TIMEOUT,
144144
requirement=simple_requirement(
145145
supported_features=[GpuEnabled(), SerialConsole, AzureExtension],
146-
unsupported_os=[Suse, AlmaLinux, Oracle],
146+
unsupported_os=[AlmaLinux, Oracle],
147147
),
148148
priority=2,
149149
)
@@ -178,7 +178,7 @@ def verify_gpu_extension_installation(
178178
timeout=TIMEOUT,
179179
requirement=simple_requirement(
180180
supported_features=[GpuEnabled()],
181-
unsupported_os=[Suse, AlmaLinux, Oracle],
181+
unsupported_os=[AlmaLinux, Oracle],
182182
),
183183
priority=2,
184184
)
@@ -254,7 +254,7 @@ def verify_gpu_rescind_validation(
254254
priority=3,
255255
requirement=simple_requirement(
256256
supported_features=[GpuEnabled()],
257-
unsupported_os=[Suse, AlmaLinux, Oracle],
257+
unsupported_os=[AlmaLinux, Oracle],
258258
),
259259
)
260260
def verify_gpu_cuda_with_pytorch(

lisa/operating_system.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ def _get_package_list(
687687
self._initialize_package_installation()
688688
return package_names
689689

690-
def _install_package_from_url(
690+
def install_package_from_url(
691691
self,
692692
package_url: str,
693693
package_name: str = "",

lisa/tools/gpu_drivers.py

Lines changed: 72 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Oracle,
1616
Posix,
1717
Redhat,
18+
Suse,
1819
Ubuntu,
1920
)
2021

@@ -303,6 +304,10 @@ class NvidiaCudaDriver(GpuDriver):
303304
"""
304305

305306
DEFAULT_CUDA_VERSION = "10.1.243-1"
307+
DEFAULT_CUDA_PACKAGE = "cuda-drivers"
308+
NVIDIA_CUDA_REPO_BASE_URL = (
309+
"https://developer.download.nvidia.com/compute/cuda/repos"
310+
)
306311

307312
@classmethod
308313
def type_name(cls) -> str:
@@ -406,10 +411,11 @@ def _install_dependencies(self) -> None:
406411
if release == "7":
407412
assert isinstance(self.node.os, Posix)
408413
self._log.debug("Installing vulkan-filesystem for CentOS 7")
409-
self.node.os._install_package_from_url(
414+
package_url = (
410415
"https://vault.centos.org/centos/7/os/x86_64/Packages/"
411416
"vulkan-filesystem-1.1.97.0-1.el7.noarch.rpm"
412417
)
418+
self.node.os.install_package_from_url(package_url, signed=False)
413419

414420
def _install_driver(self) -> None:
415421
"""Install CUDA driver based on OS"""
@@ -436,8 +442,8 @@ def _install_cuda_redhat(self) -> None:
436442

437443
# Add CUDA repository
438444
self.node.os.add_repository(
439-
f"http://developer.download.nvidia.com/compute/cuda/"
440-
f"repos/rhel{release}/x86_64/cuda-rhel{release}.repo"
445+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
446+
f"rhel{release}/x86_64/cuda-rhel{release}.repo"
441447
)
442448

443449
# Install CUDA packages
@@ -450,108 +456,93 @@ def _install_cuda_redhat(self) -> None:
450456

451457
def _install_cuda_ubuntu(self) -> None:
452458
"""Install CUDA driver on Ubuntu"""
453-
self._log.debug("Installing CUDA driver for Ubuntu")
454-
455-
assert isinstance(self.node.os, Posix), "CUDA installation requires Posix OS"
456-
457-
cuda_package_name = "cuda-drivers"
458-
cuda_drivers_package_pattern = re.compile(
459-
r"^cuda-drivers-(\d+)/.*$", re.MULTILINE
460-
)
461-
462459
os_info = self.node.os.information
463-
release = re.sub("[^0-9]+", "", os_info.release)
464460

465-
# Handle unsupported releases by using closest supported version
466-
if release in ["2110", "2104"]:
467-
release = "2004"
468-
if release in ["2210", "2304", "2310"]:
469-
release = "2204"
461+
# NVIDIA only provides CUDA repos for LTS releases (even major version, XX.04)
462+
# For non-LTS releases or odd major versions, use the previous LTS release
463+
# e.g., 21.10 -> 20.04, 22.10 -> 22.04, 23.04 -> 22.04, 23.10 -> 22.04
464+
major_version = os_info.version.major
465+
minor_version = os_info.version.minor
470466

471-
# Install CUDA public GPG key
472-
cuda_keyring = "cuda-keyring_1.1-1_all.deb"
473-
self.node.tools[Wget].get(
474-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
475-
f"ubuntu{release}/x86_64/{cuda_keyring}"
476-
)
477-
self.node.execute(
478-
f"dpkg -i {cuda_keyring}",
479-
sudo=True,
480-
cwd=self.node.get_working_path(),
481-
)
467+
# If odd major version (e.g., 21, 23), use previous even major version
468+
if major_version % 2 == 1:
469+
major_version = major_version - 1
470+
release = f"{major_version}04"
471+
self._log.debug(
472+
f"Using previous LTS release {release} for CUDA repository "
473+
f"(original: {os_info.release}, odd major version)"
474+
)
475+
elif minor_version != 4:
476+
# Even major but non-LTS release (not XX.04), use corresponding LTS
477+
release = f"{major_version}04"
478+
self._log.debug(
479+
f"Using LTS release {release} for CUDA repository "
480+
f"(original: {os_info.release})"
481+
)
482+
else:
483+
# LTS release (even major, XX.04)
484+
release = f"{major_version}{minor_version:0>2}"
482485

483486
# For Ubuntu 16.04, use legacy installation method
484487
if release == "1604":
485-
cuda_repo_pkg = (
486-
f"cuda-repo-ubuntu{release}_" f"{self.DEFAULT_CUDA_VERSION}_amd64.deb"
487-
)
488-
cuda_repo = (
489-
f"http://developer.download.nvidia.com/compute/cuda/repos/"
490-
f"ubuntu{release}/x86_64/{cuda_repo_pkg}"
488+
self._log.debug("Installing CUDA driver for Ubuntu 16.04 (legacy method)")
489+
490+
assert isinstance(self.node.os, Ubuntu), "Ubuntu installation expected"
491+
492+
# Install CUDA keyring
493+
cuda_keyring_url = (
494+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
495+
f"ubuntu{release}/x86_64/cuda-keyring_1.1-1_all.deb"
491496
)
492-
self.node.os._install_package_from_url(
493-
cuda_repo, package_name="cuda-drivers.deb", signed=False
494-
)
495-
else:
496-
# Modern Ubuntu versions
497-
self.node.tools[Wget].get(
498-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
499-
f"ubuntu{release}/x86_64/cuda-ubuntu{release}.pin",
500-
"/etc/apt/preferences.d",
501-
"cuda-repository-pin-600",
502-
sudo=True,
503-
overwrite=False,
497+
self.node.os.install_package_from_url(
498+
cuda_keyring_url,
499+
package_name="cuda-keyring.deb",
500+
signed=False,
504501
)
505502

506-
# Add CUDA repository
507-
repo_entry = (
508-
f"deb http://developer.download.nvidia.com/compute/cuda/repos/"
509-
f"ubuntu{release}/x86_64/ /"
503+
# Install CUDA repository package
504+
cuda_repo_url = (
505+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
506+
f"ubuntu{release}/x86_64/cuda-repo-ubuntu{release}_"
507+
f"{self.DEFAULT_CUDA_VERSION}_amd64.deb"
510508
)
511-
self.node.execute(
512-
f'add-apt-repository -y "{repo_entry}"',
513-
sudo=True,
514-
expected_exit_code=0,
515-
expected_exit_code_failure_message=f"failed to add repo {repo_entry}",
509+
self.node.os.install_package_from_url(
510+
cuda_repo_url,
511+
package_name="cuda-drivers.deb",
512+
signed=False,
516513
)
517-
518-
# Find available CUDA driver versions
519-
result = self.node.execute(f"apt search {cuda_package_name}", sudo=True)
520-
available_versions = cuda_drivers_package_pattern.findall(result.stdout)
521-
522-
if available_versions:
523-
# Sort versions and select the highest one
524-
highest_version = max(available_versions, key=int)
525-
package_name = f"{cuda_package_name}-{highest_version}"
526-
else:
527-
raise MissingPackagesException([cuda_package_name])
528-
529-
self.node.os.install_packages(package_name)
514+
else:
515+
# Modern Ubuntu versions use the same method as Debian
516+
self._install_cuda_debian_based(f"ubuntu{release}")
530517

531518
self._log.info("Successfully installed CUDA driver for Ubuntu")
532519

533520
def _install_cuda_debian(self) -> None:
534521
"""Install CUDA driver on Debian"""
535-
self._log.debug("Installing CUDA driver for Debian")
522+
os_info = self.node.os.information
523+
major_version = str(os_info.version.major)
524+
release = f"debian{major_version}"
525+
self._install_cuda_debian_based(release)
526+
self._log.info("Successfully installed CUDA driver for Debian")
527+
528+
def _install_cuda_debian_based(self, release: str) -> None:
529+
"""
530+
Shared installation method for Debian-based distributions.
531+
Supports both Debian and Ubuntu.
532+
"""
533+
self._log.debug(f"Installing CUDA driver for {release}")
536534

537535
assert isinstance(self.node.os, Posix), "CUDA installation requires Posix OS"
538536

539-
cuda_package_name = "cuda-drivers"
537+
cuda_package_name = self.DEFAULT_CUDA_PACKAGE
540538
cuda_drivers_package_pattern = re.compile(
541-
r"^cuda-drivers-(\d+)\s", re.MULTILINE
539+
rf"^{cuda_package_name}-(\d+)\s", re.MULTILINE
542540
)
543541

544-
os_info = self.node.os.information
545-
major_version = str(os_info.version.major)
546-
547-
# Debian CUDA repos follow pattern: debian{major_version}
548-
release = f"debian{major_version}"
549-
550542
# Install CUDA public GPG key
551543
cuda_keyring = "cuda-keyring_1.1-1_all.deb"
552544
self.node.tools[Wget].get(
553-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
554-
f"{release}/x86_64/{cuda_keyring}"
545+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/{release}/x86_64/{cuda_keyring}"
555546
)
556547
self.node.execute(
557548
f"dpkg -i {cuda_keyring}",
@@ -562,7 +553,7 @@ def _install_cuda_debian(self) -> None:
562553
# Add CUDA repository
563554
repo_entry = (
564555
f"deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] "
565-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
556+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
566557
f"{release}/x86_64/ /"
567558
)
568559

@@ -593,8 +584,6 @@ def _install_cuda_debian(self) -> None:
593584

594585
self.node.os.install_packages(package_name)
595586

596-
self._log.info("Successfully installed CUDA driver for Debian")
597-
598587
def _install_cuda_mariner(self) -> None:
599588
"""Install CUDA driver on CBL-Mariner"""
600589
self._log.debug("Installing CUDA driver for CBL-Mariner")

0 commit comments

Comments
 (0)