Skip to content

Commit b35f262

Browse files
committed
GPU case: Add SUSE support, refactor Ubuntu Debian Installation
1 parent 0afeb41 commit b35f262

File tree

4 files changed

+98
-83
lines changed

4 files changed

+98
-83
lines changed

lisa/microsoft/testsuites/cvm/cvm_attestation_tool.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ def _install(self) -> bool:
8989

9090
git.clone(self.repo, root_path)
9191

92-
posix_os._install_package_from_url(
92+
posix_os.install_package_from_url(
9393
self.deb_file, package_name="azguestattestation1.deb"
9494
)
9595
self.node.execute(

lisa/microsoft/testsuites/gpu/gpusuite.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
Debian,
2424
Linux,
2525
Oracle,
26-
Suse,
2726
Ubuntu,
2827
Windows,
2928
)
@@ -77,7 +76,7 @@ def before_case(self, log: Logger, **kwargs: Any) -> None:
7776
timeout=TIMEOUT,
7877
requirement=simple_requirement(
7978
supported_features=[GpuEnabled(), SerialConsole, AzureExtension],
80-
unsupported_os=[Suse, AlmaLinux, Oracle],
79+
unsupported_os=[AlmaLinux, Oracle],
8180
),
8281
priority=1,
8382
)
@@ -143,7 +142,7 @@ def verify_max_gpu_provision(self, node: Node, log: Logger) -> None:
143142
timeout=TIMEOUT,
144143
requirement=simple_requirement(
145144
supported_features=[GpuEnabled(), SerialConsole, AzureExtension],
146-
unsupported_os=[Suse, AlmaLinux, Oracle],
145+
unsupported_os=[AlmaLinux, Oracle],
147146
),
148147
priority=2,
149148
)
@@ -178,7 +177,7 @@ def verify_gpu_extension_installation(
178177
timeout=TIMEOUT,
179178
requirement=simple_requirement(
180179
supported_features=[GpuEnabled()],
181-
unsupported_os=[Suse, AlmaLinux, Oracle],
180+
unsupported_os=[AlmaLinux, Oracle],
182181
),
183182
priority=2,
184183
)
@@ -254,7 +253,7 @@ def verify_gpu_rescind_validation(
254253
priority=3,
255254
requirement=simple_requirement(
256255
supported_features=[GpuEnabled()],
257-
unsupported_os=[Suse, AlmaLinux, Oracle],
256+
unsupported_os=[AlmaLinux, Oracle],
258257
),
259258
)
260259
def verify_gpu_cuda_with_pytorch(

lisa/operating_system.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -687,7 +687,7 @@ def _get_package_list(
687687
self._initialize_package_installation()
688688
return package_names
689689

690-
def _install_package_from_url(
690+
def install_package_from_url(
691691
self,
692692
package_url: str,
693693
package_name: str = "",

lisa/tools/gpu_drivers.py

Lines changed: 92 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
Oracle,
1616
Posix,
1717
Redhat,
18+
Suse,
1819
Ubuntu,
1920
)
2021

@@ -303,6 +304,9 @@ class NvidiaCudaDriver(GpuDriver):
303304
"""
304305

305306
DEFAULT_CUDA_VERSION = "10.1.243-1"
307+
NVIDIA_CUDA_REPO_BASE_URL = (
308+
"https://developer.download.nvidia.com/compute/cuda/repos"
309+
)
306310

307311
@classmethod
308312
def type_name(cls) -> str:
@@ -331,6 +335,8 @@ def can_install(self) -> bool:
331335
# Only Debian 10, 11, 12 are currently supported by NVIDIA CUDA repos
332336
major_version = os_info.version.major
333337
return bool(os_info.version >= "10.0.0" and major_version <= 12)
338+
elif isinstance(self.node.os, Suse):
339+
return bool(os_info.version >= "15.0.0")
334340
elif isinstance(self.node.os, CBLMariner):
335341
return bool(os_info.version >= "2.0.0")
336342

@@ -406,10 +412,14 @@ def _install_dependencies(self) -> None:
406412
if release == "7":
407413
assert isinstance(self.node.os, Posix)
408414
self._log.debug("Installing vulkan-filesystem for CentOS 7")
409-
self.node.os._install_package_from_url(
415+
package_url = (
410416
"https://vault.centos.org/centos/7/os/x86_64/Packages/"
411417
"vulkan-filesystem-1.1.97.0-1.el7.noarch.rpm"
412418
)
419+
pkg = self.node.tools[Wget].get(
420+
package_url, str(self.node.working_path)
421+
)
422+
self.node.os.install_packages(pkg, signed=False)
413423

414424
def _install_driver(self) -> None:
415425
"""Install CUDA driver based on OS"""
@@ -419,6 +429,8 @@ def _install_driver(self) -> None:
419429
self._install_cuda_ubuntu()
420430
elif isinstance(self.node.os, Debian):
421431
self._install_cuda_debian()
432+
elif isinstance(self.node.os, Suse):
433+
self._install_cuda_suse()
422434
elif isinstance(self.node.os, CBLMariner):
423435
self._install_cuda_mariner()
424436
else:
@@ -448,91 +460,104 @@ def _install_cuda_redhat(self) -> None:
448460
self.node.os.install_packages(packages, signed=False)
449461
self._log.info(f"Successfully installed CUDA driver packages: {packages}")
450462

451-
def _install_cuda_ubuntu(self) -> None:
452-
"""Install CUDA driver on Ubuntu"""
453-
self._log.debug("Installing CUDA driver for Ubuntu")
463+
def _install_cuda_suse(self) -> None:
464+
"""Install CUDA driver on SUSE"""
465+
self._log.debug("Installing CUDA driver for SUSE")
454466

455467
assert isinstance(self.node.os, Posix), "CUDA installation requires Posix OS"
456468

457-
cuda_package_name = "cuda-drivers"
458-
cuda_drivers_package_pattern = re.compile(
459-
r"^cuda-drivers-(\d+)/.*$", re.MULTILINE
469+
os_info = self.node.os.information
470+
major_version = str(os_info.version.major)
471+
472+
# Add CUDA repository for SUSE
473+
# SUSE repos follow pattern: sles{major_version}
474+
self.node.os.add_repository(
475+
f"http://developer.download.nvidia.com/compute/cuda/"
476+
f"repos/sles{major_version}/x86_64/cuda-sles{major_version}.repo"
460477
)
461478

479+
# Install CUDA packages
480+
packages = ["nvidia-driver-cuda"]
481+
self.node.os.install_packages(packages, signed=False)
482+
self._log.info(f"Successfully installed CUDA driver packages: {packages}")
483+
484+
def _install_cuda_ubuntu(self) -> None:
485+
"""Install CUDA driver on Ubuntu"""
462486
os_info = self.node.os.information
463-
release = re.sub("[^0-9]+", "", os_info.release)
464487

465-
# Handle unsupported releases by using closest supported version
466-
if release in ["2110", "2104"]:
467-
release = "2004"
468-
if release in ["2210", "2304", "2310"]:
469-
release = "2204"
488+
# NVIDIA only provides CUDA repos for LTS releases (even major version, XX.04)
489+
# For non-LTS releases or odd major versions, use the previous LTS release
490+
# e.g., 21.10 -> 20.04, 22.10 -> 22.04, 23.04 -> 22.04, 23.10 -> 22.04
491+
major_version = os_info.version.major
492+
minor_version = os_info.version.minor
470493

471-
# Install CUDA public GPG key
472-
cuda_keyring = "cuda-keyring_1.1-1_all.deb"
473-
self.node.tools[Wget].get(
474-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
475-
f"ubuntu{release}/x86_64/{cuda_keyring}"
476-
)
477-
self.node.execute(
478-
f"dpkg -i {cuda_keyring}",
479-
sudo=True,
480-
cwd=self.node.get_working_path(),
481-
)
494+
# If odd major version (e.g., 21, 23), use previous even major version
495+
if major_version % 2 == 1:
496+
major_version = major_version - 1
497+
release = f"{major_version}04"
498+
self._log.debug(
499+
f"Using previous LTS release {release} for CUDA repository "
500+
f"(original: {os_info.release}, odd major version)"
501+
)
502+
elif minor_version != 4:
503+
# Even major but non-LTS release (not XX.04), use corresponding LTS
504+
release = f"{major_version}04"
505+
self._log.debug(
506+
f"Using LTS release {release} for CUDA repository "
507+
f"(original: {os_info.release})"
508+
)
509+
else:
510+
# LTS release (even major, XX.04)
511+
release = f"{major_version}{minor_version:0>2}"
482512

483513
# For Ubuntu 16.04, use legacy installation method
484514
if release == "1604":
485-
cuda_repo_pkg = (
486-
f"cuda-repo-ubuntu{release}_" f"{self.DEFAULT_CUDA_VERSION}_amd64.deb"
487-
)
488-
cuda_repo = (
489-
f"http://developer.download.nvidia.com/compute/cuda/repos/"
490-
f"ubuntu{release}/x86_64/{cuda_repo_pkg}"
491-
)
492-
self.node.os._install_package_from_url(
493-
cuda_repo, package_name="cuda-drivers.deb", signed=False
515+
self._log.debug("Installing CUDA driver for Ubuntu 16.04 (legacy method)")
516+
517+
assert isinstance(self.node.os, Ubuntu), "Ubuntu installation expected"
518+
519+
# Install CUDA keyring
520+
cuda_keyring_url = (
521+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
522+
f"ubuntu{release}/x86_64/cuda-keyring_1.1-1_all.deb"
494523
)
495-
else:
496-
# Modern Ubuntu versions
497-
self.node.tools[Wget].get(
498-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
499-
f"ubuntu{release}/x86_64/cuda-ubuntu{release}.pin",
500-
"/etc/apt/preferences.d",
501-
"cuda-repository-pin-600",
502-
sudo=True,
503-
overwrite=False,
524+
self.node.os.install_package_from_url(
525+
cuda_keyring_url,
526+
package_name="cuda-keyring.deb",
527+
signed=False,
504528
)
505529

506-
# Add CUDA repository
507-
repo_entry = (
508-
f"deb http://developer.download.nvidia.com/compute/cuda/repos/"
509-
f"ubuntu{release}/x86_64/ /"
530+
# Install CUDA repository package
531+
cuda_repo_url = (
532+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
533+
f"ubuntu{release}/x86_64/cuda-repo-ubuntu{release}_"
534+
f"{self.DEFAULT_CUDA_VERSION}_amd64.deb"
510535
)
511-
self.node.execute(
512-
f'add-apt-repository -y "{repo_entry}"',
513-
sudo=True,
514-
expected_exit_code=0,
515-
expected_exit_code_failure_message=f"failed to add repo {repo_entry}",
536+
self.node.os.install_package_from_url(
537+
cuda_repo_url,
538+
package_name="cuda-drivers.deb",
539+
signed=False,
516540
)
517-
518-
# Find available CUDA driver versions
519-
result = self.node.execute(f"apt search {cuda_package_name}", sudo=True)
520-
available_versions = cuda_drivers_package_pattern.findall(result.stdout)
521-
522-
if available_versions:
523-
# Sort versions and select the highest one
524-
highest_version = max(available_versions, key=int)
525-
package_name = f"{cuda_package_name}-{highest_version}"
526-
else:
527-
raise MissingPackagesException([cuda_package_name])
528-
529-
self.node.os.install_packages(package_name)
541+
else:
542+
# Modern Ubuntu versions use the same method as Debian
543+
self._install_cuda_debian_based(f"ubuntu{release}")
530544

531545
self._log.info("Successfully installed CUDA driver for Ubuntu")
532546

533547
def _install_cuda_debian(self) -> None:
534548
"""Install CUDA driver on Debian"""
535-
self._log.debug("Installing CUDA driver for Debian")
549+
os_info = self.node.os.information
550+
major_version = str(os_info.version.major)
551+
release = f"debian{major_version}"
552+
self._install_cuda_debian_based(release)
553+
self._log.info("Successfully installed CUDA driver for Debian")
554+
555+
def _install_cuda_debian_based(self, release: str) -> None:
556+
"""
557+
Shared installation method for Debian-based distributions.
558+
Supports both Debian and Ubuntu.
559+
"""
560+
self._log.debug(f"Installing CUDA driver for {release}")
536561

537562
assert isinstance(self.node.os, Posix), "CUDA installation requires Posix OS"
538563

@@ -541,17 +566,10 @@ def _install_cuda_debian(self) -> None:
541566
r"^cuda-drivers-(\d+)\s", re.MULTILINE
542567
)
543568

544-
os_info = self.node.os.information
545-
major_version = str(os_info.version.major)
546-
547-
# Debian CUDA repos follow pattern: debian{major_version}
548-
release = f"debian{major_version}"
549-
550569
# Install CUDA public GPG key
551570
cuda_keyring = "cuda-keyring_1.1-1_all.deb"
552571
self.node.tools[Wget].get(
553-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
554-
f"{release}/x86_64/{cuda_keyring}"
572+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/{release}/x86_64/{cuda_keyring}"
555573
)
556574
self.node.execute(
557575
f"dpkg -i {cuda_keyring}",
@@ -562,7 +580,7 @@ def _install_cuda_debian(self) -> None:
562580
# Add CUDA repository
563581
repo_entry = (
564582
f"deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] "
565-
f"https://developer.download.nvidia.com/compute/cuda/repos/"
583+
f"{self.NVIDIA_CUDA_REPO_BASE_URL}/"
566584
f"{release}/x86_64/ /"
567585
)
568586

@@ -593,8 +611,6 @@ def _install_cuda_debian(self) -> None:
593611

594612
self.node.os.install_packages(package_name)
595613

596-
self._log.info("Successfully installed CUDA driver for Debian")
597-
598614
def _install_cuda_mariner(self) -> None:
599615
"""Install CUDA driver on CBL-Mariner"""
600616
self._log.debug("Installing CUDA driver for CBL-Mariner")

0 commit comments

Comments
 (0)