1515 Oracle ,
1616 Posix ,
1717 Redhat ,
18+ Suse ,
1819 Ubuntu ,
1920)
2021
@@ -303,6 +304,10 @@ class NvidiaCudaDriver(GpuDriver):
303304 """
304305
305306 DEFAULT_CUDA_VERSION = "10.1.243-1"
307+ DEFAULT_CUDA_PACKAGE = "cuda-drivers"
308+ NVIDIA_CUDA_REPO_BASE_URL = (
309+ "https://developer.download.nvidia.com/compute/cuda/repos"
310+ )
306311
307312 @classmethod
308313 def type_name (cls ) -> str :
@@ -406,10 +411,11 @@ def _install_dependencies(self) -> None:
406411 if release == "7" :
407412 assert isinstance (self .node .os , Posix )
408413 self ._log .debug ("Installing vulkan-filesystem for CentOS 7" )
409- self . node . os . _install_package_from_url (
414+ package_url = (
410415 "https://vault.centos.org/centos/7/os/x86_64/Packages/"
411416 "vulkan-filesystem-1.1.97.0-1.el7.noarch.rpm"
412417 )
418+ self .node .os .install_package_from_url (package_url , signed = False )
413419
414420 def _install_driver (self ) -> None :
415421 """Install CUDA driver based on OS"""
@@ -436,8 +442,8 @@ def _install_cuda_redhat(self) -> None:
436442
437443 # Add CUDA repository
438444 self .node .os .add_repository (
439- f"http://developer.download.nvidia.com/compute/cuda /"
440- f"repos/ rhel{ release } /x86_64/cuda-rhel{ release } .repo"
445+ f"{ self . NVIDIA_CUDA_REPO_BASE_URL } /"
446+ f"rhel{ release } /x86_64/cuda-rhel{ release } .repo"
441447 )
442448
443449 # Install CUDA packages
@@ -450,108 +456,93 @@ def _install_cuda_redhat(self) -> None:
450456
451457 def _install_cuda_ubuntu (self ) -> None :
452458 """Install CUDA driver on Ubuntu"""
453- self ._log .debug ("Installing CUDA driver for Ubuntu" )
454-
455- assert isinstance (self .node .os , Posix ), "CUDA installation requires Posix OS"
456-
457- cuda_package_name = "cuda-drivers"
458- cuda_drivers_package_pattern = re .compile (
459- r"^cuda-drivers-(\d+)/.*$" , re .MULTILINE
460- )
461-
462459 os_info = self .node .os .information
463- release = re .sub ("[^0-9]+" , "" , os_info .release )
464460
465- # Handle unsupported releases by using closest supported version
466- if release in [ "2110" , "2104" ]:
467- release = "2004"
468- if release in [ "2210" , "2304" , "2310" ]:
469- release = "2204"
461+ # NVIDIA only provides CUDA repos for LTS releases (even major version, XX.04)
462+ # For non-LTS releases or odd major versions, use the previous LTS release
463+ # e.g., 21.10 -> 20.04, 22.10 -> 22.04, 23.04 -> 22.04, 23.10 -> 22.04
464+ major_version = os_info . version . major
465+ minor_version = os_info . version . minor
470466
471- # Install CUDA public GPG key
472- cuda_keyring = "cuda-keyring_1.1-1_all.deb"
473- self .node .tools [Wget ].get (
474- f"https://developer.download.nvidia.com/compute/cuda/repos/"
475- f"ubuntu{ release } /x86_64/{ cuda_keyring } "
476- )
477- self .node .execute (
478- f"dpkg -i { cuda_keyring } " ,
479- sudo = True ,
480- cwd = self .node .get_working_path (),
481- )
467+ # If odd major version (e.g., 21, 23), use previous even major version
468+ if major_version % 2 == 1 :
469+ major_version = major_version - 1
470+ release = f"{ major_version } 04"
471+ self ._log .debug (
472+ f"Using previous LTS release { release } for CUDA repository "
473+ f"(original: { os_info .release } , odd major version)"
474+ )
475+ elif minor_version != 4 :
476+ # Even major but non-LTS release (not XX.04), use corresponding LTS
477+ release = f"{ major_version } 04"
478+ self ._log .debug (
479+ f"Using LTS release { release } for CUDA repository "
480+ f"(original: { os_info .release } )"
481+ )
482+ else :
483+ # LTS release (even major, XX.04)
484+ release = f"{ major_version } { minor_version :0>2} "
482485
483486 # For Ubuntu 16.04, use legacy installation method
484487 if release == "1604" :
485- cuda_repo_pkg = (
486- f"cuda-repo-ubuntu{ release } _" f"{ self .DEFAULT_CUDA_VERSION } _amd64.deb"
487- )
488- cuda_repo = (
489- f"http://developer.download.nvidia.com/compute/cuda/repos/"
490- f"ubuntu{ release } /x86_64/{ cuda_repo_pkg } "
488+ self ._log .debug ("Installing CUDA driver for Ubuntu 16.04 (legacy method)" )
489+
490+ assert isinstance (self .node .os , Ubuntu ), "Ubuntu installation expected"
491+
492+ # Install CUDA keyring
493+ cuda_keyring_url = (
494+ f"{ self .NVIDIA_CUDA_REPO_BASE_URL } /"
495+ f"ubuntu{ release } /x86_64/cuda-keyring_1.1-1_all.deb"
491496 )
492- self .node .os ._install_package_from_url (
493- cuda_repo , package_name = "cuda-drivers.deb" , signed = False
494- )
495- else :
496- # Modern Ubuntu versions
497- self .node .tools [Wget ].get (
498- f"https://developer.download.nvidia.com/compute/cuda/repos/"
499- f"ubuntu{ release } /x86_64/cuda-ubuntu{ release } .pin" ,
500- "/etc/apt/preferences.d" ,
501- "cuda-repository-pin-600" ,
502- sudo = True ,
503- overwrite = False ,
497+ self .node .os .install_package_from_url (
498+ cuda_keyring_url ,
499+ package_name = "cuda-keyring.deb" ,
500+ signed = False ,
504501 )
505502
506- # Add CUDA repository
507- repo_entry = (
508- f"deb http://developer.download.nvidia.com/compute/cuda/repos/"
509- f"ubuntu{ release } /x86_64/ /"
503+ # Install CUDA repository package
504+ cuda_repo_url = (
505+ f"{ self .NVIDIA_CUDA_REPO_BASE_URL } /"
506+ f"ubuntu{ release } /x86_64/cuda-repo-ubuntu{ release } _"
507+ f"{ self .DEFAULT_CUDA_VERSION } _amd64.deb"
510508 )
511- self .node .execute (
512- f'add-apt-repository -y "{ repo_entry } "' ,
513- sudo = True ,
514- expected_exit_code = 0 ,
515- expected_exit_code_failure_message = f"failed to add repo { repo_entry } " ,
509+ self .node .os .install_package_from_url (
510+ cuda_repo_url ,
511+ package_name = "cuda-drivers.deb" ,
512+ signed = False ,
516513 )
517-
518- # Find available CUDA driver versions
519- result = self .node .execute (f"apt search { cuda_package_name } " , sudo = True )
520- available_versions = cuda_drivers_package_pattern .findall (result .stdout )
521-
522- if available_versions :
523- # Sort versions and select the highest one
524- highest_version = max (available_versions , key = int )
525- package_name = f"{ cuda_package_name } -{ highest_version } "
526- else :
527- raise MissingPackagesException ([cuda_package_name ])
528-
529- self .node .os .install_packages (package_name )
514+ else :
515+ # Modern Ubuntu versions use the same method as Debian
516+ self ._install_cuda_debian_based (f"ubuntu{ release } " )
530517
531518 self ._log .info ("Successfully installed CUDA driver for Ubuntu" )
532519
533520 def _install_cuda_debian (self ) -> None :
534521 """Install CUDA driver on Debian"""
535- self ._log .debug ("Installing CUDA driver for Debian" )
522+ os_info = self .node .os .information
523+ major_version = str (os_info .version .major )
524+ release = f"debian{ major_version } "
525+ self ._install_cuda_debian_based (release )
526+ self ._log .info ("Successfully installed CUDA driver for Debian" )
527+
528+ def _install_cuda_debian_based (self , release : str ) -> None :
529+ """
530+ Shared installation method for Debian-based distributions.
531+ Supports both Debian and Ubuntu.
532+ """
533+ self ._log .debug (f"Installing CUDA driver for { release } " )
536534
537535 assert isinstance (self .node .os , Posix ), "CUDA installation requires Posix OS"
538536
539- cuda_package_name = "cuda-drivers"
537+ cuda_package_name = self . DEFAULT_CUDA_PACKAGE
540538 cuda_drivers_package_pattern = re .compile (
541- r"^cuda-drivers -(\d+)\s" , re .MULTILINE
539+ rf"^ { cuda_package_name } -(\d+)\s" , re .MULTILINE
542540 )
543541
544- os_info = self .node .os .information
545- major_version = str (os_info .version .major )
546-
547- # Debian CUDA repos follow pattern: debian{major_version}
548- release = f"debian{ major_version } "
549-
550542 # Install CUDA public GPG key
551543 cuda_keyring = "cuda-keyring_1.1-1_all.deb"
552544 self .node .tools [Wget ].get (
553- f"https://developer.download.nvidia.com/compute/cuda/repos/"
554- f"{ release } /x86_64/{ cuda_keyring } "
545+ f"{ self .NVIDIA_CUDA_REPO_BASE_URL } /{ release } /x86_64/{ cuda_keyring } "
555546 )
556547 self .node .execute (
557548 f"dpkg -i { cuda_keyring } " ,
@@ -562,7 +553,7 @@ def _install_cuda_debian(self) -> None:
562553 # Add CUDA repository
563554 repo_entry = (
564555 f"deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] "
565- f"https://developer.download.nvidia.com/compute/cuda/repos /"
556+ f"{ self . NVIDIA_CUDA_REPO_BASE_URL } /"
566557 f"{ release } /x86_64/ /"
567558 )
568559
@@ -593,8 +584,6 @@ def _install_cuda_debian(self) -> None:
593584
594585 self .node .os .install_packages (package_name )
595586
596- self ._log .info ("Successfully installed CUDA driver for Debian" )
597-
598587 def _install_cuda_mariner (self ) -> None :
599588 """Install CUDA driver on CBL-Mariner"""
600589 self ._log .debug ("Installing CUDA driver for CBL-Mariner" )
0 commit comments