CUDA compatibility with CTranslate2 #1086

MahmoudAshraf97 · 2024-10-24T17:16:34Z

Hi Everyone,
as per @BBC-Esq research, ctranslate2>=4.5.0 uses CuDNN v9 which requires CUDA >= 12.3.
Since most issues stem from conflicting torch and ctranslate2 installations, these are the tested working combinations:

Torch Version	CT2 Version
`2.*.*+cu121`	`<=4.4.0`
`2.*.*+cu124`	`>=4.5.0`
`>=2.4.0`	`>=4.5.0`
`<2.4.0`	`<4.5.0`

For Google Colab users, the quick solution is to downgrade to ctranslate2 4.4.0, as of 24/10/2024 Colab uses torch==2.5.0+cu121.

The text was updated successfully, but these errors were encountered:

jhj0517 · 2024-10-24T17:33:40Z

Thanks for the version matrix & appreciate your work!

Currently colab uses CUDA 12.2 and torch==2.5.0+cu121 by default,
So it needs to be installed with:

!pip install faster-whisper ctranslate2==4.4.0

I think it would be better to include this version matrix in the README or somewhere.

MahmoudAshraf97 · 2024-10-24T17:41:25Z

It's going to be outdated in a week or two once colab changes the version, I've pinned the issue for visibility

BBC-Esq · 2024-10-28T18:37:07Z

In the meantime, pursuant to the discussion at OpenNMT/CTranslate2#1806, I've created a script that will download the appropriate CUDA Toolkit files (by version), or you can choose to download the cuDNN files. Remember, you must still set the appropriate PATH and other variables...and you must still make sure that the cuDNN version you're using is compatible with CUDA and/or Torch and/or CTranslate2 and/or any other library you plan to use in your program.

You will only need to pip install pyside6 above and beyond the standard python libraries used. You should name this script download_cuda.py.

<details><summary>FULL SCRIPT HERE</summary>

```python
__version__ = "0.5.0"
minimum = "3.8"

import sys
# Refuse to run on an interpreter older than the declared minimum version.
if sys.version_info < tuple(map(int, minimum.split("."))):
    print(
        "ERROR: script", __file__, "version", __version__, "requires Python %s or later" % minimum
    )
    sys.exit(1)

import argparse
import os
import stat
import json
import re
import shutil
import tarfile
import zipfile
from urllib.request import urlopen
from pathlib import Path
from PySide6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QHBoxLayout, QLabel, QComboBox, QLineEdit, QPushButton,
QTextEdit, QFileDialog, QMessageBox)
from PySide6.QtCore import Qt, QThread, Signal
import subprocess

# Archive paths collected per platform key while a manifest is processed.
ARCHIVES = dict()
# Host on which every manifest and archive URL is built.
DOMAIN = "https://developer.download.nvidia.com"

# GUI label -> release label, in the order shown in the release picker.
CUDA_RELEASES = dict(
    [
        ("CUDA 12.4.0", "12.4.0"),
        ("CUDA 12.4.1", "12.4.1"),
        ("CUDA 12.5.0", "12.5.0"),
        ("CUDA 12.5.1", "12.5.1"),
        ("CUDA 12.6.0", "12.6.0"),
        ("CUDA 12.6.1", "12.6.1"),
        ("CUDA 12.6.2", "12.6.2"),
    ]
)

# cuDNN release labels; the GUI uses these strings directly as labels.
CUDNN_RELEASES = list(
    (
        "9.0.0",
        "9.1.1",
        "9.2.0",
        "9.2.1",
        "9.3.0",
        "9.4.0",
        "9.5.0",
        "9.5.1",
    )
)

# GUI label -> product segment of the download URL.
PRODUCTS = dict(
    [
        ("CUDA Toolkit", "cuda"),
        ("cuDNN", "cudnn"),
    ]
)

# GUI label -> operating-system half of the "<os>-<arch>" platform key.
OPERATING_SYSTEMS = dict(
    [
        ("Windows", "windows"),
        ("Linux", "linux"),
    ]
)

# GUI label -> architecture half of the "<os>-<arch>" platform key.
ARCHITECTURES = dict(
    [
        ("x86_64", "x86_64"),
        ("PPC64le (Linux only)", "ppc64le"),
        ("SBSA (Linux only)", "sbsa"),
        ("AARCH64 (Linux only)", "aarch64"),
    ]
)

# GUI label -> variant key passed on as the --variant option.
VARIANTS = dict(
    [
        ("CUDA 11", "cuda11"),
        ("CUDA 12", "cuda12"),
    ]
)

# GUI label -> component key used to filter the manifest (None = no filter).
# Every label must be unique: a repeated key in a dict literal silently keeps
# only the last value, which previously made "cuda_nvdisasm" unselectable.
COMPONENTS = {
    "All Components": None,
    "CUDA Runtime (cudart)": "cuda_cudart",
    "CXX Core Compute Libraries": "cuda_cccl",
    "CUDA Object Dump Tool": "cuda_cuobjdump",
    "CUDA Profiling Tools Interface": "cuda_cupti",
    "CUDA Demangler Tool": "cuda_cuxxfilt",
    "CUDA Demo Suite": "cuda_demo_suite",
    "CUDA Documentation": "cuda_documentation",
    "NVIDIA CUDA Compiler": "cuda_nvcc",
    "CUDA Binary Utility (nvdisasm)": "cuda_nvdisasm",
    "NVIDIA Management Library Headers": "cuda_nvml_dev",
    "CUDA Profiler": "cuda_nvprof",
    "CUDA Binary Utility (nvprune)": "cuda_nvprune",
    "CUDA Runtime Compilation Library": "cuda_nvrtc",
    "CUDA Tools SDK": "cuda_nvtx",
    "NVIDIA Visual Profiler": "cuda_nvvp",
    "CUDA OpenCL": "cuda_opencl",
    "CUDA Profiler API": "cuda_profiler_api",
    "CUDA Compute Sanitizer API": "cuda_sanitizer_api",
    "CUDA BLAS Library": "libcublas",
    "CUDA FFT Library": "libcufft",
    "CUDA Random Number Generation Library": "libcurand",
    "CUDA Solver Library": "libcusolver",
    "CUDA Sparse Matrix Library": "libcusparse",
    "NVIDIA Performance Primitives Library": "libnpp",
    "NVIDIA Fatbin Utilities": "libnvfatbin",
    "NVIDIA JIT Linker Library": "libnvjitlink",
    "NVIDIA JPEG Library": "libnvjpeg",
    "Nsight Compute": "nsight_compute",
    "Nsight Systems": "nsight_systems",
    "Nsight Visual Studio Edition": "nsight_vse",
    "Visual Studio Integration": "visual_studio_integration",
}

def err(msg):
    """Report a fatal error and terminate the script.

    Args:
        msg: Description of the failure; printed with an "ERROR: " prefix.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    # Diagnostics go to stderr, matching how main() reports download errors.
    print("ERROR: " + msg, file=sys.stderr)
    sys.exit(1)

def fetch_file(full_path, filename):
    """Download ``full_path`` and store it as ``filename``.

    Args:
        full_path: URL of the file to download.
        filename: Path of the local file to write (opened in binary mode).

    Any exception raised by ``urlopen`` or by the file write propagates to
    the caller.
    """
    # The context manager closes the HTTP response even if the write fails.
    with urlopen(full_path) as download:
        if download.status != 200:
            print(" -> Failed: " + filename)
            return

        print(":: Fetching: " + full_path)
        with open(filename, "wb") as file:
            # Copy in chunks rather than holding the whole archive in memory.
            shutil.copyfileobj(download, file)
        print(" -> Wrote: " + filename)

def fix_permissions(directory):
    """Add the write-permission bit to every file found under ``directory``.

    Args:
        directory: Root of the tree to walk (via ``os.walk``).
    """
    for root, _dirs, files in os.walk(directory):
        for name in files:
            filename = os.path.join(root, name)
            # os.stat follows symlinks and would raise on a dangling link;
            # such links have no target to fix, so skip them.
            if os.path.islink(filename) and not os.path.exists(filename):
                continue
            mode = os.stat(filename).st_mode
            os.chmod(filename, mode | stat.S_IWRITE)

def flatten_tree(src, dest, tag=None):
    """Merge the tree at ``src`` into ``dest`` and then delete ``src``.

    Args:
        src: Directory to copy from; removed once the copy is done.
        dest: Destination directory; created or merged into if it exists.
        tag: Optional sub-directory name appended to ``dest`` first.
    """
    if tag:
        dest = os.path.join(dest, tag)

    try:
        # Symlinks are copied as links; existing destination directories are
        # merged into rather than treated as an error.
        shutil.copytree(
            src,
            dest,
            symlinks=True,
            dirs_exist_ok=True,
            ignore_dangling_symlinks=True,
        )
    except FileExistsError:
        pass
    shutil.rmtree(src)

def parse_artifact(
    parent,
    manifest,
    component,
    platform,
    retrieve=True,
    variant=None,
):
    """Locate or download one archive and record it in ``ARCHIVES[platform]``.

    Args:
        parent: Base location that the manifest's relative path is appended to.
        manifest: Parsed manifest mapping.
        component: Component key inside ``manifest``.
        platform: Platform key inside ``manifest[component]``.
        retrieve: When true, download the archive if no local copy is found.
        variant: Optional variant key nested below the platform entry.

    An existing copy is looked for, in order, at: the bare file name in the
    working directory, ``full_path``, ``parent/<file>`` and
    ``<cwd>/<component>/<platform>/<file>``. The first hit is recorded. If
    nothing is found the archive is downloaded (``retrieve`` true) or only
    reported (``retrieve`` false).
    """
    entry = manifest[component][platform]
    if variant:
        entry = entry[variant]
    full_path = parent + entry["relative_path"]

    filename = os.path.basename(full_path)
    pwd = os.path.join(os.getcwd(), component, platform)

    # The same joined paths are used for the existence check and for the
    # recorded location, so a copy under ``pwd`` is not downloaded again.
    candidates = (
        filename,
        full_path,
        os.path.join(parent, filename),
        os.path.join(pwd, filename),
    )
    found = next((path for path in candidates if os.path.exists(path)), None)

    if found is not None:
        print("  -> Found: " + found)
        ARCHIVES.setdefault(platform, []).append(found)
    elif retrieve:
        fetch_file(full_path, filename)
        ARCHIVES.setdefault(platform, []).append(filename)
    else:
        print("Parent: " + os.path.join(pwd, filename))
        print("  -> Artifact: " + filename)

def fetch_action(
    parent, manifest, component_filter, platform_filter, cuda_filter, retrieve
):
    """Walk the manifest and hand every selected artifact to ``parse_artifact``.

    Args:
        parent: Base location passed through to ``parse_artifact``.
        manifest: Parsed manifest mapping.
        component_filter: Only this component key is processed; None = all.
        platform_filter: Only this platform key (plus "source") is processed;
            None = all.
        cuda_filter: Only this variant key is processed for entries that are
            split by variant; None = all.
        retrieve: Passed through to ``parse_artifact``.
    """
    for component, entry in manifest.items():
        # Top-level metadata values are not components; components are
        # mappings that carry a "name".
        if not isinstance(entry, dict) or "name" not in entry:
            continue

        if component_filter is not None and component != component_filter:
            continue

        print("\n" + entry["name"] + ": " + entry["version"])

        for platform, artifacts in entry.items():
            if "variant" in platform:
                continue

            # Descriptive fields (name, version, ...) are not platforms, so
            # they are skipped without being registered in ARCHIVES.
            if not isinstance(artifacts, dict):
                continue

            if (
                platform_filter is not None
                and platform != platform_filter
                and platform != "source"
            ):
                print("  -> Skipping platform: " + platform)
                continue

            ARCHIVES.setdefault(platform, [])

            if "relative_path" in artifacts:
                parse_artifact(parent, manifest, component, platform, retrieve)
                continue

            # No direct path: the entry is split into one sub-entry per variant.
            for variant in artifacts:
                if cuda_filter is not None and variant != cuda_filter:
                    print("  -> Skipping variant: " + variant)
                    continue

                parse_artifact(parent, manifest, component, platform, retrieve, variant)

def _variant_tag(archive):
    """Return the tag after "_" in the 4th "-"-separated field of the file name, or None."""
    # Parse the base name only: the directory part may itself contain "-"
    # (e.g. a "linux-x86_64" folder) and would shift the fields.
    try:
        return os.path.basename(archive).split("-")[3].split("_")[1]
    except IndexError:
        return None


def post_action(output_dir, collapse=True):
    """Extract every archive recorded in ``ARCHIVES`` and list the output.

    Args:
        output_dir: Directory that receives the flattened trees; created when
            missing.
        collapse: When true, merge each extracted tree into
            ``output_dir/<platform>[/<tag>]`` via ``flatten_tree``.

    Archives are extracted into the current working directory. Does nothing
    when ``ARCHIVES`` is empty.
    """
    if not ARCHIVES:
        return

    print("\nArchives:")
    os.makedirs(output_dir, exist_ok=True)

    for platform, archives in ARCHIVES.items():
        for archive in archives:
            bin_tag = _variant_tag(archive)

            if re.search(r"\.tar\.", archive):
                print(":: tar: " + archive)
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive; consider an extraction filter if archives can
                # come from an untrusted source.
                with tarfile.open(archive) as tarball:
                    topdir = os.path.commonprefix(tarball.getnames())
                    tarball.extractall()
                print("  -> Extracted: " + topdir + "/")
            elif re.search(r"\.zip", archive):
                print(":: zip: " + archive)
                with zipfile.ZipFile(archive) as zippy:
                    topdir = os.path.commonprefix(zippy.namelist())
                    zippy.extractall()
                print("  -> Extracted: " + topdir)
            else:
                # Neither a tarball nor a zip: nothing to extract.
                continue

            fix_permissions(topdir)

            if collapse:
                flatdir = os.path.join(output_dir, platform)
                flatten_tree(topdir, flatdir, bin_tag)
                print("  -> Flattened: " + flatdir + "/")

    print("\nOutput: " + output_dir + "/")
    for item in sorted(os.listdir(output_dir)):
        item_path = os.path.join(output_dir, item)
        if os.path.isdir(item_path):
            print(" - " + item + "/")
        elif os.path.isfile(item_path):
            print(" - " + item)

class DownloadWorker(QThread):
    """Thread that re-runs this script as a child process in ``--download-only`` mode."""

    # Emitted when the child process ends: (success flag, error text).
    # NOTE(review): this reuses the name of QThread's own ``finished`` signal;
    # confirm the override behaves as intended on the PySide6 version in use.
    finished = Signal(bool, str)

    def __init__(self, args):
        """Store the option namespace that is turned into command-line flags."""
        super().__init__()
        self.args = args

    def run(self):
        """Run the download in a subprocess and report the outcome via ``finished``."""
        cmd = [
            sys.executable,
            sys.argv[0],
            "--download-only",
        ]

        # Each attribute that is set becomes "--<name> <value>"; None is omitted.
        for arg, value in vars(self.args).items():
            if value is not None:
                cmd.extend([f"--{arg.replace('_', '-')}", str(value)])

        try:
            subprocess.run(
                cmd,
                check=True,
                capture_output=True,
                text=True
            )
        except subprocess.CalledProcessError as e:
            # The child prints its error on stderr, so include that stream too.
            self.finished.emit(
                False, f"{str(e)}\nOutput: {e.output}\nError output: {e.stderr}"
            )
        except OSError as e:
            # The child could not be started; still report so the GUI recovers.
            self.finished.emit(False, str(e))
        else:
            self.finished.emit(True, "")

class DownloaderGUI(QMainWindow):
    """Main window: option pickers, a command preview and a download button."""

    def __init__(self):
        """Create the window and build all widgets."""
        super().__init__()
        self.setWindowTitle("NVIDIA Package Downloader")
        self.download_worker = None
        self.setup_ui()

    def setup_ui(self):
        """Build the widget tree and connect the signals."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)
        layout.setSpacing(10)

        # Product selection
        product_layout = QHBoxLayout()
        product_label = QLabel("Product:")
        self.product_combo = QComboBox()
        self.product_combo.addItems(PRODUCTS.keys())
        self.product_combo.currentTextChanged.connect(self.on_product_change)
        product_layout.addWidget(product_label)
        product_layout.addWidget(self.product_combo)
        layout.addLayout(product_layout)

        # Release Label selection
        version_layout = QHBoxLayout()
        version_label = QLabel("Release Label:")
        self.version_combo = QComboBox()
        self.version_combo.addItems(CUDA_RELEASES.keys())
        version_layout.addWidget(version_label)
        version_layout.addWidget(self.version_combo)
        layout.addLayout(version_layout)

        # OS selection
        os_layout = QHBoxLayout()
        os_label = QLabel("Operating System:")
        self.os_combo = QComboBox()
        self.os_combo.addItems(OPERATING_SYSTEMS.keys())
        os_layout.addWidget(os_label)
        os_layout.addWidget(self.os_combo)
        layout.addLayout(os_layout)

        # Architecture selection
        arch_layout = QHBoxLayout()
        arch_label = QLabel("Architecture:")
        self.arch_combo = QComboBox()
        self.arch_combo.addItems(ARCHITECTURES.keys())
        arch_layout.addWidget(arch_label)
        arch_layout.addWidget(self.arch_combo)
        layout.addLayout(arch_layout)

        # Component selection. COMPONENTS already starts with
        # "All Components", so adding that entry separately would list it twice.
        comp_layout = QHBoxLayout()
        comp_label = QLabel("Component:")
        self.component_combo = QComboBox()
        self.component_combo.addItems(COMPONENTS.keys())
        comp_layout.addWidget(comp_label)
        comp_layout.addWidget(self.component_combo)
        layout.addLayout(comp_layout)

        # Variant selection (enabled only while cuDNN is the selected product)
        variant_layout = QHBoxLayout()
        variant_label = QLabel("CUDA Variant:")
        self.variant_combo = QComboBox()
        self.variant_combo.addItems(VARIANTS.keys())
        self.variant_combo.setEnabled(False)
        variant_layout.addWidget(variant_label)
        variant_layout.addWidget(self.variant_combo)
        layout.addLayout(variant_layout)

        # Output directory selection
        output_layout = QHBoxLayout()
        output_label = QLabel("Output Directory:")
        self.output_entry = QLineEdit()
        browse_button = QPushButton("Browse")
        browse_button.clicked.connect(self.browse_output)
        output_layout.addWidget(output_label)
        output_layout.addWidget(self.output_entry)
        output_layout.addWidget(browse_button)
        layout.addLayout(output_layout)

        # Command preview
        preview_label = QLabel("Command Preview:")
        self.command_text = QTextEdit()
        self.command_text.setReadOnly(True)
        self.command_text.setMaximumHeight(100)
        layout.addWidget(preview_label)
        layout.addWidget(self.command_text)

        # Download button
        self.download_button = QPushButton("Download")
        self.download_button.clicked.connect(self.execute_download)
        layout.addWidget(self.download_button)

        # Any change to an option refreshes the preview.
        for combo in (
            self.product_combo,
            self.version_combo,
            self.os_combo,
            self.arch_combo,
            self.component_combo,
            self.variant_combo,
        ):
            combo.currentTextChanged.connect(self.update_command_preview)
        self.output_entry.textChanged.connect(self.update_command_preview)

        self.setMinimumWidth(600)
        self.setMinimumHeight(500)

    def on_product_change(self, product_text):
        """Switch the variant, component and release pickers to match the product."""
        is_cudnn = PRODUCTS[product_text] == "cudnn"

        self.variant_combo.setEnabled(is_cudnn)
        if not is_cudnn:
            self.variant_combo.setCurrentIndex(-1)

        self.component_combo.setEnabled(not is_cudnn)
        if is_cudnn:
            self.component_combo.setCurrentIndex(-1)

        # Block signals while the list is rebuilt so the preview is refreshed
        # once, below, instead of once per intermediate change.
        self.version_combo.blockSignals(True)
        self.version_combo.clear()
        if is_cudnn:
            self.version_combo.addItems(CUDNN_RELEASES)
        else:
            self.version_combo.addItems(CUDA_RELEASES.keys())
        self.version_combo.blockSignals(False)

        self.update_command_preview()

    def browse_output(self):
        """Let the user pick the output directory with a folder dialog."""
        directory = QFileDialog.getExistingDirectory(self, "Select Output Directory")
        if directory:
            self.output_entry.setText(directory)

    def _release_label(self):
        """Return the release label for the current release selection."""
        version_text = self.version_combo.currentText()
        if PRODUCTS[self.product_combo.currentText()] == "cudnn":
            # cuDNN entries are already plain release labels.
            return version_text
        return CUDA_RELEASES.get(version_text, version_text)

    def _component_key(self):
        """Return the selected component key, or None for no component filter."""
        component_text = self.component_combo.currentText()
        if (
            self.product_combo.currentText() != "cuDNN" and
            component_text != "All Components" and
            component_text
        ):
            return COMPONENTS[component_text]
        return None

    def _variant_key(self):
        """Return the selected variant key, or None when no variant applies."""
        if self.variant_combo.isEnabled() and self.variant_combo.currentText():
            return VARIANTS[self.variant_combo.currentText()]
        return None

    def update_command_preview(self):
        """Rebuild the preview text from the current selections."""
        command = ["python", "download_cuda.py"]

        product_text = self.product_combo.currentText()
        if product_text:
            command.extend(["--product", PRODUCTS[product_text]])

        if self.version_combo.currentText():
            command.extend(["--label", self._release_label()])

        if self.os_combo.currentText():
            command.extend(["--os", OPERATING_SYSTEMS[self.os_combo.currentText()]])

        if self.arch_combo.currentText():
            command.extend(["--arch", ARCHITECTURES[self.arch_combo.currentText()]])

        component_key = self._component_key()
        if component_key is not None:
            command.extend(["--component", component_key])

        variant_key = self._variant_key()
        if variant_key is not None:
            command.extend(["--variant", variant_key])

        if self.output_entry.text():
            command.extend(["--output", self.output_entry.text()])

        self.command_text.setText(" ".join(command))

    def execute_download(self):
        """Start a background download for the current selections."""
        command = self.command_text.toPlainText().strip()
        if not command:
            QMessageBox.warning(
                self,
                "Warning",
                "Please configure the download options first."
            )
            return

        # Re-enabled in on_download_complete.
        self.download_button.setEnabled(False)

        args = argparse.Namespace()
        args.product = PRODUCTS[self.product_combo.currentText()]
        args.label = self._release_label()
        args.os = OPERATING_SYSTEMS[self.os_combo.currentText()]
        args.arch = ARCHITECTURES[self.arch_combo.currentText()]
        args.variant = self._variant_key()
        args.component = self._component_key()
        args.output = self.output_entry.text() if self.output_entry.text() else "flat"

        self.download_worker = DownloadWorker(args)
        self.download_worker.finished.connect(self.on_download_complete)
        self.download_worker.start()

    def on_download_complete(self, success, error_message):
        """Re-enable the download button and show the result to the user."""
        self.download_button.setEnabled(True)
        if success:
            QMessageBox.information(self, "Success", "Download completed successfully!")
        else:
            QMessageBox.critical(self, "Error", f"Download failed: {error_message}")

def main():
    """Entry point: download when ``--download-only`` is given, otherwise open the GUI.

    In download mode the manifest ``redistrib_<label>.json`` is fetched for
    the chosen product, the selected archives are downloaded, and they are
    extracted into the output directory. The process exits with status 0 on
    success, 1 on any download error and 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--download-only", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--product", help="Product name")
    parser.add_argument("--label", help="Release label version")
    parser.add_argument("--os", help="Operating System")
    parser.add_argument("--arch", help="Architecture")
    parser.add_argument("--component", help="Component name")
    parser.add_argument("--variant", help="Variant")
    # Same default as the GUI, so post_action never receives None.
    parser.add_argument("--output", default="flat", help="Output directory")

    args = parser.parse_args()

    if not args.download_only:
        app = QApplication(sys.argv)
        app.setStyle('Fusion')
        window = DownloaderGUI()
        window.show()
        sys.exit(app.exec())

    # Without these the manifest URL and platform key would contain "None".
    missing = [
        "--" + name
        for name in ("product", "label", "os", "arch")
        if getattr(args, name) is None
    ]
    if missing:
        parser.error("--download-only requires " + ", ".join(missing))

    try:
        parent = f"{DOMAIN}/compute/{args.product}/redist/"
        manifest_uri = f"{parent}redistrib_{args.label}.json"

        with urlopen(manifest_uri) as manifest_response:
            manifest = json.loads(manifest_response.read())

        platform = f"{args.os}-{args.arch}"

        fetch_action(
            parent,
            manifest,
            args.component,
            platform,
            args.variant,
            True
        )

        post_action(args.output, True)
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit status.
        print(f"Error during download: {str(e)}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

```

</details>

BBC-Esq · 2024-10-30T17:46:22Z

Official compatibility matrix that I found at:

https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix

PyTorch version	Python	C++	Stable CUDA	Experimental CUDA	Stable ROCm
2.5	>=3.9, <=3.12, (3.13 experimental)	C++17	CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70	None	ROCm 6.2
2.4	>=3.8, <=3.12	C++17	CUDA 11.8, CUDA 12.1, CUDNN 9.1.0.70	CUDA 12.4, CUDNN 9.1.0.70	ROCm 6.1
2.3	>=3.8, <=3.11, (3.12 experimental)	C++17	CUDA 11.8, CUDNN 8.7.0.84	CUDA 12.1, CUDNN 8.9.2.26	ROCm 6.0
2.2	>=3.8, <=3.11, (3.12 experimental)	C++17	CUDA 11.8, CUDNN 8.7.0.84	CUDA 12.1, CUDNN 8.9.2.26	ROCm 5.7
2.1	>=3.8, <=3.11	C++17	CUDA 11.8, CUDNN 8.7.0.84	CUDA 12.1, CUDNN 8.9.2.26	ROCm 5.6
2.0	>=3.8, <=3.11	C++14	CUDA 11.7, CUDNN 8.5.0.96	CUDA 11.8, CUDNN 8.7.0.84	ROCm 5.4
1.13	>=3.7, <=3.10	C++14	CUDA 11.6, CUDNN 8.3.2.44	CUDA 11.7, CUDNN 8.5.0.96	ROCm 5.2
1.12	>=3.7, <=3.10	C++14	CUDA 11.3, CUDNN 8.3.2.44	CUDA 11.6, CUDNN 8.3.2.44	ROCm 5.0

zhou13 · 2024-12-02T06:49:12Z

On my machine, I have cuda 12.7, torch==2.5.1.
If I use ctranslate2==4.5.0 with torch, I got

Unable to load any of {libcudnn_cnn.so.9.1.0, libcudnn_cnn.so.9.1, libcudnn_cnn.so.9, libcudnn_cnn.so}

If I use ctranslate2==4.4.0 with torch, I got

Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory

Using cuda with ctranslate2 or torch individually is fine. But you cannot invoke torch.cuda before ctranslate2.

The following files exist on my environment:

.venv/lib/python3.12/site-packages/nvidia/cudnn/lib/libcudnn_cnn.so.9
/usr/lib/libcudnn_cnn.so
/usr/lib/libcudnn_cnn.so.9
/usr/lib/libcudnn_cnn.so.9.5.1

Setting LD_LIBRARY_PATH="/path/to/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib" works around the problem.

Very weird problem. The problem does not exist when torch is not loaded.

jhj0517 · 2024-12-02T07:08:27Z

@zhou13 Hi, the latest CUDA version is 12.6.3 : https://developer.nvidia.com/cuda-toolkit-archive.
You might have checked your CUDA version with nvidia-smi; the CUDA version shown there is the maximum CUDA version your GPU driver supports, not the CUDA version actually installed on your PC.

You should check your installed CUDA version with

nvcc -V

If it says CUDA >= 12.4, it should work fine with torch==2.5.1+cu124 and ctranslate2==4.5.0.

zhou13 · 2024-12-02T19:37:49Z

@jhj0517 Thank you for the input.

$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Tue_Oct_29_23:50:19_PDT_2024
Cuda compilation tools, release 12.6, V12.6.85
Build cuda_12.6.r12.6/compiler.35059454_0

$ pacman -Ss cudnn
extra/cudnn 9.5.1.17-1 [installed]
    NVIDIA CUDA Deep Neural Network library

So I am running CUDA 12.4+.

One thing I found in addition is that if I remove the system libcudnn under /usr/lib/, then ctranslate2 will fail even WITHOUT import torch:

Unable to load any of {libcudnn_cnn.so.9.1.0, libcudnn_cnn.so.9.1, libcudnn_cnn.so.9, libcudnn_cnn.so}

My hypothesis is that torch always loads /path/to/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib, but the ld used by ctranslate2 tries to load /usr/lib/libcudnn_cnn.so (if LD_LIBRARY_PATH is not set). When used individually, both work fine. However, the latter will fail if torch has already loaded its own cudnn in the same application, due to the mismatch in cudnn versions.

BTW, I am using torch==2.5.1 from PyPI. Correct me if I am wrong, but I don't think torch==2.5.1+cu124 exists according to https://pypi.org/project/torch/#files.

jhj0517 · 2024-12-03T05:30:27Z

@zhou13 You should install torch with

pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu124

if you're using CUDA:

https://pytorch.org/get-started/locally/

heimoshuiyu · 2024-12-03T07:28:56Z

@zhou13 I had the same problem on ArchLinux. You need to set export LD_LIBRARY_PATH="/opt/cuda/lib64" or in your case with pip .venv/lib/python3.12/site-packages/nvidia/cudnn/lib
Or, install cudnn with anaconda to avoid this problem.

zhou13 · 2024-12-03T17:38:13Z

@jhj0517 The official documents suggest not adding --index-url https://download.pytorch.org/whl/cu124 when using cuda 12.4. That said, I think it is not that relevant to the problem I have.
@heimoshuiyu I am also running ArchLinux. I guess the problem is probably the version of some packages (likely CUDNN) in the system is too new.

jhj0517 · 2024-12-03T18:38:25Z

@zhou13 Can you source me documentation that says not to use torch which is the CUDA distribution?
I'm now using CUDA 12.6 & torch==2.5.1+cu124 & ctranslate2==4.5.0 & faster-whisper==1.0.3 and have no problems.

Missing .so files are usually caused by a cuDNN version mismatch as you said.

AFAIK torch automatically installs and uses its own dependent cuda/cudnn - #958 (comment) and I suspect this is most likely the cause.

Since @MahmoudAshraf97 made this version matrix, as long as you follow it, it shouldn't be a problem.

Torch Version	CT2 Version
`2.*.*+cu121`	`<=4.4.0`
`2.*.*+cu124`	`>=4.5.0`
`>=2.4.0`	`>=4.5.0`
`<2.4.0`	`<4.5.0`

+) For a Linux workaround, you may need to export the LD_LIBRARY_PATH path, as described in the README:

https://github.com/SYSTRAN/faster-whisper?tab=readme-ov-file#gpu
( Expand the "Other installation methods" and see the Linux guide there )

zhou13 · 2024-12-03T19:21:11Z

If you follow the official PyTorch documentation for installing torch with pip on CUDA 12.4, you will install torch==2.5.1 instead of torch==2.5.1+cu124.

I just wish things can work out-of-box without the need of setting LD_LIBRARY_PATH manually when using pip. Anyway, google should be able to bring people to this post in case they have the same issue.

jhj0517 · 2024-12-04T07:04:07Z

If you follow the official python document on installation of torch with pip on cuda 12.4, you will install torch=2.5.1 instead of torch==2.5.1+cu124.

Thanks. I'm wondering if this is really intended or just a mistake in the documentation.
Up to CUDA 12.1 it needs to be installed with --index-url, I wonder why they suddenly removed it in CUDA 12.4?

Personally, I think it's just a mistake in the documentation.

I'm not sure if it's the right place to ask about it, but I made a question in the pytorch discussion forum about it:

https://discuss.pytorch.org/t/no-cuda-12-4-distribution-for-linux-when-using-pip/213783

zhou13 · 2024-12-04T17:04:35Z

@jhj0517 I don't think it is a mistake: it installs all cu124 dependencies for me. I think it is an excellent move, personally at least. Using +cuxxx tag makes package version management harder, especially with modern python package managers like poetry or uv.

jhj0517 · 2024-12-04T17:14:51Z

Yeah it seems to be, according to the discussion the default torch would automatically go with cuda distribution on Linux. It didn't automatically install CUDA on Windows, that's why I was confused.

Anyway, regarding the missing .so files, it would definitely be better if we can use it without exporting LD_LIBRARY_PATH specifically.

MahmoudAshraf97 pinned this issue Oct 24, 2024

This was referenced Oct 24, 2024

Update requirements.txt: latest release from ctranslate2 from 10-22-2024 breaks faster-whisper #1082

Closed

Latest CTranslate2 release breaks faster-whisper #1083

Closed

cudnn ops64_9.dll is not found #1080

Closed

MahmoudAshraf97 mentioned this issue Oct 24, 2024

When I run whisper-diarization I get a "Kernel Died" in my terminal MahmoudAshraf97/whisper-diarization#261

Closed

jhj0517 mentioned this issue Oct 25, 2024

Application fails to start due to incompatible cuDNN version on Linux pinokiofactory/whisper-webui#1

Open

kanjieater mentioned this issue Oct 27, 2024

Invalid handle error when using Colab page kanjieater/SubPlz#18

Closed

This was referenced Oct 29, 2024

Fix ctranslate2 / CUDNN dependency issues linuxserver/docker-faster-whisper#23

Merged

[BUG] gpu-2.0.0-ls42 don't work linuxserver/docker-faster-whisper#22

Closed

Jiltseb mentioned this issue Oct 29, 2024

Batched whisper integration mobiusml/aana_sdk#192

Closed

jhj0517 mentioned this issue Nov 5, 2024

RuntimeError: CUDA failed with error CUDA driver version is insufficient for CUDA runtime version #1115

Closed

kanjieater mentioned this issue Nov 9, 2024

Faster-whisper fails due to CUDA compatibility with CTranslate2 jianfch/stable-ts#414

Open

davidecantoni mentioned this issue Nov 10, 2024

fasterwhisper and cuda version #1120

Closed

jhj0517 mentioned this issue Nov 20, 2024

libcudnn - Error pavelzbornik/whisperX-FastAPI#31

Closed

kanjieater mentioned this issue Nov 29, 2024

CTranslate Issue kanjieater/SubPlz#21

Open

Nenesh mentioned this issue Dec 3, 2024

CuDNN error kimjammer/Neuro#19

Closed

Troffifi mentioned this issue Dec 6, 2024

OOM when using VAD #1193

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

CUDA compatibility with CTranslate2 #1086

CUDA compatibility with CTranslate2 #1086

MahmoudAshraf97 commented Oct 24, 2024

jhj0517 commented Oct 24, 2024 •

edited

Loading

MahmoudAshraf97 commented Oct 24, 2024

BBC-Esq commented Oct 28, 2024 •

edited

Loading

BBC-Esq commented Oct 30, 2024

zhou13 commented Dec 2, 2024 •

edited

Loading

jhj0517 commented Dec 2, 2024 •

edited

Loading

zhou13 commented Dec 2, 2024 •

edited

Loading

jhj0517 commented Dec 3, 2024

heimoshuiyu commented Dec 3, 2024

zhou13 commented Dec 3, 2024 •

edited

Loading

jhj0517 commented Dec 3, 2024 •

edited

Loading

zhou13 commented Dec 3, 2024

jhj0517 commented Dec 4, 2024

zhou13 commented Dec 4, 2024

jhj0517 commented Dec 4, 2024

CUDA compatibility with CTranslate2 #1086

CUDA compatibility with CTranslate2 #1086

Comments

MahmoudAshraf97 commented Oct 24, 2024

jhj0517 commented Oct 24, 2024 • edited Loading

MahmoudAshraf97 commented Oct 24, 2024

BBC-Esq commented Oct 28, 2024 • edited Loading

BBC-Esq commented Oct 30, 2024

zhou13 commented Dec 2, 2024 • edited Loading

jhj0517 commented Dec 2, 2024 • edited Loading

zhou13 commented Dec 2, 2024 • edited Loading

jhj0517 commented Dec 3, 2024

heimoshuiyu commented Dec 3, 2024

zhou13 commented Dec 3, 2024 • edited Loading

jhj0517 commented Dec 3, 2024 • edited Loading

zhou13 commented Dec 3, 2024

jhj0517 commented Dec 4, 2024

zhou13 commented Dec 4, 2024

jhj0517 commented Dec 4, 2024

jhj0517 commented Oct 24, 2024 •

edited

Loading

BBC-Esq commented Oct 28, 2024 •

edited

Loading

zhou13 commented Dec 2, 2024 •

edited

Loading

jhj0517 commented Dec 2, 2024 •

edited

Loading

zhou13 commented Dec 2, 2024 •

edited

Loading

zhou13 commented Dec 3, 2024 •

edited

Loading

jhj0517 commented Dec 3, 2024 •

edited

Loading