diff --git a/.github/workflows/build_wheels_cuda.yml b/.github/workflows/build_wheels_cuda.yml
index 122c3cf..f34412e 100644
--- a/.github/workflows/build_wheels_cuda.yml
+++ b/.github/workflows/build_wheels_cuda.yml
@@ -20,8 +20,6 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
-        with:
-          ref: 'main'
 
       - uses: actions/setup-python@v3
         with:
@@ -50,7 +48,10 @@ jobs:
           $env:CUDA_PATH = $env:CONDA_PREFIX
           $env:CUDA_HOME = $env:CONDA_PREFIX
           if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
+
+          # TODO: remove this line; the setup.py change in this patch drops the INCLUDE_EXLLAMA_KERNELS check
           if (!$IsLinux) {$env:INCLUDE_EXLLAMA_KERNELS = 0}
+
           $env:TORCH_CUDA_ARCH_LIST = '6.0 6.1 7.0 7.5 8.0 8.6+PTX'
           if ([decimal]$env:CUDA_VERSION -ge 11.8) { $env:TORCH_CUDA_ARCH_LIST = '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
           python setup.py sdist bdist_wheel
diff --git a/.github/workflows/build_wheels_pypi.yml b/.github/workflows/build_wheels_pypi.yml
new file mode 100644
index 0000000..1a7116b
--- /dev/null
+++ b/.github/workflows/build_wheels_pypi.yml
@@ -0,0 +1,69 @@
+name: Build AutoGPTQ Wheels for PyPI with CUDA
+
+on: workflow_dispatch
+
+jobs:
+  build_wheels:
+    if: ${{ github.repository_owner == 'PanQiWei' }}
+    name: Build wheels for ${{ matrix.os }} and Python ${{ matrix.pyver }} and CUDA 11.7
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, windows-latest]
+        pyver: ["3.8", "3.9", "3.10", "3.11"]
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CUDA_VERSION: "11.7"
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Setup Miniconda
+        uses: conda-incubator/setup-miniconda@v2.2.0
+        with:
+          activate-environment: "build"
+          python-version: ${{ matrix.pyver }}
+          mamba-version: "*"
+          use-mamba: false
+          channels: conda-forge,defaults
+          channel-priority: true
+          add-pip-as-python-dependency: true
+          auto-activate-base: false
+
+      - name: Install Dependencies
+        run: |
+          conda install cuda-toolkit -c "nvidia/label/cuda-${env:CUDA_VERSION}.0"
+          conda install pytorch "pytorch-cuda=${env:CUDA_VERSION}" -c pytorch -c nvidia
+          python -m pip install --upgrade build setuptools wheel ninja
+
+      - name: Build Wheel
+        run: |
+          $env:CUDA_PATH = $env:CONDA_PREFIX
+          $env:CUDA_HOME = $env:CONDA_PREFIX
+          if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
+
+          $env:TORCH_CUDA_ARCH_LIST = '6.0 6.1 7.0 7.5 8.0 8.6+PTX'
+          if ([decimal]$env:CUDA_VERSION -ge 11.8) { $env:TORCH_CUDA_ARCH_LIST = '6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX' }
+
+          echo "CUDA_PATH: $env:CUDA_PATH"
+          echo "CUDA_VERSION: $env:CUDA_VERSION"
+          $env:PYPI_RELEASE = "1"
+          python setup.py sdist bdist_wheel
+
+      - uses: actions/upload-artifact@v3
+        if: runner.os == 'Linux'
+        with:
+          name: 'linux-cuda-wheels'
+          path: ./dist/*.whl
+
+      - uses: actions/upload-artifact@v3
+        if: runner.os == 'Windows'
+        with:
+          name: 'windows-cuda-wheels'
+          path: ./dist/*.whl
diff --git a/.github/workflows/build_wheels_rocm.yml b/.github/workflows/build_wheels_rocm.yml
index 9914704..20d599c 100644
--- a/.github/workflows/build_wheels_rocm.yml
+++ b/.github/workflows/build_wheels_rocm.yml
@@ -21,8 +21,6 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
-        with:
-          ref: 'main'
 
       - name: Free disk space
         run: |
diff --git a/setup.py b/setup.py
index 1056f48..0fc8cb3 100644
--- a/setup.py
+++ b/setup.py
@@ -30,6 +30,7 @@ common_setup_kwargs = {
 }
 
 
+PYPI_RELEASE = os.environ.get('PYPI_RELEASE', None)
 BUILD_CUDA_EXT = int(os.environ.get('BUILD_CUDA_EXT', '1')) == 1
 if BUILD_CUDA_EXT:
     try:
@@ -60,8 +61,10 @@ if BUILD_CUDA_EXT:
                 "is installed without CUDA support."
             )
             sys.exit(-1)
-        common_setup_kwargs['version'] += f"+cu{CUDA_VERSION}"
 
+        # PyPI releases keep the plain x.x.x version: the "+cu..." local version suffix is only added for non-PyPI builds.
+        if not PYPI_RELEASE:
+            common_setup_kwargs['version'] += f"+cu{CUDA_VERSION}"
 
 requirements = [
     "accelerate>=0.19.0",
@@ -110,19 +113,28 @@ if BUILD_CUDA_EXT:
         )
     ]
 
-    if os.environ.get("INCLUDE_EXLLAMA_KERNELS", "1") == "1":  # TODO: improve github action to always compile exllama_kernels
-        extensions.append(
-            cpp_extension.CUDAExtension(
-                "exllama_kernels",
-                [
-                    "autogptq_cuda/exllama/exllama_ext.cpp",
-                    "autogptq_cuda/exllama/cuda_buffers.cu",
-                    "autogptq_cuda/exllama/cuda_func/column_remap.cu",
-                    "autogptq_cuda/exllama/cuda_func/q4_matmul.cu",
-                    "autogptq_cuda/exllama/cuda_func/q4_matrix.cu"
-                ]
-            )
+    if os.name == "nt":
+        # On Windows, pass cublas.lib to the linker explicitly to fix "error LNK2001: unresolved external symbol cublasHgemm".
+        cuda_path = os.environ.get("CUDA_PATH", None)
+        if cuda_path is None:
+            raise ValueError("The environment variable CUDA_PATH must be set to the path to the CUDA install when installing from source on Windows systems.")
+        extra_link_args = ["-L", f"{cuda_path}/lib/x64/cublas.lib"]
+    else:
+        extra_link_args = []
+
+    extensions.append(
+        cpp_extension.CUDAExtension(
+            "exllama_kernels",
+            [
+                "autogptq_cuda/exllama/exllama_ext.cpp",
+                "autogptq_cuda/exllama/cuda_buffers.cu",
+                "autogptq_cuda/exllama/cuda_func/column_remap.cu",
+                "autogptq_cuda/exllama/cuda_func/q4_matmul.cu",
+                "autogptq_cuda/exllama/cuda_func/q4_matrix.cu"
+            ],
+            extra_link_args=extra_link_args
         )
+    )
 
     additional_setup_kwargs = {
         "ext_modules": extensions,