Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ env:
CUDA_DEVICE_ORDER: PCI_BUS_ID
RUNNER: 10.0.14.248
TORCH_CUDA_ARCH_LIST: '8.0 8.6 8.9 9.0 12.0'
CUDA_ARCH_LIST: '8.0 8.6 8.9 9.0 12.0'
RELEASE_MODE: 1
CI: 1
GPTQMODEL_FORCE_BUILD: 1
Expand Down Expand Up @@ -262,6 +263,7 @@ jobs:
- name: Print Env
run: |
if [[ "${{ matrix.cuda }}" -lt 128 ]]; then # CUDA >= 12.8 supports 12.0 (5090)
echo "CUDA_ARCH_LIST=8.0 8.6 8.9 9.0" >> $GITHUB_ENV
echo "TORCH_CUDA_ARCH_LIST=8.0 8.6 8.9 9.0" >> $GITHUB_ENV
fi
python_version=${{ matrix.python }}
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "4.2.5"
__version__ = "4.3-dev0"
81 changes: 81 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,84 @@
[build-system]
# PEP 517/518 build configuration. The "__legacy__" backend keeps setup.py in
# charge of the actual build (presumably needed for the project's compiled/CUDA
# extensions -- TODO confirm against setup.py) while the PEP 621 metadata below
# is still read from this file. setuptools >= 64 is required for that support.
requires = ["setuptools >= 64"]
build-backend = "setuptools.build_meta:__legacy__"

[project]
# Core PEP 621 package metadata published to PyPI.
name = "gptqmodel"
description = "Production ready LLM model compression/quantization toolkit with hw accelerated inference support for both cpu/gpu via HF, vLLM, and SGLang."
readme = "README.md"
# Python 3.11 is the minimum; classifiers below advertise 3.11-3.13.
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [
{ name = "ModelCloud", email = "[email protected]" },
]
keywords = ["gptq", "quantization", "large-language-models", "transformers", "4bit", "llm"]
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: C++",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Intended Audience :: Information Technology",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
# version and dependencies are resolved at build time rather than declared
# statically: dependencies come from [tool.setuptools.dynamic] below; version
# presumably comes from gptqmodel/version.py via setup.py -- TODO confirm.
dynamic = ["version", "dependencies"]

[tool.setuptools.dynamic]
# Runtime dependencies are maintained in requirements.txt, not inline here,
# so there is a single source of truth for installs and CI.
dependencies = {file = ["requirements.txt"]}

[project.urls]
Homepage = "https://github.com/ModelCloud/GPTQModel"

# Optional feature sets, installable as e.g. `pip install gptqmodel[vllm]`.
# Extra names are normalized by pip (PEP 685), so `auto_round` and
# `auto-round` resolve to the same extra.
[project.optional-dependencies]
# Test-suite tooling.
test = [
"pytest>=8.2.2",
"parameterized",
]
# Lint/format tools; exact pins presumably keep CI results reproducible.
quality = [
"ruff==0.13.0",
"isort==6.0.1",
]
# vLLM inference backend.
vllm = [
"vllm>=0.8.5",
"flashinfer-python>=0.2.1",
]
# SGLang inference backend (srt = its serving runtime extra).
sglang = [
"sglang[srt]>=0.4.6",
"flashinfer-python>=0.2.1",
]
# BitBLAS kernel backend; pinned to a specific dev release.
bitblas = [
"bitblas==0.0.1-dev13",
]
# Hugging Face Optimum integration.
hf = [
"optimum>=1.21.2",
]
# Intel Extension for PyTorch (CPU/XPU acceleration).
ipex = [
"intel_extension_for_pytorch>=2.7.0",
]
# Intel AutoRound quantization support.
auto_round = [
"auto_round>=0.3",
]
# Experiment-tracking/telemetry helpers.
logger = [
"clearml",
"random_word",
"plotly",
]
# Model evaluation harnesses.
eval = [
"lm_eval>=0.4.7",
"evalplus>=0.3.1",
]
# Triton kernel backend.
triton = [
"triton>=3.0.0",
]
# OpenAI-compatible HTTP serving stack.
openai = [
"uvicorn",
"fastapi",
"pydantic",
]
# Apple MLX backend.
mlx = [
"mlx_lm>=0.24.0",
]
Loading