Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ env:
CUDA_DEVICE_ORDER: PCI_BUS_ID
RUNNER: 10.0.14.248
TORCH_CUDA_ARCH_LIST: '8.0 8.6 8.9 9.0 12.0'
CUDA_ARCH_LIST: '8.0 8.6 8.9 9.0 12.0'
RELEASE_MODE: 1
CI: 1
GPTQMODEL_FORCE_BUILD: 1
Expand Down Expand Up @@ -262,6 +263,7 @@ jobs:
- name: Print Env
run: |
if [[ "${{ matrix.cuda }}" -lt 128 ]]; then # CUDA >= 12.8 supports 12.0 (5090)
echo "CUDA_ARCH_LIST=8.0 8.6 8.9 9.0" >> $GITHUB_ENV
echo "TORCH_CUDA_ARCH_LIST=8.0 8.6 8.9 9.0" >> $GITHUB_ENV
fi
python_version=${{ matrix.python }}
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "4.2.5"
__version__ = "4.3-dev0"
81 changes: 81 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,84 @@
[build-system]
# PEP 517/518 build configuration. The "__legacy__" backend keeps setup.py in
# charge of the actual build (presumably needed for the project's compiled/CUDA
# extensions -- TODO confirm against setup.py) while the PEP 621 metadata below
# is still read from this file. setuptools >= 64 is required for that support.
requires = ["setuptools >= 64"]
build-backend = "setuptools.build_meta:__legacy__"

[project]
# Core PEP 621 package metadata published to PyPI.
name = "gptqmodel"
description = "Production ready LLM model compression/quantization toolkit with hw accelerated inference support for both cpu/gpu via HF, vLLM, and SGLang."
readme = "README.md"
# Python 3.11 is the minimum; classifiers below advertise 3.11-3.13.
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [
{ name = "ModelCloud", email = "[email protected]" },
]
keywords = ["gptq", "quantization", "large-language-models", "transformers", "4bit", "llm"]
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: C++",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Intended Audience :: Information Technology",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
# version and dependencies are resolved at build time rather than declared
# statically: dependencies come from [tool.setuptools.dynamic] below; version
# presumably comes from gptqmodel/version.py via setup.py -- TODO confirm.
dynamic = ["version", "dependencies"]

[tool.setuptools.dynamic]
# Runtime dependencies are maintained in requirements.txt, not inline here,
# so there is a single source of truth for installs and CI.
dependencies = {file = ["requirements.txt"]}

[project.urls]
Homepage = "https://github.com/ModelCloud/GPTQModel"

# Optional feature sets, installable as e.g. `pip install gptqmodel[vllm]`.
# Extra names are normalized by pip (PEP 685), so `auto_round` and
# `auto-round` resolve to the same extra.
[project.optional-dependencies]
# Test-suite tooling.
test = [
"pytest>=8.2.2",
"parameterized",
]
# Lint/format tools; exact pins presumably keep CI results reproducible.
quality = [
"ruff==0.13.0",
"isort==6.0.1",
]
# vLLM inference backend.
vllm = [
"vllm>=0.8.5",
"flashinfer-python>=0.2.1",
]
# SGLang inference backend (srt = its serving runtime extra).
sglang = [
"sglang[srt]>=0.4.6",
"flashinfer-python>=0.2.1",
]
# BitBLAS kernel backend; pinned to a specific dev release.
bitblas = [
"bitblas==0.0.1-dev13",
]
# Hugging Face Optimum integration.
hf = [
"optimum>=1.21.2",
]
# Intel Extension for PyTorch (CPU/XPU acceleration).
ipex = [
"intel_extension_for_pytorch>=2.7.0",
]
# Intel AutoRound quantization support.
auto_round = [
"auto_round>=0.3",
]
# Experiment-tracking/telemetry helpers.
logger = [
"clearml",
"random_word",
"plotly",
]
# Model evaluation harnesses.
eval = [
"lm_eval>=0.4.7",
"evalplus>=0.3.1",
]
# Triton kernel backend.
triton = [
"triton>=3.0.0",
]
# OpenAI-compatible HTTP serving stack.
openai = [
"uvicorn",
"fastapi",
"pydantic",
]
# Apple MLX backend.
mlx = [
"mlx_lm>=0.24.0",
]
Loading