Skip to content

Commit 904d0a0

Browse files
Convert g_idx to float32, since int32 tensors cannot be placed on the GPU in the TensorFlow backend
1 parent ade009e commit 904d0a0

File tree

4 files changed

+9
-7
lines changed

4 files changed

+9
-7
lines changed

keras/src/layers/core/dense.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ def _gptq_build(self, kernel_shape, config):
440440
name="g_idx",
441441
shape=(self.kernel_shape[0],),
442442
initializer="zeros",
443-
dtype="int32",
443+
dtype="float32",
444444
trainable=False,
445445
)
446446

keras/src/layers/core/einsum_dense.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ def _gptq_build(self, kernel_shape, config):
549549
name="g_idx",
550550
shape=(rows,),
551551
initializer="zeros",
552-
dtype="int32",
552+
dtype="float32",
553553
trainable=False,
554554
)
555555

keras/src/quantizers/gptq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def gptq_quantize_matrix(
247247
# g_idx in permuted domain
248248
g_idx = ops.arange(0, in_features, dtype="int32")
249249
g_idx = ops.divide(g_idx, base_group)
250-
g_idx = ops.cast(g_idx, "int32")
250+
g_idx = ops.cast(g_idx, "float32")
251251

252252
# Map group indices and quantized weights back to original column order
253253
if activation_order:

keras/src/quantizers/quantizers.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -878,8 +878,9 @@ def quantize_with_sz_map(weights_matrix, scale, zero, g_idx, maxq):
878878
A tensor with the same shape as `weights_matrix` containing the
879879
quantized weights produced using the provided group parameters.
880880
"""
881-
scale_cols = ops.take(scale, g_idx, axis=1) # [out_features, in_features]
882-
zero_cols = ops.take(zero, g_idx, axis=1) # [out_features, in_features]
881+
groups = ops.cast(g_idx, "int32")
882+
scale_cols = ops.take(scale, groups, axis=1) # [out_features, in_features]
883+
zero_cols = ops.take(zero, groups, axis=1) # [out_features, in_features]
883884

884885
# Quantize elementwise, then cast to int
885886
return quantize_with_zero_point(weights_matrix, scale_cols, zero_cols, maxq)
@@ -907,8 +908,9 @@ def dequantize_with_sz_map(weights_matrix, scale, zero, g_idx):
907908
dequantized weights produced using the provided group parameters.
908909
"""
909910
# Map group indices to scales and zeros
910-
scales_mapped = ops.take(scale, g_idx, axis=1)
911-
zeros_mapped = ops.take(zero, g_idx, axis=1)
911+
groups = ops.cast(g_idx, "int32")
912+
scales_mapped = ops.take(scale, groups, axis=1)
913+
zeros_mapped = ops.take(zero, groups, axis=1)
912914
zeros_mapped = ops.cast(zeros_mapped, scales_mapped.dtype)
913915

914916
quantized = ops.multiply(

0 commit comments

Comments (0)