@@ -1339,6 +1339,23 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
13391339 .vec_dot_type = GGML_TYPE_Q8_K32,
13401340#else
13411341 .vec_dot_type = GGML_TYPE_Q8_K,
1342+ #endif
1343+ .nrows = 1,
1344+ .row_meta_size = 4,
1345+ },
1346+ [GGML_TYPE_IQ5_KS_R4] = {
1347+ .type_name = "iq5_ks_r4",
1348+ .blck_size = QK_K,
1349+ .type_size = sizeof(block_iq5_ks),
1350+ .is_quantized = true,
1351+ .to_float = (ggml_to_float_t) dequantize_row_iq5_ks_r4,
1352+ .from_float = quantize_row_iq5_ks_r4,
1353+ .from_float_ref = (ggml_from_float_t)quantize_row_iq5_ks_r4_ref,
1354+ .vec_dot = vec_dot_iq5_ks_r4_q8_k,
1355+ #if defined __AVX2__
1356+ .vec_dot_type = GGML_TYPE_Q8_K32,
1357+ #else
1358+ .vec_dot_type = GGML_TYPE_Q8_K,
13421359#endif
13431360 .nrows = 1,
13441361 .row_meta_size = 4,
@@ -4478,6 +4495,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
44784495 case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
44794496 case GGML_FTYPE_MOSTLY_IQ4_KS: wtype = GGML_TYPE_IQ4_KS; break;
44804497 case GGML_FTYPE_MOSTLY_IQ4_KS_R4: wtype = GGML_TYPE_IQ4_KS_R4;break;
4498+ case GGML_FTYPE_MOSTLY_IQ5_KS_R4: wtype = GGML_TYPE_IQ5_KS_R4;break;
44814499 case GGML_FTYPE_MOSTLY_IQ4_KSS: wtype = GGML_TYPE_IQ4_KSS; break;
44824500 case GGML_FTYPE_MOSTLY_IQ5_KS: wtype = GGML_TYPE_IQ5_KS; break;
44834501 case GGML_FTYPE_MOSTLY_IQ2_K: wtype = GGML_TYPE_IQ2_K; break;
@@ -11242,6 +11260,7 @@ static void ggml_compute_forward_add(
1124211260 case GGML_TYPE_IQ4_XS:
1124311261 case GGML_TYPE_IQ4_KS:
1124411262 case GGML_TYPE_IQ4_KS_R4:
11263+ case GGML_TYPE_IQ5_KS_R4:
1124511264 case GGML_TYPE_IQ4_KSS:
1124611265 case GGML_TYPE_IQ5_KS:
1124711266 case GGML_TYPE_IQ2_K:
@@ -11715,6 +11734,7 @@ static void ggml_compute_forward_add1(
1171511734 case GGML_TYPE_IQ4_XS:
1171611735 case GGML_TYPE_IQ4_KS:
1171711736 case GGML_TYPE_IQ4_KS_R4:
11737+ case GGML_TYPE_IQ5_KS_R4:
1171811738 case GGML_TYPE_IQ4_KSS:
1171911739 case GGML_TYPE_IQ5_KS:
1172011740 case GGML_TYPE_IQ2_K:
@@ -11885,6 +11905,7 @@ static void ggml_compute_forward_acc(
1188511905 case GGML_TYPE_IQ4_XS:
1188611906 case GGML_TYPE_IQ4_KS:
1188711907 case GGML_TYPE_IQ4_KS_R4:
11908+ case GGML_TYPE_IQ5_KS_R4:
1188811909 case GGML_TYPE_IQ4_KSS:
1188911910 case GGML_TYPE_IQ5_KS:
1189011911 case GGML_TYPE_IQ2_K:
@@ -15382,6 +15403,7 @@ static void ggml_compute_forward_out_prod(
1538215403 case GGML_TYPE_IQ4_XS:
1538315404 case GGML_TYPE_IQ4_KS:
1538415405 case GGML_TYPE_IQ4_KS_R4:
15406+ case GGML_TYPE_IQ5_KS_R4:
1538515407 case GGML_TYPE_IQ4_KSS:
1538615408 case GGML_TYPE_IQ5_KS:
1538715409 case GGML_TYPE_IQ2_K:
@@ -15792,6 +15814,7 @@ static void ggml_compute_forward_set(
1579215814 case GGML_TYPE_IQ4_XS:
1579315815 case GGML_TYPE_IQ4_KS:
1579415816 case GGML_TYPE_IQ4_KS_R4:
15817+ case GGML_TYPE_IQ5_KS_R4:
1579515818 case GGML_TYPE_IQ4_KSS:
1579615819 case GGML_TYPE_IQ5_KS:
1579715820 case GGML_TYPE_IQ2_K:
@@ -16108,6 +16131,7 @@ static void ggml_compute_forward_get_rows(
1610816131 case GGML_TYPE_IQ4_XS:
1610916132 case GGML_TYPE_IQ4_KS:
1611016133 case GGML_TYPE_IQ4_KS_R4:
16134+ case GGML_TYPE_IQ5_KS_R4:
1611116135 case GGML_TYPE_IQ4_KSS:
1611216136 case GGML_TYPE_IQ5_KS:
1611316137 case GGML_TYPE_IQ2_K:
@@ -16741,6 +16765,7 @@ static void ggml_compute_forward_clamp(
1674116765 case GGML_TYPE_IQ4_XS:
1674216766 case GGML_TYPE_IQ4_KS:
1674316767 case GGML_TYPE_IQ4_KS_R4:
16768+ case GGML_TYPE_IQ5_KS_R4:
1674416769 case GGML_TYPE_IQ4_KSS:
1674516770 case GGML_TYPE_IQ5_KS:
1674616771 case GGML_TYPE_IQ2_K:
@@ -23810,6 +23835,7 @@ size_t ggml_quantize_chunk(
2381023835 case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2381123836 case GGML_TYPE_IQ4_KS: result = quantize_iq4_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2381223837 case GGML_TYPE_IQ4_KS_R4:result = quantize_iq4_ks_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23838+ case GGML_TYPE_IQ5_KS_R4:result = quantize_iq5_ks_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2381323839 case GGML_TYPE_IQ4_KSS: result = quantize_iq4_kss(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2381423840 case GGML_TYPE_IQ5_KS: result = quantize_iq5_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2381523841 case GGML_TYPE_IQ2_K: result = quantize_iq2_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
0 commit comments