## ----setup, include=FALSE-----------------------------------------------------
# Vignette code is executed locally (NOT_CRAN=true) but not on CRAN, where
# the CPU fallback would multi-thread and trip the "CPU time > elapsed" NOTE.
knitr::opts_chunk$set(eval = identical(Sys.getenv("NOT_CRAN"), "true"))

## -----------------------------------------------------------------------------
# library(ggmlR)

## -----------------------------------------------------------------------------
# # Original float weights (must be a multiple of block size, typically 32)
# weights <- rnorm(256L)
# 
# # Quantize to Q4_0
# raw_q4 <- quantize_q4_0(weights, n_rows = 1L, n_per_row = length(weights))
# cat("Original size: ", length(weights) * 4L, "bytes\n")
# cat("Q4_0 size:     ", length(raw_q4), "bytes\n")
# cat("Compression:   ", round(length(weights) * 4L / length(raw_q4), 1), "x\n")
# 
# # Dequantize back to float
# recovered <- dequantize_row_q4_0(raw_q4, length(weights))
# cat("Max abs error: ", max(abs(recovered - weights)), "\n")

## -----------------------------------------------------------------------------
# weights <- rnorm(512L)
# 
# # Q4_K — 4-bit K-quant
# raw_q4k <- quantize_q4_K(weights, n_rows = 1L, n_per_row = length(weights))
# rec_q4k <- dequantize_row_q4_K(raw_q4k, length(weights))
# cat("Q4_K max error:", max(abs(rec_q4k - weights)), "\n")
# 
# # Q8_0 — 8-bit (near-lossless)
# raw_q8 <- quantize_q8_0(weights, n_rows = 1L, n_per_row = length(weights))
# rec_q8 <- dequantize_row_q8_0(raw_q8, length(weights))
# cat("Q8_0 max error:", max(abs(rec_q8 - weights)), "\n")

## -----------------------------------------------------------------------------
# weights    <- rnorm(512L)
# importance <- abs(weights)^2          # example: weight magnitude as importance
# 
# # IQ4_XS — 4-bit with importance
# raw_iq4 <- quantize_iq4_xs(weights, n_rows = 1L, n_per_row = length(weights),
#                            imatrix = importance)
# rec_iq4 <- dequantize_row_iq4_xs(raw_iq4, length(weights))
# cat("IQ4_XS max error:", max(abs(rec_iq4 - weights)), "\n")

## -----------------------------------------------------------------------------
# weights <- rnorm(512L)
# n_bytes_f32 <- length(weights) * 4L
# 
# formats <- list(
#   Q4_0 = list(q = quantize_q4_0,  dq = dequantize_row_q4_0),
#   Q8_0 = list(q = quantize_q8_0,  dq = dequantize_row_q8_0),
#   Q4_K = list(q = quantize_q4_K,  dq = dequantize_row_q4_K),
#   Q6_K = list(q = quantize_q6_K,  dq = dequantize_row_q6_K)
# )
# 
# n <- length(weights)
# cat(sprintf("%-8s  %6s  %8s  %10s\n", "Format", "Bytes", "Ratio", "MaxError"))
# cat(strrep("-", 40), "\n")
# for (nm in names(formats)) {
#   raw <- formats[[nm]]$q(weights, n_rows = 1L, n_per_row = n)
#   rec <- formats[[nm]]$dq(raw, n)
#   cat(sprintf("%-8s  %6d  %8.2fx  %10.6f\n",
#               nm, length(raw),
#               n_bytes_f32 / length(raw),
#               max(abs(rec - weights))))
# }

## -----------------------------------------------------------------------------
# row <- rnorm(32L)   # exactly one Q4_0 block
# 
# raw_row <- quantize_row_q4_0_ref(row, length(row))
# rec_row <- dequantize_row_q4_0(raw_row, length(row))

