Initial commit
research/micro/llm_compress.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor import oneshot

# Select the quantization algorithms. In this case, we:
# * apply SmoothQuant to make the activations easier to quantize
# * quantize the weights to int8 with GPTQ (static, per channel)
# * quantize the activations to int8 (dynamic, per token)
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(scheme="W8A8", targets="Linear", ignore=["lm_head"]),
]

# Apply quantization using the built-in open_platypus dataset.
# * See the examples for demos showing how to pass a custom calibration set.
oneshot(
    model="facebook/contriever",
    dataset="open_platypus",
    recipe=recipe,
    output_dir="contriever-INT8",  # W8A8 recipe produces int8, not int4
    max_seq_length=2048,
    num_calibration_samples=512,
)
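For context, here is a minimal sketch of the custom-calibration flow the comment above alludes to. It assumes oneshot also accepts a Hugging Face datasets.Dataset object in place of a registered dataset name, and it uses garage-bAInd/Open-Platypus (with its instruction/output columns) as a stand-in source; the dataset choice, the column names, and the 512-sample subset are illustrative assumptions, not part of this commit.

from datasets import load_dataset

# Build a small calibration set with a single "text" column, which is what
# the calibration pipeline tokenizes. Dataset, columns, and sample count
# here are placeholders for whatever calibration data you actually have.
ds = load_dataset("garage-bAInd/Open-Platypus", split="train")
ds = ds.shuffle(seed=42).select(range(512))
ds = ds.map(lambda ex: {"text": ex["instruction"] + "\n" + ex["output"]})

oneshot(
    model="facebook/contriever",
    dataset=ds,          # pass the Dataset object directly
    recipe=recipe,       # same recipe as above
    output_dir="contriever-INT8",
    max_seq_length=2048,
    num_calibration_samples=512,
)

Passing a Dataset object keeps the calibration data under your control (domain-matched text generally calibrates better than a generic instruction set), while the rest of the oneshot call stays identical to the built-in-dataset path.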