Initial commit
research/micro/llm_compress.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor import oneshot

# Select the quantization algorithms. In this case, we:
# * apply SmoothQuant to make the activations easier to quantize
# * quantize the weights to int8 with GPTQ (static, per channel)
# * quantize the activations to int8 (dynamic, per token)
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(scheme="W8A8", targets="Linear", ignore=["lm_head"]),
]

# Apply quantization using the built-in open_platypus dataset.
# * See the examples for demos showing how to pass a custom calibration set.
oneshot(
    model="facebook/contriever",
    dataset="open_platypus",
    recipe=recipe,
    output_dir="contriever-INT8",  # W8A8 recipe produces int8, not int4
    max_seq_length=2048,
    num_calibration_samples=512,
)
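For context, here is a minimal sketch of the custom-calibration flow the comment above alludes to. It assumes oneshot also accepts a Hugging Face datasets.Dataset object in place of a registered dataset name, and it uses garage-bAInd/Open-Platypus (with its instruction/output columns) as a stand-in source; the dataset choice, the column names, and the 512-sample subset are illustrative assumptions, not part of this commit.

from datasets import load_dataset

# Build a small calibration set with a single "text" column, which is what
# the calibration pipeline tokenizes. Dataset, columns, and sample count
# here are placeholders for whatever calibration data you actually have.
ds = load_dataset("garage-bAInd/Open-Platypus", split="train")
ds = ds.shuffle(seed=42).select(range(512))
ds = ds.map(lambda ex: {"text": ex["instruction"] + "\n" + ex["output"]})

oneshot(
    model="facebook/contriever",
    dataset=ds,          # pass the Dataset object directly
    recipe=recipe,       # same recipe as above
    output_dir="contriever-INT8",
    max_seq_length=2048,
    num_calibration_samples=512,
)

Passing a Dataset object keeps the calibration data under your control (domain-matched text generally calibrates better than a generic instruction set), while the rest of the oneshot call stays identical to the built-in-dataset path.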