You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
raise ValueError(f"Invalid quantization mode {mode} needs to be one of [int8, int4, int4-gpptq]")
597
636
@@ -606,7 +645,7 @@ def quantize(
606
645
import argparse
607
646
parser = argparse.ArgumentParser(description='Quantize a model.')
608
647
parser.add_argument('--checkpoint_path', type=Path, default=Path("checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pth"), help='Path to the model checkpoint to be quantized.')
609
-
parser.add_argument('--mode', '-q', type=str, default='int8', choices=['int8', 'int4', 'int4-gptq'], help='type of quantization to perform')
648
+
parser.add_argument('--mode', '-q', type=str, default='int8', choices=['int8', 'int4', 'int4-gptq', 'int4-hqq'], help='type of quantization to perform')
610
649
parser.add_argument('--groupsize', type=int, default=32, help='Group size for int4 quantization.')
611
650
parser.add_argument('--calibration_tasks', type=str, nargs='+', default=['wikitext'], help='tasks to do gptq calibration on, if doing gptq')
612
651
parser.add_argument('--calibration_limit', type=int, default=1000, help='number of samples to use for gptq calibration')
0 commit comments