# Load a 4-bit (NF4) quantised causal-LM base model and attach the lazy-LoRA
# adapter found in the current working directory.
#
# Setup (one of the two options below is required):
#   1. Point the `sys.path.insert` line at a local checkout of the lazy-lora
#      fork of peft, or
#   2. install that fork from source:
#        git clone git@github.com:Xianchao-Wu/peft.git
#        cd peft
#        python setup.py install
#      NOTE(review): the clone host was garbled in the original notes and is
#      presumed to be GitHub -- confirm before relying on it.
import os
import sys

# Must run before `peft` is imported so the lazy-lora checkout is picked up
# ahead of any other installed peft.
# TODO: set this to the lazy-lora source path (not needed if installed).
sys.path.insert(1, '/workspace/asr/peft/src')

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# TODO: set this to the directory where the llama2-13b-chat-hf model is
# stored (or should be downloaded to).
cache_dir = "/workspace/asr/peft/qlora"

# Directory that contains 'adapter_config.json' and 'adapter_model.bin'.
lazylora_dir = os.getcwd()

# The adapter config records which base model the adapter was trained on.
config = PeftConfig.from_pretrained(lazylora_dir)

# NOTE(review): `use_auth_token` is kept as in the original script; newer
# transformers releases may prefer `token=` -- confirm against the installed
# version before changing it.
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    cache_dir=cache_dir,
    use_auth_token=True,
)

# 4-bit NF4 weights with double quantisation; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
    cache_dir=cache_dir,
    use_auth_token=True,
)

# Parameter count of the base model before the adapter is attached.
# Recorded output: 6,671,979,520 (about half of 13B; the original note
# attributes this to 4-bit loading).
print(sum(p.numel() for p in model.parameters()))

# Wrap the base model with the lazy-LoRA adapter weights.
model = PeftModel.from_pretrained(model, lazylora_dir)
print('after adding lazy lora parameters:')
model.print_trainable_parameters()
# Recorded output:
# trainable params: 0 || all params: 6,922,300,928 || trainable%: 0.0