'MistralForCausalLM' object has no attribute 'merge_and_unload'
I have further fine-tuned the OpenChat model (a fine-tuned version of Mistral 7B) on my own data. Training worked well and the inference results are good. Now I want to merge the adapter weights into the original model so that I can quantize the merged model in a later step. However, when I call model.merge_and_unload(), I get the following error:
AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload'
Is there a way to fix this, or is there another way to merge my adapter weights with the original model?
Here is my code:
Training
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
from peft import LoraConfig, get_peft_model
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
data_train = ...
import transformers
from trl import SFTTrainer
from transformers import TrainingArguments
tokenizer.pad_token = tokenizer.eos_token
trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,
        num_train_epochs=1,
        #evaluation_strategy="steps",
        logging_strategy="steps",
        learning_rate=2e-4,
        fp16=True,
        logging_steps=5,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()
trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
Merging
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model_name = "pretrained_model"
config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.merge_and_unload()
1 Answer
If you run print(model), you should see a description of the model similar to the following:
MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
...
If lora layers appear in the output, you can see that LoRA has been applied on top of the base model. The MistralForCausalLM class itself knows nothing about LoRA, so you cannot call merge_and_unload on it. PeftModel does have a merge_and_unload method, so that is what you need to use:
# merge the LoRA adapter into the base model and save the merged model
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

device_map = {"": 0}
lora_dir = "mistralai-my-lora-finetuning"
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

# AutoPeftModelForCausalLM loads the base model and the adapter from lora_dir in one step
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir, device_map=device_map, torch_dtype=torch.bfloat16)
model = model.merge_and_unload()

output_dir = "output/my_merged_model"
model.save_pretrained(output_dir)
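Applied to the paths from the question, the same idea looks roughly like the sketch below. This is a minimal sketch, not a drop-in solution: it assumes the adapter was saved to the pretrained_model directory by trainer.save_model, it loads the base model in bfloat16 rather than 4-bit (merging into a quantized base is generally not what you want before quantizing the merged model yourself), and the merged_model output directory name is just an example.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "openchat/openchat-3.5-1210"  # base model from the question
adapter_dir = "pretrained_model"              # adapter saved by trainer.save_model(...)

# load the base model in bf16 (not 4-bit) so the LoRA weights can be merged into it
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# wrap the base model with the trained adapter, then merge and drop the PEFT wrapper
model = PeftModel.from_pretrained(base_model, adapter_dir)
model = model.merge_and_unload()

# save the merged model and the tokenizer for the later quantization step
output_dir = "merged_model"  # example name
model.save_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)
tokenizer.save_pretrained(output_dir)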