'MistralForCausalLM' object has no attribute 'merge_and_unload'

1 vote
1 answer
223 views
Asked 2025-04-14 18:10

I have further trained the OpenChat model (a fine-tune of Mistral 7B) on my own data. That worked well, and the inference results are good. Now I want to merge the adapter weights with the original model so that I can quantize the model in a later step. However, when I call model.merge_and_unload(), I get the error:

 "AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload".

Is there a way to fix this, or is there another way to merge my adapter weights with the original model?

Here is my code:

Training

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})


from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)




from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)


data_train = ...  # tokenized training dataset (preparation omitted here)



import transformers
from trl import SFTTrainer
from transformers import TrainingArguments


tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,   
        num_train_epochs=1,  
        #evaluation_strategy="steps",   
        logging_strategy="steps",           
        learning_rate=2e-4,        
        fp16=True,
        logging_steps=5,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False

trainer.train()

trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
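Since model is a PeftModel at this point, trainer.save_model stores only the LoRA adapter in pretrained_model (adapter_config.json plus the adapter weights), not a full copy of the base model. A quick way to check what was actually written (a minimal sketch, assuming the training run above completed):

import os

# expect adapter files only, not full model weight shards
print(os.listdir("pretrained_model"))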

Merging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


model_name = "pretrained_model"

config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


model.merge_and_unload()

1 Answer

0

If you try running print(model), you should see a description of the model, something like this:

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
...

If lora shows up in that output, you can see that LoRA has been applied to the base model. The MistralForCausalLM class itself knows nothing about LoRA, so you cannot call merge_and_unload on it.
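As a quick check (a minimal sketch; model here is whatever object you loaded before calling merge), you can confirm which class you are actually holding:

from peft import PeftModel

# merge_and_unload exists only on the PeftModel wrapper,
# not on the bare transformers model class
print(type(model))
print(isinstance(model, PeftModel))      # False means there is no adapter wrapper to merge
print(hasattr(model, "merge_and_unload"))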

PeftModel does have the merge_and_unload method, so that is what you need to use:

# merge base + LoRA adapter and save the merged model

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

device_map = {"": 0}
lora_dir = "mistralai-my-lora-finetuning"
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir, device_map=device_map, torch_dtype=torch.bfloat16)


model = model.merge_and_unload()

output_dir = "output/my_merged_model"
model.save_pretrained(output_dir)
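Note that the base weights are loaded in bfloat16 here rather than through a 4-bit BitsAndBytesConfig; since the goal is to quantize the merged model afterwards, merging at half precision is the safer path. If you would rather load the base model explicitly instead of using AutoPeftModelForCausalLM, a roughly equivalent sketch (assuming "pretrained_model" is the adapter directory saved during training, as in the question) is:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# load the base model in half precision, attach the trained adapter, then merge
base = AutoModelForCausalLM.from_pretrained(
    "openchat/openchat-3.5-1210", torch_dtype=torch.bfloat16, device_map={"": 0}
)
model = PeftModel.from_pretrained(base, "pretrained_model")
model = model.merge_and_unload()

model.save_pretrained("output/my_merged_model")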
