Reward Modeling Finetuning Example #10827

Open · wants to merge 6 commits into main
Changes from 5 commits
56 changes: 56 additions & 0 deletions python/llm/example/GPU/LLM-Finetuning/RewardModeling/README.md
@@ -0,0 +1,56 @@
# Reward Modeling Finetuning for Sequence Classification with IPEX-LLM

This is an example of IPEX-LLM [reward modeling](https://huggingface.co/docs/trl/main/en/reward_trainer) (a kind of RLHF) on an [Intel MAX GPU](../../../README.md). It follows the [TRL example](https://github.com/huggingface/trl/blob/main/examples/scripts/reward_modeling.py) to fine-tune the model [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on a sequence classification task.

### 0. Requirements
To run this example with IPEX-LLM on an Intel MAX GPU, there are some recommended requirements for your machine; please refer to [here](../../../README.md#requirements) for more information.

### 1. Install

```bash
conda create -n llm python=3.11
conda activate llm
# the command below will install intel_extension_for_pytorch==2.1.10+xpu by default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
pip install accelerate==0.28.0
pip install bitsandbytes==0.43.0
pip install datasets==2.18.0
pip install transformers==4.39.1
pip install trl
pip install wandb
```
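
Optionally, you can verify that the XPU device is visible to PyTorch before starting training. This is just a quick sanity check, assuming the packages installed above:
```bash
python -c "import torch; import intel_extension_for_pytorch; print(torch.xpu.is_available())"
```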

### 2. Reward Modeling Finetune

Here is how to fine-tune opt-350m on an Intel Max GPU server:

```bash
# arguments can be reset in the script, e.g. model_name_or_path, per_device_train_batch_size and other hyperparameters
bash start-reward-modeling-finetuning.sh
```
After starting, you will be prompted to configure where wandb reports to, or you can disable visualization by selecting the no-visualization option.
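
If you would rather turn off wandb logging entirely, one option (assuming the standard wandb environment variable) is to disable it before launching:
```bash
export WANDB_MODE=disabled
```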

Default values of the arguments in the script:
```bash
--model_name_or_path=facebook/opt-350m
--output_dir="reward_modeling_ipex_llm"
--per_device_train_batch_size=8
--num_train_epochs=1
--gradient_accumulation_steps=16
--gradient_checkpointing=True
--learning_rate=1.41e-5
--remove_unused_columns=False
--optim="adamw_torch"
--logging_steps=10
--evaluation_strategy="steps"
--max_length=512
```
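
To override any of these, you can edit `start-reward-modeling-finetuning.sh` or invoke the Python entry point directly. For example, a sketch that only changes the batch size (hypothetical value; other arguments keep their defaults):
```bash
source /opt/intel/oneapi/setvars.sh
python reward_modeling_finetuning.py \
  --model_name_or_path=facebook/opt-350m \
  --output_dir="reward_modeling_ipex_llm" \
  --per_device_train_batch_size=4 \
  --max_length=512
```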

### 3. Sample Output
```log
......
{'loss': 0.8613, 'grad_norm': 2.837268590927124, 'learning_rate': 1.3854569190600522e-05, 'epoch': 0.02}
{'eval_loss': 0.8356835246086121, 'eval_accuracy': 0.4996802660186725, 'eval_runtime': xxxx, 'eval_samples_per_second': xxxx, 'eval_steps_per_second': xxxx, 'epoch': 0.03}
4%|██▊ | 42/1149 [xx:xx<xx:xx:xx, xx.xx s/it]
......
```
115 changes: 115 additions & 0 deletions python/llm/example/GPU/LLM-Finetuning/RewardModeling/reward_modeling_finetuning.py
@@ -0,0 +1,115 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Some parts of this file are adapted from
# https://github.com/huggingface/trl/blob/main/examples/scripts/reward_modeling.py
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, HfArgumentParser
from ipex_llm.transformers import AutoModelForSequenceClassification
from ipex_llm import optimize_model
from trl import ModelConfig, RewardConfig, RewardTrainer, get_kbit_device_map, get_peft_config, get_quantization_config
import datasets
import warnings

tqdm.pandas()


if __name__ == "__main__":
    parser = HfArgumentParser((RewardConfig, ModelConfig))
    reward_config, model_config = parser.parse_args_into_dataclasses()
    reward_config.gradient_checkpointing_kwargs = dict(use_reentrant=False)

    torch_dtype = (
        model_config.torch_dtype
        if model_config.torch_dtype in ["auto", None]
        else getattr(torch, model_config.torch_dtype)
    )

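    # Load the tokenizer and the base model for sequence classification (num_labels=1, i.e. a single reward head) in bfloat16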
    model_kwargs = dict(
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        use_cache=False,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path, use_fast=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_config.model_name_or_path, num_labels=1, **model_kwargs
    )
    # Apply IPEX-LLM low-bit optimization (4-bit fp4) to the model to reduce memory usage
    model = optimize_model(model, low_bit="fp4")
    print(model)
    model = model.to("xpu")

    if model_config.lora_task_type != "SEQ_CLS":
        warnings.warn(
            "You are using a `task_type` that is different than `SEQ_CLS` for PEFT. This will lead to silent bugs."
            " Make sure to pass --lora_task_type SEQ_CLS when using this script."
        )

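    # Load the Anthropic hh-rlhf pairwise preference dataset (each example has a chosen and a rejected response)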
    raw_datasets = load_dataset("Anthropic/hh-rlhf")

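    # Tokenize the chosen and rejected responses separately; RewardTrainer compares each pair when computing the reward loss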
    def preprocess_function(examples):
        new_examples = {
            "input_ids_chosen": [],
            "attention_mask_chosen": [],
            "input_ids_rejected": [],
            "attention_mask_rejected": [],
        }
        for chosen, rejected in zip(examples["chosen"], examples["rejected"]):
            tokenized_chosen = tokenizer(chosen)
            tokenized_rejected = tokenizer(rejected)

            new_examples["input_ids_chosen"].append(tokenized_chosen["input_ids"])
            new_examples["attention_mask_chosen"].append(tokenized_chosen["attention_mask"])
            new_examples["input_ids_rejected"].append(tokenized_rejected["input_ids"])
            new_examples["attention_mask_rejected"].append(tokenized_rejected["attention_mask"])

        return new_examples

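    # Preprocess the dataset in parallel and drop examples whose chosen or rejected sequence exceeds max_length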
    raw_datasets = raw_datasets.map(
        preprocess_function,
        batched=True,
        num_proc=4,
    )
    raw_datasets = raw_datasets.filter(
        lambda x: len(x["input_ids_chosen"]) <= reward_config.max_length
        and len(x["input_ids_rejected"]) <= reward_config.max_length
    )
    train_dataset = raw_datasets["train"]
    eval_dataset = raw_datasets["test"]

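    # Fine-tune the reward model with TRL's RewardTrainer (LoRA is applied when a PEFT config is provided), then save it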
    trainer = RewardTrainer(
        model=model,
        tokenizer=tokenizer,
        args=reward_config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        peft_config=get_peft_config(model_config),
    )
    trainer.train()
    trainer.save_model(reward_config.output_dir)
16 changes: 16 additions & 0 deletions python/llm/example/GPU/LLM-Finetuning/RewardModeling/start-reward-modeling-finetuning.sh
@@ -0,0 +1,16 @@
# Configures OneAPI environment variables
source /opt/intel/oneapi/setvars.sh

python reward_modeling_finetuning.py \
--model_name_or_path=facebook/opt-350m \
--output_dir="reward_modeling_ipex_llm" \
--per_device_train_batch_size=8 \
--num_train_epochs=1 \
--gradient_accumulation_steps=16 \
--gradient_checkpointing=True \
--learning_rate=1.41e-5 \
--remove_unused_columns=False \
--optim="adamw_torch" \
--logging_steps=10 \
--evaluation_strategy="steps" \
--max_length=512