Feb 2024 Release (#187)
* Fast inference repatch

* Update llama.py

* Update utils.py

* Update utils.py

* Update utils.py

* Update mistral.py

* Update __init__.py

* Fix inference

* Update mistral.py

* fast lm_head

* Remove fast path

* Update rope_embedding.py

* Update loader.py

* LlamaAttention_fast_forward_inference

* if past_key_value is not None and q_len == 1:

* revert inference

* Update loader.py

* past_key_value

* Update llama.py

* Update llama.py

* Fix SDPA

* Update llama.py

* padding

* Inference

* Update llama.py

* Revert

* Update mistral.py

* faster inference

* inference

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* inference

* Update llama.py

* Update utils.py

* faster inference

* Update llama.py

* revert

* lm_head

* Update llama.py

* inference

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update mistral.py

* Update llama.py

* faster inference

* Update llama.py

* fast inference

* Update llama.py

* Update llama.py

* Update mistral.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* torch compile

* past_key_values

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update utils.py

* Update utils.py

* Update utils.py

* Update utils.py

* Update llama.py

* fast inference + saving config.json

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update mistral.py

* fast inference again

* more temp matrices

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* fast inference

* Update mistral.py

* Update llama.py

* SDPA

* attention_mask

* New version

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update utils.py

* Update utils.py

* Update save.py

* Update save.py

* Torch 2.2.0

* Update save.py

* mistral swa

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Fix SWA inference

* Fix llm_int8_skip_modules

* SWA inference

* Update save.py

* Update save.py

* Update pyproject.toml

* __version__

* __version__

* Update save.py

* Update save.py

* Update mistral.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Chat Templates

* Update chat_templates.py

* Update chat_templates.py

* Update chat_templates.py

* Update chat_templates.py

* patch tokenizer

* Update chat_templates.py

* Saving, LlamaRotaryEmbedding issues

* Update llama.py

* Update mistral.py

* Update mapper.py

* Fix RoPE precision issues

* Bugs

* saving bugs

* Update llama.py

* readme

* spaces

* spaces

* globals

* slash

* slashes

* spaces

* apache

* Update save.py

* Update save.py

* Update loader.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* trainer

* Update save.py

* Update pyproject.toml

* install

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* PeftModel token + saving

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* linking

* llama.cpp bugs

* Update save.py

* Update save.py

* saving

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update __init__.py

* Update save.py

* Update save.py

* Update save.py

* save

* trainer

* spaces

* original
danielhanchen committed Feb 20, 2024
1 parent a030e80 commit 3e4c5a3
Showing 9 changed files with 611 additions and 148 deletions.
21 changes: 17 additions & 4 deletions README.md
@@ -30,7 +30,7 @@ All notebooks are **beginner friendly**! Add your dataset, click "Run All", and
| **Mistral 7b** 1xT4 | [▶️ Start on Kaggle](https://www.kaggle.com/code/danielhanchen/kaggle-mistral-7b-unsloth-notebook) | 5x faster\* | 62% less |

- This [conversational notebook](https://colab.research.google.com/drive/1Aau3lgPzeZKQ-98h69CCu1UJcvIBLmy2?usp=sharing) is useful for ShareGPT ChatML / Vicuna templates.
- Our [raw text notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing) is useful for text completion.
- This [text completion notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing) is for raw text. This [DPO notebook](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing) replicates Zephyr.
- Colab provides a free GPU sometimes. Kaggle has 30 hrs free per week on a 12 hr running cap.
- \* Kaggle has 2x T4s, but we use 1. Due to overhead, 1x T4 is 5x faster. Use Colab as Kaggle takes 10 mins to install.

@@ -86,9 +86,12 @@ All notebooks are **beginner friendly**! Add your dataset, click "Run All", and
### Conda Installation
Select either `pytorch-cuda=11.8` for CUDA 11.8 or `pytorch-cuda=12.1` for CUDA 12.1. If you have `mamba`, use `mamba` instead of `conda` for faster solving. See this [Github issue](https://github.com/unslothai/unsloth/issues/73) for help on debugging Conda installs.
```bash
conda install pytorch torchvision torchaudio pytorch-cuda=<12.1/11.8> -c pytorch -c nvidia
conda create --name unsloth_env python=3.10
conda activate unsloth_env

conda install xformers -c xformers -y
conda install pytorch cudatoolkit torchvision torchaudio pytorch-cuda=<12.1/11.8> -c pytorch -c nvidia

conda install xformers -c xformers

pip install bitsandbytes

Expand Down Expand Up @@ -141,6 +144,7 @@ pip install --upgrade pip
```
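
A quick way to confirm which CUDA build of PyTorch actually got installed (and therefore which `pytorch-cuda=<12.1/11.8>` option and extras to pick) is the short check below. It is a minimal verification sketch, not part of this commit or the README.

```python
# Minimal sanity check (not from this commit): confirm which CUDA build of
# PyTorch is installed before choosing pytorch-cuda=11.8 or 12.1 and the
# matching xformers / bitsandbytes wheels.
import torch

print(torch.__version__)          # e.g. 2.2.0
print(torch.version.cuda)         # e.g. 12.1 -> use pytorch-cuda=12.1
print(torch.cuda.is_available())  # should be True on a GPU machine
```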

## 📜 Documentation
- Go to our [Wiki page](https://github.com/unslothai/unsloth/wiki) for saving to GGUF, checkpointing, evaluation and more!
- We support Huggingface's TRL, Trainer, Seq2SeqTrainer or even Pytorch code!
- We're in 🤗Hugging Face's official docs! Check out the [SFT docs](https://huggingface.co/docs/trl/main/en/sft_trainer#accelerate-fine-tuning-2x-using-unsloth) and [DPO docs](https://huggingface.co/docs/trl/main/en/dpo_trainer#accelerate-dpo-fine-tuning-using-unsloth)!

@@ -162,7 +166,8 @@ fourbit_models = [
"unsloth/llama-2-13b-bnb-4bit",
"unsloth/codellama-34b-bnb-4bit",
"unsloth/tinyllama-bnb-4bit",
]
] # Go to https://huggingface.co/unsloth for more 4-bit models!

# Load Llama model
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/mistral-7b-bnb-4bit", # Supports Llama, Mistral - replace this!
@@ -183,6 +188,8 @@ model = FastLanguageModel.get_peft_model(
use_gradient_checkpointing = True,
random_state = 3407,
max_seq_length = max_seq_length,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)

trainer = SFTTrainer(
@@ -205,6 +212,12 @@ trainer = SFTTrainer(
),
)
trainer.train()

# Go to https://github.com/unslothai/unsloth/wiki for advanced tips like
# (1) Saving to GGUF / merging to 16bit for vLLM
# (2) Continued training from a saved LoRA adapter
# (3) Adding an evaluation loop / OOMs
# (4) Customized chat templates
```
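
The diff above adds `use_rslora` and `loftq_config` arguments to `get_peft_model`. Below is a hedged sketch of enabling both, continuing the example above; the `LoftQConfig` class is assumed to come from the `peft` library, and the values are illustrative rather than taken from this commit.

```python
# Illustrative only: turning on rank-stabilized LoRA and LoftQ initialization
# via the new arguments shown in the diff above. LoftQConfig is assumed to
# come from the peft library; values here are examples, not recommendations.
from peft import LoftQConfig

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 16,
    use_rslora = True,                            # rank-stabilized LoRA scaling
    loftq_config = LoftQConfig(loftq_bits = 4),   # LoftQ 4-bit initialization
    random_state = 3407,
    max_seq_length = max_seq_length,
)
```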

<a name="DPO"></a>
31 changes: 16 additions & 15 deletions pyproject.toml
@@ -42,6 +42,7 @@ huggingface = [
"tqdm",
"psutil",
"wheel>=0.42.0",
"numpy",
]
cu118only = [
"xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.22.post7%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9'",
@@ -83,22 +84,22 @@ cu121 = [
"bitsandbytes",
"unsloth[cu121only]",
]
cu118_torch211 = [
cu118-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu118onlytorch211]",
]
cu121_torch211 = [
cu121-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch211]",
]
cu118_torch220 = [
cu118-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu118onlytorch220]",
]
cu121_torch220 = [
cu121-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch220]",
@@ -112,79 +113,79 @@ conda = [
colab = [
"unsloth[cu121]",
]
colab_ampere = [
colab-ampere = [
"unsloth[cu121]",
"packaging",
"ninja",
"flash-attn",
]
colab_torch211 = [
colab-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch211]",
]
colab_ampere_torch211 = [
colab-ampere-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch211]",
"packaging",
"ninja",
"flash-attn",
]
colab_torch220 = [
colab-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch220]",
]
colab_ampere_torch220 = [
colab-ampere-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch220]",
"packaging",
"ninja",
"flash-attn",
]
cu118_ampere = [
cu118-ampere = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu118only]",
"packaging",
"ninja",
"flash-attn",
]
cu121_ampere = [
cu121-ampere = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121only]",
"packaging",
"ninja",
"flash-attn",
]
cu118_ampere_torch211 = [
cu118-ampere-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu118onlytorch211]",
"packaging",
"ninja",
"flash-attn",
]
cu121_ampere_torch211 = [
cu121-ampere-torch211 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch211]",
"packaging",
"ninja",
"flash-attn",
]
cu118_ampere_torch220 = [
cu118-ampere-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu118onlytorch220]",
"packaging",
"ninja",
"flash-attn",
]
cu121_ampere_torch220 = [
cu121-ampere-torch220 = [
"unsloth[huggingface]",
"bitsandbytes",
"unsloth[cu121onlytorch220]",
33 changes: 29 additions & 4 deletions unsloth/__init__.py
@@ -59,14 +59,38 @@
import bitsandbytes as bnb
import triton
from triton.common.build import libcuda_dirs
import os
import re
import numpy as np
import subprocess

try:
    cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
    libcuda_dirs()
except:
    warnings.warn(
        "Running `ldconfig /usr/lib64-nvidia` to link CUDA."\
        "Unsloth: Running `ldconfig /usr/lib64-nvidia` to link CUDA."\
    )
    os.system("ldconfig /usr/lib64-nvidia")

    if os.path.exists("/usr/lib64-nvidia"):
        os.system("ldconfig /usr/lib64-nvidia")
    elif os.path.exists("/usr/local"):
        # Sometimes bitsandbytes cannot be linked properly in Runpod for example
        possible_cudas = subprocess.check_output(["ls", "-al", "/usr/local"]).decode("utf-8").split("\n")
        find_cuda = re.compile(r"[\s](cuda\-[\d\.]{2,})$")
        possible_cudas = [find_cuda.search(x) for x in possible_cudas]
        possible_cudas = [x.group(1) for x in possible_cudas if x is not None]

        # Try linking cuda folder, or everything in local
        if len(possible_cudas) == 0:
            os.system(f"ldconfig /usr/local/")
        else:
            find_number = re.compile(r"([\d\.]{2,})")
            latest_cuda = np.argsort([float(find_number.search(x).group(1)) for x in possible_cudas])[::-1][0]
            latest_cuda = possible_cudas[latest_cuda]
            os.system(f"ldconfig /usr/local/{latest_cuda}")
    pass

importlib.reload(bnb)
importlib.reload(triton)
try:
@@ -75,9 +99,10 @@
    cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
    libcuda_dirs()
except:
    raise ImportError("CUDA is not linked properly.\n"\
    raise ImportError("Unsloth: CUDA is not linked properly.\n"\
        "We tried running `ldconfig /usr/lib64-nvidia` ourselves, but it didn't work.\n"\
        "You need to run in your terminal `ldconfig /usr/lib64-nvidia` yourself, then import Unsloth.")
        "You need to run in your terminal `sudo ldconfig /usr/lib64-nvidia` yourself, then import Unsloth.\n"\
        "Also try `sudo ldconfig /usr/local/cuda-xx.x` - find the latest cuda version.")
pass

from .models import *
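
For context, the new fallback above scans `/usr/local` for the newest `cuda-XX.X` directory and points `ldconfig` at it. The standalone sketch below reproduces just that detection step in isolation; it is illustrative only, and the function name is hypothetical rather than part of the commit.

```python
# Illustrative standalone version of the CUDA-directory detection added above:
# pick the highest-versioned /usr/local/cuda-XX.X folder, if one exists.
import os
import re

def find_latest_cuda(base = "/usr/local"):
    if not os.path.isdir(base):
        return None
    pattern = re.compile(r"^cuda-(\d+(?:\.\d+)*)$")
    found = []
    for name in os.listdir(base):
        match = pattern.match(name)
        if match:
            version = tuple(int(part) for part in match.group(1).split("."))
            found.append((version, name))
    return os.path.join(base, max(found)[1]) if found else None

if __name__ == "__main__":
    latest = find_latest_cuda()   # e.g. "/usr/local/cuda-12.1"
    if latest is not None:
        print(f"Try: sudo ldconfig {latest}")
```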
1 change: 1 addition & 0 deletions unsloth/models/_utils.py
@@ -17,6 +17,7 @@
import warnings
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "torch")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "huggingface_hub")
warnings.filterwarnings(action = "ignore", category = RuntimeWarning, module = "subprocess")
import bitsandbytes as bnb
from transformers.models.llama.modeling_llama import logger
from transformers import AutoTokenizer