vault backup: 2024-09-10 13:59:42
Affected files: .obsidian/workspace.json, Unsloth.md
parent aa833b02f5
commit 20bb17a35e
.obsidian/workspace.json

@@ -66,9 +66,21 @@
 "source": false
 }
 }
 },
+{
+"id": "6d90217a583512b1",
+"type": "leaf",
+"state": {
+"type": "markdown",
+"state": {
+"file": "Unsloth.md",
+"mode": "source",
+"source": false
+}
+}
+}
 ],
-"currentTab": 4
+"currentTab": 5
 }
 ],
 "direction": "vertical"

@@ -135,7 +147,7 @@
 "state": {
 "type": "backlink",
 "state": {
-"file": "neo4j.md",
+"file": "Unsloth.md",
 "collapseAll": false,
 "extraContext": false,
 "sortOrder": "alphabetical",

@@ -152,7 +164,7 @@
 "state": {
 "type": "outgoing-link",
 "state": {
-"file": "neo4j.md",
+"file": "Unsloth.md",
 "linksCollapsed": false,
 "unlinkedCollapsed": true
 }

@@ -175,7 +187,7 @@
 "state": {
 "type": "outline",
 "state": {
-"file": "neo4j.md"
+"file": "Unsloth.md"
 }
 }
 },

@@ -218,8 +230,10 @@
 "copilot:Copilot Chat": false
 }
 },
-"active": "8b4b3854a1cb6ef8",
+"active": "6d90217a583512b1",
 "lastOpenFiles": [
+"Unsloth.md",
+"neo4j.md",
 "YuNoHost - Apps.md",
 "Python FastAPI.md",
 "GTA San Andreas Cheat codes.md",

@@ -234,7 +248,6 @@
 "Docker.md",
 "Weaviate.md",
 "Install neo4j on Debian.md",
-"neo4j.md",
 "Debian.md",
 "GNU-Linux.md",
 "Linux.md",

Unsloth.md

@@ -0,0 +1,312 @@

# 👋Welcome

New to Unsloth? Start here!

[Unsloth](https://github.com/unslothai/unsloth) makes finetuning large language models like Llama-3, Mistral, Phi-3 and Gemma 2x faster, with 70% less memory, and with no degradation in accuracy! Our docs will help you navigate training your very own custom model. They cover the essentials of [creating datasets](https://docs.unsloth.ai/tutorials/how-to-finetune-llama-3-and-export-to-ollama#id-6.-alpaca-dataset), running and [deploying](https://docs.unsloth.ai/tutorials/how-to-finetune-llama-3-and-export-to-ollama#id-13.-exporting-to-ollama) your model. You'll also learn how to integrate third-party tools, work with services like [Google Colab](https://docs.unsloth.ai/tutorials/how-to-finetune-llama-3-and-export-to-ollama#id-4.-selecting-a-model-to-finetune) and more!

## What is finetuning and why?

If we want a language model to learn a new skill, a new language, a new programming language, or simply to follow and answer instructions the way ChatGPT does, we do finetuning!

Finetuning is the process of updating the actual "brains" of the language model through a process called back-propagation. But finetuning can get very slow and very resource intensive.
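
To see what "updating the brains" means mechanically, here is a tiny, purely illustrative back-propagation step in plain PyTorch (the toy model, data and learning rate are placeholders; Unsloth wraps and accelerates this same loop for real language models):

```
import torch

# Toy stand-in for a language model layer: map a hidden state to next-token logits.
model = torch.nn.Linear(16, 16)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

inputs = torch.randn(4, 16)           # placeholder batch of hidden states
targets = torch.randint(0, 16, (4,))  # placeholder next-token labels

logits = model(inputs)                              # forward pass
loss = torch.nn.functional.cross_entropy(logits, targets)
loss.backward()                                     # back-propagation: compute gradients
optimizer.step()                                    # update the weights (the model's "brains")
optimizer.zero_grad()
```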

## How to use Unsloth?

Our open-source version of [Unsloth](https://github.com/unslothai/unsloth) can be installed locally or run on a GPU service like Google Colab. Most people use Unsloth through Google Colab, which provides a free GPU to train with. You can access all of our notebooks [here](https://github.com/unslothai/unsloth#-finetune-for-free).
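
A typical Unsloth run has the same shape in every notebook: load a model in 4-bit, attach LoRA adapters, then train with TRL's `SFTTrainer`. Below is a compressed sketch of that flow; the model name, dataset file and hyperparameters are illustrative choices, not official recommendations:

```
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# 1. Load a 4-bit quantized base model (any repo from the model table below works the same way).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = 2048,
    load_in_4bit = True,
)

# 2. Attach LoRA adapters so only a small fraction of the weights is trained.
model = FastLanguageModel.get_peft_model(model, r = 16, lora_alpha = 16)

# 3. Any dataset with a "text" column works; formatting instruction datasets is covered in the notebooks.
dataset = load_dataset("json", data_files = "train.jsonl", split = "train")

# 4. Train with TRL's SFTTrainer.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    args = TrainingArguments(per_device_train_batch_size = 2, max_steps = 60, output_dir = "outputs"),
)
trainer.train()
```

The notebooks linked in the next section run this workflow end to end on a free Colab GPU.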

# 📒Unsloth Notebooks

See the list below for all our notebooks:

#### Google Colab

### Main notebooks:

- [Llama 3.1 (8B)](https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing)
- [Mistral NeMo (12B)](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing)
- [Gemma 2 (9B)](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing)
- [_**Inference chat UI**_](https://colab.research.google.com/drive/1T-YBVfnphoVc8E2E854qF3jdia2Ll2W2?usp=sharing)
- [Phi-3.5 (mini)](https://colab.research.google.com/drive/1lN6hPQveB_mHSnTOYifygFcrO8C1bxq4?usp=sharing)
- [Llama 3 (8B)](https://colab.research.google.com/drive/135ced7oHytdxu3N2DNe1Z0kqjyYIkDXp?usp=sharing)
- [Mistral v0.3 (7B)](https://colab.research.google.com/drive/1_yNCks4BTD5zOnjozppphh5GzMFaMKq_?usp=sharing)
- [Phi-3 (medium)](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)
- [Qwen2 (7B)](https://colab.research.google.com/drive/1mvwsIQWDs2EdZxZQF9pRGnnOvE86MVvR?usp=sharing)
- [Gemma (2B)](https://colab.research.google.com/drive/15gGm7x_jTm017_Ic8e317tdIpDG53Mtu?usp=sharing)
- [TinyLlama](https://colab.research.google.com/drive/1AZghoNBQaMDgWJpi4RbffGM1h6raLUj9?usp=sharing)

### Other notebooks:

- [ORPO](https://colab.research.google.com/drive/11t4njE3c4Lxl-07OD8lJSMKkfyJml3Tn?usp=sharing)
- [Ollama](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)
- [Text Classification](https://github.com/timothelaborie/text_classification_scripts/blob/main/unsloth_classification.ipynb) by Timotheeee
- [Multiple Datasets](https://colab.research.google.com/drive/1njCCbE1YVal9xC83hjdo2hiGItpY_D6t?usp=sharing) by Flail
- [DPO Zephyr](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing)
- [Conversational](https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing)
- [ChatML](https://colab.research.google.com/drive/15F1xyn8497_dUbxZP4zWmPZ3PJx1Oymv?usp=sharing)
- [Text Completion](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)
- [Continued Pretraining](https://colab.research.google.com/drive/1tEd1FrOXWMnCU9UIvdYhs61tkxdMuKZu?usp=sharing)
- [Mistral v0.3 Instruct (7B)](https://colab.research.google.com/drive/15F1xyn8497_dUbxZP4zWmPZ3PJx1Oymv?usp=sharing)
- [CodeGemma (7B)](https://colab.research.google.com/drive/19lwcRk_ZQ_ZtX-qzFP3qZBBHZNcMD1hh?usp=sharing)
- [Inference only](https://colab.research.google.com/drive/1aqlNQi7MMJbynFDyOQteD2t0yVfjb9Zh?usp=sharing)

# 📚All Our Models

See the list below for all of our 4-bit bnb (bitsandbytes) uploaded models.

You can also view all our uploaded models on [Hugging Face directly](https://huggingface.co/unsloth).

|Model|Base|Instruct|
|---|---|---|
|Llama 3.1|- [8B](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-bnb-4bit)<br> <br>- [70B](https://huggingface.co/unsloth/Meta-Llama-3.1-70B-bnb-4bit)<br> <br>- [405B](https://huggingface.co/unsloth/Meta-Llama-3.1-405B-bnb-4bit)|- [8B](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit)<br> <br>- [70B](https://huggingface.co/unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit)<br> <br>- [405B](https://huggingface.co/unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit/)|
|Phi-3.5||- [mini](https://huggingface.co/unsloth/Phi-3.5-mini-instruct-bnb-4bit)|
|Mistral NeMo|- [12B](https://huggingface.co/unsloth/Mistral-Nemo-Base-2407-bnb-4bit)|- [12B](https://huggingface.co/unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit)|
|Gemma 2|- [2B](https://huggingface.co/unsloth/gemma-2-2b-bnb-4bit)<br> <br>- [9B](https://huggingface.co/unsloth/gemma-2-9b-bnb-4bit)<br> <br>- [27B](https://huggingface.co/unsloth/gemma-2-27b-bnb-4bit)|- [2B](https://huggingface.co/unsloth/gemma-2-2b-it-bnb-4bit)<br> <br>- [9B](https://huggingface.co/unsloth/gemma-2-9b-it-bnb-4bit)<br> <br>- [27B](https://huggingface.co/unsloth/gemma-2-27b-it-bnb-4bit)|
|Llama 3|- [8B](https://huggingface.co/unsloth/llama-3-8b-bnb-4bit)<br> <br>- [70B](https://huggingface.co/unsloth/llama-3-70b-bnb-4bit)|- [8B](https://huggingface.co/unsloth/llama-3-8b-Instruct-bnb-4bit)<br> <br>- [70B](https://huggingface.co/unsloth/llama-3-70b-Instruct-bnb-4bit)|
|Phi-3||- [mini](https://huggingface.co/unsloth/Phi-3-mini-4k-instruct-bnb-4bit)<br> <br>- [medium](https://huggingface.co/unsloth/Phi-3-medium-4k-instruct-bnb-4bit)|
|Mistral|- [7B (v0.3)](https://huggingface.co/unsloth/mistral-7b-v0.3-bnb-4bit)<br> <br>- [7B (v0.2)](https://huggingface.co/unsloth/mistral-7b-v0.2-bnb-4bit)|- [Large](https://huggingface.co/unsloth/Mistral-Large-Instruct-2407-bnb-4bit)<br> <br>- [7B (v0.3)](https://huggingface.co/unsloth/mistral-7b-instruct-v0.3-bnb-4bit)<br> <br>- [7B (v0.2)](https://huggingface.co/unsloth/mistral-7b-instruct-v0.2-bnb-4bit)|
|Qwen2|- [1.5B](https://huggingface.co/unsloth/Qwen2-1.5B-bnb-4bit)<br> <br>- [7B](https://huggingface.co/unsloth/Qwen2-7B-bnb-4bit)<br> <br>- [72B](https://huggingface.co/unsloth/Qwen2-72B-bnb-4bit)|- [1.5B](https://huggingface.co/unsloth/Qwen2-1.5B-Instruct-bnb-4bit)<br> <br>- [7B](https://huggingface.co/unsloth/Qwen2-7B-Instruct-bnb-4bit)<br> <br>- [72B](https://huggingface.co/unsloth/Qwen2-72B-Instruct-bnb-4bit)|
|Llama 2|- [7B](https://huggingface.co/unsloth/llama-2-7b-bnb-4bit)<br> <br>- [13B](https://huggingface.co/unsloth/llama-2-13b-bnb-4bit)|- [7B](https://huggingface.co/unsloth/llama-2-7b-chat-bnb-4bit)|
|TinyLlama|- [Base](https://huggingface.co/unsloth/tinyllama-bnb-4bit)|- [Instruct](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit)|
|Zephyr SFT||- [Instruct](https://huggingface.co/unsloth/zephyr-sft-bnb-4bit)|
|CodeLlama|- [7B](https://huggingface.co/unsloth/codellama-7b-bnb-4bit)<br> <br>- [13B](https://huggingface.co/unsloth/codellama-13b-bnb-4bit)<br> <br>- [34B](https://huggingface.co/unsloth/codellama-34b-bnb-4bit)||
|Yi|- [6B (v 1.5)](https://huggingface.co/unsloth/Yi-1.5-6B-bnb-4bit)<br> <br>- [6B](https://huggingface.co/unsloth/yi-6b-bnb-4bit)<br> <br>- [34B](https://huggingface.co/unsloth/yi-34b-bnb-4bit)|- [34B](https://huggingface.co/unsloth/yi-34b-chat-bnb-4bit)|
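
The "Base" column is what you normally finetune; models in the "Instruct" column can also be loaded as-is for quick inference. A minimal sketch (repo name taken from the table above; prompt handling is simplified and skips the chat template the notebooks normally apply):

```
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",  # 4-bit Instruct repo from the table
    max_seq_length = 2048,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode

inputs = tokenizer("Write one sentence about sloths.", return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 64)
print(tokenizer.decode(outputs[0], skip_special_tokens = True))
```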

# 📥Installation

Learn to install Unsloth locally or on Google Colab.

# Updating

To update Unsloth, follow the steps below:

## Updating without dependency updates

```
pip uninstall unsloth -y
pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
```

# Conda Install

To install Unsloth locally with Conda, follow the steps below:

Select either `pytorch-cuda=11.8` for CUDA 11.8 or `pytorch-cuda=12.1` for CUDA 12.1. If you have `mamba`, use `mamba` instead of `conda` for faster solving. See this [GitHub issue](https://github.com/unslothai/unsloth/issues/73) for help on debugging Conda installs.

```
conda create --name unsloth_env \
    python=3.10 \
    pytorch-cuda=<11.8/12.1> \
    pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
    -y
conda activate unsloth_env

pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

pip install --no-deps "trl<0.9.0" peft accelerate bitsandbytes
```
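
Once the environment is activated, a quick sanity check (not part of the official steps) is to import the core pieces and print the versions the Pip instructions below refer to:

```
import torch
from unsloth import FastLanguageModel  # fails here if the install is broken

print(torch.__version__, torch.version.cuda, torch.cuda.is_available())
```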

# Pip Install

To install Unsloth locally via pip, follow the steps below:

Do **NOT** use this if you have Anaconda. You must use the Conda install method above, or else stuff will break.

1. Find your CUDA version via:

```
import torch; torch.version.cuda
```

2. For PyTorch 2.1.0: you can update PyTorch via pip (interchange `cu121` / `cu118`). Go to https://pytorch.org/ to learn more. Select either `cu118` for CUDA 11.8 or `cu121` for CUDA 12.1. If you have an RTX 3060 or higher (A100, H100, etc.), use the `"ampere"` path. For PyTorch 2.1.1 go to step 3; for PyTorch 2.2.0 go to step 4. (A small helper that maps your detected versions to the right install tag is sketched after these steps.)

```
pip install --upgrade --force-reinstall --no-cache-dir torch==2.1.0 triton \
  --index-url https://download.pytorch.org/whl/cu121
```

```
pip install "unsloth[cu118] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu118-ampere] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-ampere] @ git+https://github.com/unslothai/unsloth.git"
```

3. For PyTorch 2.1.1: use the `"ampere"` path for newer RTX 30xx GPUs or higher.

```
pip install --upgrade --force-reinstall --no-cache-dir torch==2.1.1 triton \
  --index-url https://download.pytorch.org/whl/cu121
```

```
pip install "unsloth[cu118-torch211] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-torch211] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu118-ampere-torch211] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-ampere-torch211] @ git+https://github.com/unslothai/unsloth.git"
```

4. For PyTorch 2.2.0: use the `"ampere"` path for newer RTX 30xx GPUs or higher.

```
pip install --upgrade --force-reinstall --no-cache-dir torch==2.2.0 triton \
  --index-url https://download.pytorch.org/whl/cu121
```

```
pip install "unsloth[cu118-torch220] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-torch220] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu118-ampere-torch220] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-ampere-torch220] @ git+https://github.com/unslothai/unsloth.git"
```

5. If you get errors, try the below first, then go back to step 1:

```
pip install --upgrade pip
```

6. For PyTorch 2.2.1:

```
# RTX 3090, 4090 Ampere GPUs:
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes

# Pre Ampere RTX 2080, T4, GTX 1080 GPUs:
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
```

7. For PyTorch 2.3.0: use the `"ampere"` path for newer RTX 30xx GPUs or higher.

```
pip install "unsloth[cu118-torch230] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-torch230] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu118-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git"
pip install "unsloth[cu121-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git"
```

8. To troubleshoot installs, try the below (all must succeed). Xformers should almost always be available.

```
nvcc
python -m xformers.info
python -m bitsandbytes
```
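
As referenced in step 2, here is a small, purely illustrative helper that prints which `unsloth[...]` tag matches your local setup. The tag names are the ones listed in the steps above; the mapping logic itself is a convenience assumption, not an official tool:

```
import torch

torch_version = torch.__version__.split("+")[0]   # e.g. "2.2.0"
cuda_version = torch.version.cuda or ""           # e.g. "12.1"
is_ampere = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

cuda_tag = "cu121" if cuda_version.startswith("12") else "cu118"
torch_tag = {"2.1.1": "-torch211", "2.2.0": "-torch220", "2.3.0": "-torch230"}.get(torch_version, "")
ampere_tag = "-ampere" if is_ampere else ""

print(f'pip install "unsloth[{cuda_tag}{ampere_tag}{torch_tag}] @ git+https://github.com/unslothai/unsloth.git"')
```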

# Google Colab

To install and run Unsloth on Google Colab, follow the steps below:



If you have never used a Colab notebook, a quick primer on the notebook itself:

1. **Play button at each "cell".** Click this to run that cell's code. You must not skip any cells, and you must run every cell in order. If you encounter errors, simply rerun the cell you did not run. Another option is to press CTRL + ENTER if you don't want to click the play button.

2. **Runtime button in the top toolbar.** You can also use this button and hit "Run all" to run the entire notebook in one go. This skips all the customization steps, but is a good first try.

3. **Connect / Reconnect T4 button.** T4 is the free GPU Google provides. It's quite powerful!

The first installation cell looks like the below. Remember to click the PLAY button in the brackets [ ]. It grabs our open-source GitHub package and installs some other packages.
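
The exact cell differs slightly between notebooks, but it is typically along these lines (a sketch assembled from the pip commands in the Installation section above; `%%capture` just hides the install output):

```
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
```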

# Basics

# 📂Saving Models

Learn how to save your finetuned model so you can run it in your favorite inference engine.

# Saving to GGUF

Saving models to 16-bit for GGUF so you can use them with Ollama, Jan AI, Open WebUI and more!

To save to GGUF, use the below to save locally:

```
model.save_pretrained_gguf("dir", tokenizer, quantization_method = "q4_k_m")
model.save_pretrained_gguf("dir", tokenizer, quantization_method = "q8_0")
model.save_pretrained_gguf("dir", tokenizer, quantization_method = "f16")
```

To push to the Hugging Face Hub:

```
model.push_to_hub_gguf("hf_username/dir", tokenizer, quantization_method = "q4_k_m")
model.push_to_hub_gguf("hf_username/dir", tokenizer, quantization_method = "q8_0")
```

All supported quantization options for `quantization_method` are listed below:

```
# https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19
# From https://mlabonne.github.io/blog/posts/Quantize_Llama_2_models_using_ggml.html
ALLOWED_QUANTS = \
{
    "not_quantized"  : "Recommended. Fast conversion. Slow inference, big files.",
    "fast_quantized" : "Recommended. Fast conversion. OK inference, OK file size.",
    "quantized"      : "Recommended. Slow conversion. Fast inference, small files.",
    "f32"     : "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
    "f16"     : "Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
    "q8_0"    : "Fast conversion. High resource use, but generally acceptable.",
    "q4_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
    "q5_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
    "q2_k"    : "Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.",
    "q3_k_l"  : "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_m"  : "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_s"  : "Uses Q3_K for all tensors",
    "q4_0"    : "Original quant method, 4-bit.",
    "q4_1"    : "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
    "q4_k_s"  : "Uses Q4_K for all tensors",
    "q4_k"    : "alias for q4_k_m",
    "q5_k"    : "alias for q5_k_m",
    "q5_0"    : "Higher accuracy, higher resource usage and slower inference.",
    "q5_1"    : "Even higher accuracy, resource usage and slower inference.",
    "q5_k_s"  : "Uses Q5_K for all tensors",
    "q6_k"    : "Uses Q8_K for all tensors",
    "iq2_xxs" : "2.06 bpw quantization",
    "iq2_xs"  : "2.31 bpw quantization",
    "iq3_xxs" : "3.06 bpw quantization",
    "q3_k_xs" : "3-bit extra small quantization",
}
```

# Saving to Ollama