Resolving errors when training Llama3-Chinese-8B-Instruct with LLaMA-Factory

Model path
Uploader: llama中文社区 (Llama Chinese Community). Model page: https://www.modelscope.cn/models/FlagAlpha/Llama3-Chinese-8B-Instruct/summary
sys info
gpu: Tesla V100-PCIE-32GB
python: 3.10
model: Llama3-Chinese-8B-Instruct
nvcc --version
cuda 11.8
python
>>> import torch
>>> print(torch.__version__)
13.1

1 pip install flash_attn times out
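Installing flash_attn with plain pip kicks off a long source build of the CUDA kernels, which is usually where the timeout happens. A minimal workaround sketch, assuming the failure is in pip's download/build step (the flags below are standard pip options, not something stated in the original notes); if it still fails, fall back to the prebuilt whl in step 2:

# Raise pip's network timeout and skip build isolation so the build reuses the already-installed torch.
pip install flash-attn --no-build-isolation --default-timeout 600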

2 Download the whl instead

CUDA SETUP: Detected CUDA version 118
/usr/local/miniconda/envs/car/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!
warn(msg)
CUDA SETUP: Loading binary /usr/local/miniconda/envs/car/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so...
Traceback (most recent call last):
File "/data/mlops/code/LLaMA-Factory/examples/lora_single_gpu/../../src/train_bash.py", line 1, in <module>
from llmtuner import run_exp
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/__init__.py", line 3, in <module>
from .api import create_app
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/api/__init__.py", line 1, in <module>
from .app import create_app
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/api/app.py", line 8, in <module>
from ..chat import ChatModel
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/chat/__init__.py", line 2, in <module>
from .chat_model import ChatModel
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/chat/chat_model.py", line 6, in <module>
from .hf_engine import HuggingfaceEngine
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/chat/hf_engine.py", line 12, in <module>
from ..model import load_model, load_tokenizer
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/model/__init__.py", line 1, in <module>
from .loader import load_config, load_model, load_tokenizer
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/model/loader.py", line 9, in <module>
from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/model/patcher.py", line 14, in <module>
from .utils.longlora import configure_longlora
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/model/utils/longlora.py", line 6, in <module>
from transformers.models.llama.modeling_llama import (
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 55, in <module>
from flash_attn import flash_attn_func, flash_attn_varlen_func
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/flash_attn/__init__.py", line 3, in <module>
from flash_attn.flash_attn_interface import (
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/flash_attn/flash_attn_interface.py", line 10, in <module>
import flash_attn_2_cuda as flash_attn_cuda
ImportError: /usr/local/miniconda/envs/car/lib/python3.9/site-packages/flash_attn_2_cuda.cpython-39-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops19empty_memory_format4callEN3c108ArrayRefIlEENS2_8optionalINS2_10ScalarTypeEEENS5_INS2_6LayoutEEENS5_INS2_6DeviceEEENS5_IbEENS5_INS2_12MemoryFormatEEE

This error occurs because the downloaded flash_attn wheel was built for versions that do not match the container environment (CUDA / torch / Python), so its compiled CUDA extension cannot resolve symbols in the installed torch. Re-check the environment:

nvcc --version
cuda 11.8
python
>>> import torch
>>> print(torch.__version__)
13.1
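The wheel filename on the releases page encodes every field that has to match: the CUDA version (cu118), the torch major.minor (torch2.1, torch2.2, ...), the C++11 ABI flag, and the CPython tag (cp39, cp310). A small check like the one below prints them all; it is only a sketch using standard torch APIs and is not part of the original notes:

# Print the fields that the flash_attn wheel name must match.
python - <<'EOF'
import sys, torch
print("python  :", sys.version.split()[0])          # -> cp39 / cp310 wheel tag
print("torch   :", torch.__version__)               # -> torch2.1 etc. in the wheel name
print("cuda    :", torch.version.cuda)              # -> cu118 / cu121 in the wheel name
print("cxx11abi:", torch.compiled_with_cxx11_abi()) # -> cxx11abiTRUE / cxx11abiFALSE
EOF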

Download a prebuilt wheel that matches these versions from the flash-attention releases page: https://github.com/Dao-AILab/flash-attention/releases?page=1

(image: flash-attention releases page showing the prebuilt wheel assets)
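Then pick the release asset whose name matches the values printed above and install it directly. The filename below is only an illustration of the release naming pattern (the 2.5.8 version is a placeholder assumption); substitute whatever matches the container:

# Hypothetical wheel name -- replace the version fields with the ones printed above.
pip install ./flash_attn-2.5.8+cu118torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl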

3 Wrong transformers version

Traceback (most recent call last):
File "/data/mlops/code/LLaMA-Factory/examples/lora_single_gpu/../../src/train_bash.py", line 14, in <module>
main()
File "/data/mlops/code/LLaMA-Factory/examples/lora_single_gpu/../../src/train_bash.py", line 5, in main
run_exp()
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/train/tuner.py", line 33, in run_exp
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/train/sft/workflow.py", line 34, in run_sft
model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
File "/data/mlops/code/LLaMA-Factory/src/llmtuner/model/loader.py", line 128, in load_model
model = AutoModelForCausalLM.from_pretrained(**init_kwargs)
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 553, in from_pretrained
model_class = get_class_from_dynamic_module(
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/transformers/dynamic_module_utils.py", line 500, in get_class_from_dynamic_module
return get_class_in_module(class_name, final_module.replace(".py", ""))
File "/usr/local/miniconda/envs/car/lib/python3.9/site-packages/transformers/dynamic_module_utils.py", line 200, in get_class_in_module
module = importlib.import_module(module_path)
File "/usr/local/miniconda/envs/car/lib/python3.9/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 850, in exec_module
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "/root/.cache/huggingface/modules/transformers_modules/modelDir/modeling_llama.py", line 33, in <module>
from transformers.cache_utils import Cache, DynamicCache, StaticCache
ImportError: cannot import name 'StaticCache' from 'transformers.cache_utils' (/usr/local/miniconda/envs/car/lib/python3.9/site-packages/transformers/cache_utils.py)

pip uninstall transformers
pip install transformers
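Reinstalling without a pin simply pulls the latest transformers, which provides StaticCache (it was added in a newer release, roughly the 4.38 line; that exact cutoff is an assumption, not something from the original notes). After reinstalling, a quick sanity check:

# Confirm the new transformers actually exposes StaticCache before re-running training.
python -c "import transformers; from transformers.cache_utils import StaticCache; print(transformers.__version__)"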
