You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[INFO:swift] model_parameter_info: PeftModelForSequenceClassification: 1352.5729M Params (105.5293M Trainable [7.8021%]), 9428.1780M Buffers.
[ERROR:modelscope] The request model: unknown does not exist!
[ERROR:modelscope] The request model: unknown does not exist!
/ssd3/CodeSpace/ms-swift-classify/swift/trainers/mixin.py:77: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.
super().__init__(
[2025-02-10 17:39:22,154] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[INFO:swift] The logging file will be saved in: /ssd3/CodeSpace/ms-swift-classify/output/Qwen2.5-72B-Instruct-AWQ-Instruct-porn_length256_lora8_bs64_epoch3_lr1e-4/v12-20250210-173859/logging.jsonl
Train: 0%| | 0/186 [00:00<?, ?it/s]
/tmp/tmp_x91vu0x/main.c:6:23: fatal error: stdatomic.h: No such file or directory
#include <stdatomic.h>
^
compilation terminated.
Traceback (most recent call last):
File "/ssd3/CodeSpace/ms-swift-classify/swift/cli/sft.py", line 5, in <module>
sft_main()
File "/ssd3/CodeSpace/ms-swift-classify/swift/llm/train/sft.py", line 256, in sft_main
return SwiftSft(args).main()
File "/ssd3/CodeSpace/ms-swift-classify/swift/llm/base.py", line 46, in main
result = self.run()
File "/ssd3/CodeSpace/ms-swift-classify/swift/llm/train/sft.py", line 137, in run
return self.train(trainer)
File "/ssd3/CodeSpace/ms-swift-classify/swift/llm/train/sft.py", line 196, in train
trainer.train(trainer.args.resume_from_checkpoint)
File "/ssd3/CodeSpace/ms-swift-classify/swift/trainers/trainers.py", line 53, in train
return super().train(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/swift/trainers/mixin.py", line 261, in train
res = super().train(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/trainer.py", line 2164, in train
return inner_training_loop(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/trainer.py", line 2524, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/trainer.py", line 3654, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/ssd3/CodeSpace/ms-swift-classify/swift/trainers/trainers.py", line 56, in compute_loss
loss, outputs = super().compute_loss(model, inputs, return_outputs=True)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/trainer.py", line 3708, in compute_loss
outputs = model(**inputs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/accelerate/utils/operations.py", line 819, in forward
return model_forward(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/accelerate/utils/operations.py", line 807, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
return func(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/peft/peft_model.py", line 1521, in forward
return self.base_model(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/peft/tuners/tuners_utils.py", line 197, in forward
return self.model.forward(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1252, in forward
transformer_outputs = self.model(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 883, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/ssd3/CodeSpace/ms-swift-classify/swift/trainers/arguments.py", line 49, in _new_checkpoint
return _old_checkpoint(*args, use_reentrant=use_reentrant_, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/_compile.py", line 32, in inner
return disable_fn(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
return fn(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/utils/checkpoint.py", line 264, in forward
outputs = run_function(*args)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 623, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 501, in forward
query_states = self.q_proj(hidden_states)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/peft/tuners/lora/awq.py", line 62, in forward
result = self.quant_linear_module(x)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/awq/modules/linear/gemm.py", line 258, in forward
out = WQLinearMMFunction.apply(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/awq/modules/linear/gemm.py", line 65, in forward
out = awq_gemm_triton(
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/awq/modules/triton/gemm.py", line 343, in awq_gemm_triton
awq_gemm_kernel[grid](
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/jit.py", line 345, in <lambda>
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/jit.py", line 607, in run
device = driver.active.get_current_device()
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/driver.py", line 23, in __getattr__
self._initialize_obj()
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/driver.py", line 20, in _initialize_obj
self._obj = self._init_fn()
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/driver.py", line 9, in _create_driver
return actives[0]()
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/backends/nvidia/driver.py", line 371, in __init__
self.utils = CudaUtils() # TODO: make static
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/backends/nvidia/driver.py", line 80, in __init__
mod = compile_module_from_src(Path(os.path.join(dirname, "driver.c")).read_text(), "cuda_utils")
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/backends/nvidia/driver.py", line 57, in compile_module_from_src
so = _build(name, src_path, tmpdir, library_dirs(), include_dir, libraries)
File "/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/runtime/build.py", line 48, in _build
ret = subprocess.check_call(cc_cmd)
File "/home/.conda/envs/python39/lib/python3.9/subprocess.py", line 373, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmp_x91vu0x/main.c', '-O3', '-shared', '-fPIC', '-o', '/tmp/tmp_x91vu0x/cuda_utils.cpython-39-x86_64-linux-gnu.so', '-lcuda', '-L/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/backends/nvidia/lib', '-L/lib64', '-L/lib', '-I/ssd3/CodeSpace/ms-swift-classify/env39_cuda12.1/lib/python3.9/site-packages/triton/backends/nvidia/include', '-I/tmp/tmp_x91vu0x', '-I/home/.conda/envs/python39/include/python3.9']' returned non-zero exit status 1.
Train: 0%| | 0/186 [00:01<?, ?it/s]
The text was updated successfully, but these errors were encountered:
训练代码
训练环境
报错信息
The text was updated successfully, but these errors were encountered: