You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[INFO|2024-12-18 10:52:09] llamafactory.model.model_utils.attention:157 >> Using torch SDPA for faster training and inference.
[INFO|2024-12-18 10:52:09] llamafactory.model.loader:157 >> all params: 8,291,375,616
[WARNING|2024-12-18 10:52:09] llamafactory.chat.hf_engine:168 >> There is no current event loop, creating a new one.
Exception in thread Thread-8:
Traceback (most recent call last):
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/generation/utils.py", line 2215, in generate
result = self._sample(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/generation/utils.py", line 3206, in _sample
outputs = self(**model_inputs, return_dict=True)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1722, in forward
outputs = self.model(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1159, in forward
layer_outputs = decoder_layer(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 364, in pre_forward
return send_to_device(args, self.execution_device), send_to_device(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 184, in send_to_device
{
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 185, in <dictcomp>
k: t if k in skip_keys else send_to_device(t, device, non_blocking=non_blocking, skip_keys=skip_keys)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 175, in send_to_device
return honor_type(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 82, in honor_type
return type(obj)(generator)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 176, in <genexpr>
tensor, (send_to_device(t, device, non_blocking=non_blocking, skip_keys=skip_keys) for t in tensor)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 156, in send_to_device
return tensor.to(device, non_blocking=non_blocking)
RuntimeError: CUDA error: peer mapping resources exhausted
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
Expected behavior
help
Others
No response
The text was updated successfully, but these errors were encountered:
Reminder
System Info
llamafactory
version: 0.9.2.dev0
Reproduction
llamafactory-cli webui
[INFO|2024-12-18 10:52:09] llamafactory.model.model_utils.attention:157 >> Using torch SDPA for faster training and inference.
[INFO|2024-12-18 10:52:09] llamafactory.model.loader:157 >> all params: 8,291,375,616
[WARNING|2024-12-18 10:52:09] llamafactory.chat.hf_engine:168 >> There is no current event loop, creating a new one.
Exception in thread Thread-8:
Traceback (most recent call last):
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/generation/utils.py", line 2215, in generate
result = self._sample(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/generation/utils.py", line 3206, in _sample
outputs = self(**model_inputs, return_dict=True)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1722, in forward
outputs = self.model(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1159, in forward
layer_outputs = decoder_layer(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/hooks.py", line 364, in pre_forward
return send_to_device(args, self.execution_device), send_to_device(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 184, in send_to_device
{
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 185, in <dictcomp>
k: t if k in skip_keys else send_to_device(t, device, non_blocking=non_blocking, skip_keys=skip_keys)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 175, in send_to_device
return honor_type(
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 82, in honor_type
return type(obj)(generator)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 176, in <genexpr>
tensor, (send_to_device(t, device, non_blocking=non_blocking, skip_keys=skip_keys) for t in tensor)
File "/data/s2/zhuzhaowei/anaconda3/envs/py38/lib/python3.8/site-packages/accelerate/utils/operations.py", line 156, in send_to_device
return tensor.to(device, non_blocking=non_blocking)
RuntimeError: CUDA error: peer mapping resources exhausted
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with
TORCH_USE_CUDA_DSA
to enable device-side assertions.
Expected behavior
help
Others
No response
The text was updated successfully, but these errors were encountered: