You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Unexpected failing examples:
/var/lib/workspace/intermediate_source/pinmem_nonblock.py failed leaving traceback:
Traceback (most recent call last):
File "/var/lib/workspace/intermediate_source/pinmem_nonblock.py", line 642, in <module>
from tensordict import TensorDict
File "/usr/local/lib/python3.10/dist-packages/tensordict/__init__.py", line 6, in <module>
import tensordict._reductions
File "/usr/local/lib/python3.10/dist-packages/tensordict/_reductions.py", line 11, in <module>
from tensordict._lazy import LazyStackedTensorDict
File "/usr/local/lib/python3.10/dist-packages/tensordict/_lazy.py", line 37, in <module>
from tensordict.memmap import MemoryMappedTensor
File "/usr/local/lib/python3.10/dist-packages/tensordict/memmap.py", line 22, in <module>
from tensordict.utils import _shape, implement_for, IndexType, NESTED_TENSOR_ERR
File "/usr/local/lib/python3.10/dist-packages/tensordict/utils.py", line 94, in <module>
from torchrec import KeyedJaggedTensor
File "/usr/local/lib/python3.10/dist-packages/torchrec/__init__.py", line 10, in <module>
import torchrec.distributed # noqa
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/__init__.py", line 38, in <module>
from torchrec.distributed.model_parallel import DistributedModelParallel # noqa
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/model_parallel.py", line 26, in <module>
from torchrec.distributed.planner import EmbeddingShardingPlanner, Topology
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/planner/__init__.py", line 24, in <module>
from torchrec.distributed.planner.planners import EmbeddingShardingPlanner # noqa
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/planner/planners.py", line 21, in <module>
from torchrec.distributed.planner.constants import BATCH_SIZE, MAX_SIZE
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/planner/constants.py", line 12, in <module>
from torchrec.distributed.embedding_types import EmbeddingComputeKernel
File "/usr/local/lib/python3.10/dist-packages/torchrec/distributed/embedding_types.py", line 16, in <module>
from fbgemm_gpu.split_table_batched_embeddings_ops_training import EmbeddingLocation
File "/usr/local/lib/python3.10/dist-packages/fbgemm_gpu/__init__.py", line 71, in <module>
_load_library(f"{library}.so")
File "/usr/local/lib/python3.10/dist-packages/fbgemm_gpu/__init__.py", line 21, in _load_library
raise error
File "/usr/local/lib/python3.10/dist-packages/fbgemm_gpu/__init__.py", line 17, in _load_library
torch.ops.load_library(os.path.join(os.path.dirname(__file__), filename))
File "/var/lib/ci-user/.local/lib/python3.10/site-packages/torch/_ops.py", line 1392, in load_library
ctypes.CDLL(path)
File "/usr/lib/python3.10/ctypes/__init__.py", line 374, in __init__
self._handle = _dlopen(self._name, mode)
OSError: /usr/local/lib/python3.10/dist-packages/fbgemm_gpu/fbgemm_gpu_config.so: undefined symbol: _ZN5torch3jit17parseSchemaOrNameERKSsb
Add Link
https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html
Describe the bug
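The tutorial fails during the docs build: `from tensordict import TensorDict` ends up importing `fbgemm_gpu`, whose shared library `fbgemm_gpu_config.so` cannot be loaded because of an undefined symbol (`_ZN5torch3jit17parseSchemaOrNameERKSsb`).
See the build log for the full output.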
Please submit fixes against the `2.7-RC-TEST` branch and enable the tutorial in `.jenkins/validate_tutorials_built.py`.
Describe your environment
CUDA: 12.6
PyTorch: 2.7
cc @vmoens @nairbv
The text was updated successfully, but these errors were encountered: