
Commit 0ab3667

Merge branch 'main' into links
2 parents e6c1203 + 15ef015

36 files changed: +1109 −663 lines

.ci/docker/requirements.txt (+2 −2)

@@ -28,8 +28,8 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.5.0
-tensordict==0.5.0
+torchrl==0.6.0
+tensordict==0.6.0
 ax-platform>=0.4.0
 nbformat>=5.9.2
 datasets

.jenkins/build.sh (+4 −2)

@@ -22,8 +22,10 @@ sudo apt-get install -y pandoc
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
-# pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# pip3 install torch==2.5.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
+sudo pip3 install torch==2.6.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+sudo pip uninstall -y fbgemm-gpu torchrec
+sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124

 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm

.jenkins/validate_tutorials_built.py (−2)

@@ -25,7 +25,6 @@
     "intermediate_source/mnist_train_nas",  # used by ax_multiobjective_nas_tutorial.py
     "intermediate_source/fx_conv_bn_fuser",
     "intermediate_source/_torch_export_nightly_tutorial",  # does not work on release
-    "intermediate_source/transformer_building_blocks",  # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/usb_semisup_learn",  # fails with CUDA OOM error, should try on a different worker
     "prototype_source/fx_graph_mode_ptq_dynamic",
@@ -51,7 +50,6 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial",  # reenable after 2.0 release.
-    "intermediate_source/torch_export_tutorial"  # reenable after 2940 is fixed.
 ]

 def tutorial_source_dirs() -> List[Path]:
(Binary image files changed, not shown: 38.1 KB, −7.37 KB, −15.9 KB, −8.41 KB, −381 Bytes.)

advanced_source/coding_ddpg.py (+1 −1)

@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj
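torchrl 0.6 renamed the mean-action evaluation mode, which is what this hunk tracks. A minimal sketch of the updated usage (assuming torchrl >= 0.6 with gymnasium installed; the environment name is illustrative, and no trained policy is needed for the demonstration):

import torch
from torchrl.envs import GymEnv
from torchrl.envs.utils import ExplorationType, set_exploration_type

env = GymEnv("Pendulum-v1")  # assumes gymnasium is installed
# ExplorationType.DETERMINISTIC replaces the older ExplorationType.MEAN
with set_exploration_type(ExplorationType.DETERMINISTIC):
    rollout = env.rollout(max_steps=100)  # a random policy is used when none is passed
print(rollout)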

advanced_source/pendulum.py (+1 −1)

@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
             env,
             # ``Unsqueeze`` the observations that we will concatenate
             UnsqueezeTransform(
-                unsqueeze_dim=-1,
+                dim=-1,
                 in_keys=["th", "thdot"],
                 in_keys_inv=["th", "thdot"],
             ),
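torchrl 0.6 likewise renamed this transform's keyword from ``unsqueeze_dim`` to ``dim``. A standalone sketch (assuming torchrl >= 0.6; applying the transform directly to a TensorDict here is purely illustrative):

import torch
from tensordict import TensorDict
from torchrl.envs.transforms import UnsqueezeTransform

# Same behavior as before, under the new keyword name.
t = UnsqueezeTransform(dim=-1, in_keys=["th"], in_keys_inv=["th"])
td = TensorDict({"th": torch.tensor([0.1, 0.2, 0.3])}, batch_size=[])
print(t(td)["th"].shape)  # torch.Size([3, 1])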

beginner_source/basics/buildmodel_tutorial.py (+4 −11)

@@ -32,17 +32,10 @@
 #############################################
 # Get Device for Training
 # -----------------------
-# We want to be able to train our model on a hardware accelerator like the GPU or MPS,
-# if available. Let's check to see if `torch.cuda <https://pytorch.org/docs/stable/notes/cuda.html>`_
-# or `torch.backends.mps <https://pytorch.org/docs/stable/notes/mps.html>`_ are available, otherwise we use the CPU.
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "mps"
-    if torch.backends.mps.is_available()
-    else "cpu"
-)
+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
 print(f"Using {device} device")

 ##############################################
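Standalone, the new device-selection idiom looks like this (a minimal sketch, assuming PyTorch 2.6+ where ``torch.accelerator`` exists):

import torch

# Pick the current accelerator (CUDA, MPS, MTIA, XPU, ...) if present, else CPU.
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

model = torch.nn.Linear(8, 2).to(device)  # modules move the same way on any backend
x = torch.randn(1, 8, device=device)
print(model(x).shape)                     # torch.Size([1, 2])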

beginner_source/basics/quickstart_tutorial.py (+4 −10)

@@ -84,16 +84,10 @@
 # To define a neural network in PyTorch, we create a class that inherits
 # from `nn.Module <https://pytorch.org/docs/stable/generated/torch.nn.Module.html>`_. We define the layers of the network
 # in the ``__init__`` function and specify how data will pass through the network in the ``forward`` function. To accelerate
-# operations in the neural network, we move it to the GPU or MPS if available.
-
-# Get cpu, gpu or mps device for training.
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "mps"
-    if torch.backends.mps.is_available()
-    else "cpu"
-)
+# operations in the neural network, we move it to the `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
 print(f"Using {device} device")

 # Define model

beginner_source/basics/tensorqs_tutorial.py (+8 −8)

@@ -99,20 +99,20 @@
 # Operations on Tensors
 # ~~~~~~~~~~~~~~~~~~~~~~~
 #
-# Over 100 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing,
+# Over 1200 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing,
 # indexing, slicing), sampling and more are
 # comprehensively described `here <https://pytorch.org/docs/stable/torch.html>`__.
 #
-# Each of these operations can be run on the GPU (at typically higher speeds than on a
-# CPU). If you’re using Colab, allocate a GPU by going to Runtime > Change runtime type > GPU.
+# Each of these operations can be run on the CPU and `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If you’re using Colab, allocate an accelerator by going to Runtime > Change runtime type > GPU.
 #
-# By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using
-# ``.to`` method (after checking for GPU availability). Keep in mind that copying large tensors
+# By default, tensors are created on the CPU. We need to explicitly move tensors to the accelerator using
+# ``.to`` method (after checking for accelerator availability). Keep in mind that copying large tensors
 # across devices can be expensive in terms of time and memory!

-# We move our tensor to the GPU if available
-if torch.cuda.is_available():
-    tensor = tensor.to("cuda")
+# We move our tensor to the current accelerator if available
+if torch.accelerator.is_available():
+    tensor = tensor.to(torch.accelerator.current_accelerator())

 ######################################################################
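A self-contained version of that tensor move (a sketch, assuming PyTorch 2.6+):

import torch

tensor = torch.ones(4, 4)
# No-op on CPU-only machines; otherwise moves the data to the active device.
if torch.accelerator.is_available():
    tensor = tensor.to(torch.accelerator.current_accelerator())
print(tensor.device)  # e.g. cuda:0, mps, or cpu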

beginner_source/chatbot_tutorial.py (+7 −5)

@@ -108,8 +108,10 @@
 import json


-USE_CUDA = torch.cuda.is_available()
-device = torch.device("cuda" if USE_CUDA else "cpu")
+# If the current `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__ is available,
+# we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")


 ######################################################################
@@ -1318,16 +1320,16 @@ def evaluateInput(encoder, decoder, searcher, voc):
 encoder_optimizer.load_state_dict(encoder_optimizer_sd)
 decoder_optimizer.load_state_dict(decoder_optimizer_sd)

-# If you have CUDA, configure CUDA to call
+# If you have an accelerator, configure it to call
 for state in encoder_optimizer.state.values():
     for k, v in state.items():
         if isinstance(v, torch.Tensor):
-            state[k] = v.cuda()
+            state[k] = v.to(device)

 for state in decoder_optimizer.state.values():
     for k, v in state.items():
         if isinstance(v, torch.Tensor):
-            state[k] = v.cuda()
+            state[k] = v.to(device)

 # Run training iterations
 print("Starting Training!")

beginner_source/examples_autograd/polynomial_autograd.py (+5 −1)

@@ -17,8 +17,12 @@
 import torch
 import math

+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
 dtype = torch.float
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 torch.set_default_device(device)

 # Create Tensors to hold input and outputs.
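Because this file also calls ``torch.set_default_device``, tensors created afterwards land on the selected device without a per-call ``device=`` argument. A short sketch (assuming PyTorch 2.6+):

import torch

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
torch.set_default_device(device)

x = torch.randn(3)  # created on ``device`` with no explicit argument
print(x.device)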

beginner_source/fgsm_tutorial.py (+4 −8)

@@ -125,14 +125,9 @@
 # `pytorch/examples/mnist <https://github.com/pytorch/examples/tree/master/mnist>`__.
 # For simplicity, download the pretrained model `here <https://drive.google.com/file/d/1HJV2nUHJqclXQ8flKvcWmjZ-OU5DGatl/view?usp=drive_link>`__.
 #
-# - ``use_cuda`` - boolean flag to use CUDA if desired and available.
-#   Note, a GPU with CUDA is not critical for this tutorial as a CPU will
-#   not take much time.
-#

 epsilons = [0, .05, .1, .15, .2, .25, .3]
 pretrained_model = "data/lenet_mnist_model.pth"
-use_cuda=True
 # Set random seed for reproducibility
 torch.manual_seed(42)

@@ -184,9 +179,10 @@ def forward(self, x):
     ])),
     batch_size=1, shuffle=True)

-# Define what device we are using
-print("CUDA Available: ",torch.cuda.is_available())
-device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")

 # Initialize the network
 model = Net().to(device)

beginner_source/introyt/tensors_deeper_tutorial.py (+22 −26)

@@ -632,34 +632,33 @@
 # does this *without* changing ``a`` - you can see that when we print
 # ``a`` again at the end, it retains its ``requires_grad=True`` property.
 #
-# Moving to GPU
+# Moving to `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
 # -------------
 #
-# One of the major advantages of PyTorch is its robust acceleration on
-# CUDA-compatible Nvidia GPUs. (“CUDA” stands for *Compute Unified Device
-# Architecture*, which is Nvidia’s platform for parallel computing.) So
-# far, everything we’ve done has been on CPU. How do we move to the faster
+# One of the major advantages of PyTorch is its robust acceleration on an
+# `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU.
+# So far, everything we’ve done has been on CPU. How do we move to the faster
 # hardware?
 #
-# First, we should check whether a GPU is available, with the
+# First, we should check whether an accelerator is available, with the
 # ``is_available()`` method.
 #
 # .. note::
-#      If you do not have a CUDA-compatible GPU and CUDA drivers
-#      installed, the executable cells in this section will not execute any
-#      GPU-related code.
+#      If you do not have an accelerator, the executable cells in this section will not execute any
+#      accelerator-related code.
 #

-if torch.cuda.is_available():
-    print('We have a GPU!')
+if torch.accelerator.is_available():
+    print('We have an accelerator!')
 else:
     print('Sorry, CPU only.')


 ##########################################################################
-# Once we’ve determined that one or more GPUs is available, we need to put
-# our data someplace where the GPU can see it. Your CPU does computation
-# on data in your computer’s RAM. Your GPU has dedicated memory attached
+# Once we’ve determined that one or more accelerators is available, we need to put
+# our data someplace where the accelerator can see it. Your CPU does computation
+# on data in your computer’s RAM. Your accelerator has dedicated memory attached
 # to it. Whenever you want to perform a computation on a device, you must
 # move *all* the data needed for that computation to memory accessible by
 # that device. (Colloquially, “moving the data to memory accessible by the
@@ -669,34 +668,31 @@
 # may do it at creation time:
 #

-if torch.cuda.is_available():
-    gpu_rand = torch.rand(2, 2, device='cuda')
+if torch.accelerator.is_available():
+    gpu_rand = torch.rand(2, 2, device=torch.accelerator.current_accelerator())
     print(gpu_rand)
 else:
     print('Sorry, CPU only.')


 ##########################################################################
 # By default, new tensors are created on the CPU, so we have to specify
-# when we want to create our tensor on the GPU with the optional
+# when we want to create our tensor on the accelerator with the optional
 # ``device`` argument. You can see when we print the new tensor, PyTorch
 # informs us which device it’s on (if it’s not on CPU).
 #
-# You can query the number of GPUs with ``torch.cuda.device_count()``. If
-# you have more than one GPU, you can specify them by index:
+# You can query the number of accelerators with ``torch.accelerator.device_count()``. If
+# you have more than one accelerator, you can specify them by index, take CUDA for example:
 # ``device='cuda:0'``, ``device='cuda:1'``, etc.
 #
 # As a coding practice, specifying our devices everywhere with string
 # constants is pretty fragile. In an ideal world, your code would perform
-# robustly whether you’re on CPU or GPU hardware. You can do this by
+# robustly whether you’re on CPU or accelerator hardware. You can do this by
 # creating a device handle that can be passed to your tensors instead of a
 # string:
 #

-if torch.cuda.is_available():
-    my_device = torch.device('cuda')
-else:
-    my_device = torch.device('cpu')
+my_device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else torch.device('cpu')
 print('Device: {}'.format(my_device))

 x = torch.rand(2, 2, device=my_device)
@@ -718,12 +714,12 @@
 # It is important to know that in order to do computation involving two or
 # more tensors, *all of the tensors must be on the same device*. The
 # following code will throw a runtime error, regardless of whether you
-# have a GPU device available:
+# have an accelerator device available, take CUDA for example:
 #
 # .. code-block:: python
 #
 #     x = torch.rand(2, 2)
-#     y = torch.rand(2, 2, device='gpu')
+#     y = torch.rand(2, 2, device='cuda')
 #     z = x + y  # exception will be thrown
 #

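The cross-device rule in that last hunk is easy to demonstrate directly. A short sketch (assuming PyTorch 2.6+ and an accelerator present):

import torch

if torch.accelerator.is_available():
    x = torch.rand(2, 2)  # CPU tensor
    y = torch.rand(2, 2, device=torch.accelerator.current_accelerator())
    try:
        z = x + y  # mixing devices raises
    except RuntimeError as err:
        print(f'As expected: {err}')
else:
    print('CPU only; both tensors share a device, so no error is raised.')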
beginner_source/knowledge_distillation_tutorial.py (+4 −2)

@@ -37,8 +37,10 @@
 import torchvision.transforms as transforms
 import torchvision.datasets as datasets

-# Check if GPU is available, and if not, use the CPU
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Check if the current `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# is available, and if not, use the CPU
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")

 ######################################################################
 # Loading CIFAR-10

beginner_source/nn_tutorial.py (+11 −14)

@@ -132,7 +132,7 @@
 # we'll write `log_softmax` and use it. Remember: although PyTorch
 # provides lots of prewritten loss functions, activation functions, and
 # so forth, you can easily write your own using plain python. PyTorch will
-# even create fast GPU or vectorized CPU code for your function
+# even create fast accelerator or vectorized CPU code for your function
 # automatically.

 def log_softmax(x):
@@ -827,38 +827,35 @@ def __iter__(self):
 fit(epochs, model, loss_func, opt, train_dl, valid_dl)

 ###############################################################################
-# Using your GPU
+# Using your `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
 # ---------------
 #
-# If you're lucky enough to have access to a CUDA-capable GPU (you can
+# If you're lucky enough to have access to an accelerator such as CUDA (you can
 # rent one for about $0.50/hour from most cloud providers) you can
-# use it to speed up your code. First check that your GPU is working in
+# use it to speed up your code. First check that your accelerator is working in
 # Pytorch:

-print(torch.cuda.is_available())
+# If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")

-###############################################################################
-# And then create a device object for it:
-
-dev = torch.device(
-    "cuda") if torch.cuda.is_available() else torch.device("cpu")

 ###############################################################################
-# Let's update ``preprocess`` to move batches to the GPU:
+# Let's update ``preprocess`` to move batches to the accelerator:


 def preprocess(x, y):
-    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)
+    return x.view(-1, 1, 28, 28).to(device), y.to(device)


 train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
 train_dl = WrappedDataLoader(train_dl, preprocess)
 valid_dl = WrappedDataLoader(valid_dl, preprocess)

 ###############################################################################
-# Finally, we can move our model to the GPU.
+# Finally, we can move our model to the accelerator.

-model.to(dev)
+model.to(device)
 opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

 ###############################################################################