From https://pytorch.org/
to install PyTorch on macOS, the following is stated:
conda install pytorch torchvision -c pytorch
# MacOS Binaries dont support CUDA, install from source if CUDA is needed
Why would one want to install PyTorch without CUDA enabled?
The reason I ask is that I receive this error:
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
 in ()
     78 # predicted = outputs.data.max(1)[1]
     79
---> 80 output = model(torch.tensor([[1,1]]).float().cuda())
     81 predicted = output.data.max(1)[1]
     82

~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py in _lazy_init()
    159         raise RuntimeError(
    160             "Cannot re-initialize CUDA in forked subprocess. " + msg)
--> 161     _check_driver()
    162     torch._C._cuda_init()
    163     _cudart = _load_cudart()

~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py in _check_driver()
     73 def _check_driver():
     74     if not hasattr(torch._C, '_cuda_isDriverSufficient'):
---> 75         raise AssertionError("Torch not compiled with CUDA enabled")
     76     if not torch._C._cuda_isDriverSufficient():
     77         if torch._C._cuda_getDriverVersion() == 0:

AssertionError: Torch not compiled with CUDA enabled
when attempting to execute this code:
x = torch.tensor([[0,0], [0,1], [1,0]]).float()
print(x)
y = torch.tensor([0,1,1]).long()
print(y)

my_train = data_utils.TensorDataset(x, y)
my_train_loader = data_utils.DataLoader(my_train, batch_size=2, shuffle=True)

# Device configuration
device = 'cpu'
print(device)

# Hyper-parameters
input_size = 2
hidden_size = 100
num_classes = 2
learning_rate = 0.001

train_dataset = my_train
train_loader = my_train_loader

pred = []
for i in range(0, model_iters):
    # Fully connected neural network with one hidden layer
    class NeuralNet(nn.Module):
        def __init__(self, input_size, hidden_size, num_classes):
            super(NeuralNet, self).__init__()
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_size, num_classes)

        def forward(self, x):
            out = self.fc1(x)
            out = self.relu(out)
            out = self.fc2(out)
            return out

    model = NeuralNet(input_size, hidden_size, num_classes).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.reshape(-1, 2).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    output = model(torch.tensor([[1,1]]).float().cuda())
To fix this error, do I need to build PyTorch from source with CUDA already installed?
To summarize and expand on the comments:
CUDA is a proprietary Nvidia technology (apparently not licensed to other vendors) that allows general-purpose computing on GPUs.
Very few MacBook Pros have an Nvidia CUDA-capable GPU. Take a look here to see whether your MBP has an Nvidia GPU, then check the table here to see whether that GPU supports CUDA.
The same goes for the iMac, iMac Pro and Mac Pro.
Therefore, PyTorch is installed without CUDA support by default on macOS.
This PyTorch GitHub issue mentions that very few Macs have Nvidia processors: https://github.com/pytorch/pytorch/issues/30664
If your Mac does have a CUDA-capable GPU, then to use CUDA commands on macOS you'll need to recompile PyTorch from source with the correct command-line options.
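In practice, the usual way to make the code in the question run on a CPU-only Mac is to drop the explicit .cuda() call and pick the device conditionally. A minimal sketch based on the snippet above (not the asker's exact code):

import torch

# Fall back to the CPU when PyTorch was built without CUDA support (the default on macOS).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# ... training as above ...
output = model(torch.tensor([[1, 1]], dtype=torch.float, device=device))  # instead of .float().cuda()
predicted = output.data.max(1)[1]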
Python version: 3.10.6
PyTorch version: 1.12.1 (nightly build)
I'm trying to run an LSTM network on a MacBook Pro M1. The code runs perfectly on the CPU, but when I change my device to the GPU (mps), loss.backward() throws the following error:
RuntimeError: Expected a proper Tensor but got None (or an undefined Tensor in C++) for argument #0 'grad_y'
To be honest, I don't know how to debug this or what the problem might be. It is probably a gradient issue, since the error is about the argument grad_y, but why does it work on the CPU?
My code:
class ShallowRegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units, num_layers=1, out_features=1):
        super(ShallowRegressionLSTM, self).__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = num_layers
        self.out_features = out_features

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )
        self.linear = nn.Linear(in_features=self.hidden_units, out_features=self.out_features)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units, requires_grad=True, device=device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units, requires_grad=True, device=device)

        _, (hn, _) = self.lstm(x, (h0, c0))  # outputs: output, (h_n, c_n)
        out = self.linear(hn[-1]).flatten()  # first dim of hn is the number of layers
        return out


def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()

    for X, y in data_loader:
        X = X.to(device)
        y = y.to(device)

        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")
    train_loss.append(avg_loss)
I tried the loss on the GPU too:
model = ShallowRegressionLSTM(num_sensors=len(features), hidden_units=num_hidden_units, num_layers=num_layers).to(device)
#loss_function = nn.MSELoss().to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    train_model(train_loader, model, loss_function, optimizer=optimizer)
The only post I've found about this issue:
https://discuss.pytorch.org/t/loss-backward-error-when-using-m1-gpu-mps-device/152306
I've tried the following to fix the #0 'grad_y' error:
computing the loss on the GPU and on the CPU
retain_grad
pushing the output and label to the CPU before computing the loss
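For reference, the snippet above never shows how device is defined; presumably it is selected roughly like this (a minimal sketch; torch.backends.mps.is_available() exists in PyTorch 1.12+):

import torch

# Prefer the Apple GPU (mps) when it is available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)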
The machine I am using for training has 4 GPUs. I am "moving" the classifier, the loss function and the tensors to the GPU, but when I run nvidia-smi while training is ongoing, I see that GPU utilization is very low (3%) on one GPU and 0 on the others.
The questions I have are:
Is there an easier way to ask PyTorch to use the GPUs, and as many of them as are available, without having to scatter so many .to(device) calls everywhere? (One option is sketched after the code below.)
Is there anything other than .to(device) that is needed to use the GPU?
Is there a way to see whether training is happening on the CPU or the GPU, or is running nvidia-smi on the machine and looking at GPU utilization the only way?
How do I interpret a GPU utilization of 3% in nvidia-smi? Does it mean the CPU is being used in many places? If so, is there a way to debug what is making the training use the CPU?
Will setting num_workers in the DataLoader class to the number of available cores be enough to use multiple GPUs? Is there a generic way to automatically determine how many GPUs are available?
Code used to train
random.seed(1234)
np.random.seed(1234)
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)
torch.backends.cudnn.deterministic = True
start_time = time.time()
clf = MLP(len(X_training[0]), hidden_size=[100, 100, 100, 100, 100])
#Move to GPU if available
use_gpu = torch.cuda.is_available()
device = torch.device('cuda' if use_gpu else 'cpu')
# Define the loss function and optimizer
optimizer = torch.optim.Adam(clf.parameters(), lr=8e-4)
clf = clf.to(device)
loss_function = nn.BCELoss()
loss_function = loss_function.to(device)
# Run the training loop
# per_epoch_precision = []
# per_epoch_recall = []
for epoch in range(0, 150):
    # Set current loss value
    current_loss = 0.0

    dataset = MyDataset(X_training, y_training, use_gpu)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_gpu else {}
    trainloader = torch.utils.data.DataLoader(dataset, batch_size=10000, shuffle=True, **kwargs)

    # Iterate over the DataLoader for training data
    clf.train()  # set to train mode
    for i, data in enumerate(trainloader):
        # Get inputs
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
        outputs = clf(inputs)

        # Compute loss
        targets = targets.float().unsqueeze(1)
        loss = loss_function(outputs, targets)

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

        # Print statistics
        current_loss += loss.item()
        if i % 20000 == 19999:
            print("Loss after mini-batch %5d: %.3f" % (i + 1, current_loss / 500))
            current_loss = 0.0

# Process is complete.
print("Training process has finished.")
print(f"Train time is {time.time() - start_time}")
class MyDataset(Dataset):
    def __init__(self, x, y, use_gpu=False):
        x = x.astype(np.float32)
        self.x_train = torch.from_numpy(x)
        self.y_train = torch.from_numpy(y.values)
        if use_gpu:
            device = torch.device("cuda")
            self.x_train.to(device)
            self.y_train.to(device)
        # self.y_train = torch.LongTensor(y.values, dtype=torch.int)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx]
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, act_fn=nn.ReLU(), use_dropout=False, drop_rate=0.25):
        super(MLP, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.layers = nn.Sequential()
        if use_dropout:
            self.layers.append(nn.Dropout(p=drop_rate))
        self.layers.append(nn.Linear(self.input_size, self.hidden_size[0]))
        self.layers.append(act_fn)
        for i in range(1, len(hidden_size)):
            if use_dropout:
                self.layers.append(nn.Dropout(p=drop_rate))
            self.layers.append(nn.Linear(self.hidden_size[i - 1], self.hidden_size[i]))
            self.layers.append(act_fn)
        if use_dropout:
            self.layers.append(nn.Dropout(p=drop_rate))
        self.layers.append(nn.Linear(self.hidden_size[-1], 1))
        self.layers.append(nn.Sigmoid())

    def forward(self, x):
        return self.layers(x)
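Regarding the first question above, one minimal sketch (not part of the original code) is to wrap the model in torch.nn.DataParallel, which replicates it on every visible GPU and splits each batch across them; torch.nn.parallel.DistributedDataParallel is the generally recommended alternative for larger workloads. Note that num_workers in the DataLoader only controls CPU data-loading processes, not GPU usage.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

clf = MLP(len(X_training[0]), hidden_size=[100, 100, 100, 100, 100])
if torch.cuda.device_count() > 1:
    # Replicate the model on all visible GPUs; each input batch is split along dim 0.
    clf = torch.nn.DataParallel(clf)
clf = clf.to(device)
# The training loop stays the same: inputs and targets still need a single .to(device).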
I have followed this link and this one too, but the error persists.
I'm trying a simple feedforward NN using PyTorch (from a tutorial).
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transformers
# hyperparameters
input_size = 784
hidden_size = 100
num_classes = 10
num_epochs = 2
learning_rate = 0.001
batch_size = 100
# activating device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Loading data
train_dataset = torchvision.datasets.MNIST('.', train=True, transform=transformers.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST('.', train=False, transform=transformers.ToTensor(), download=False)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
class FeedforwardNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNN, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.r1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.l1(x)
        out = self.r1(out)
        out = self.l2(out)
        return out
model = FeedforwardNN(input_size, hidden_size, num_classes)
model.to(device) # Added to solve the mentioned error, but to no avail.
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Training Loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 28*28).to(device)
        y_pred = model(images)
        loss = criterion(y_pred, labels)
        loss.backward()
        optimizer.zero_grad()
I get the following error
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument target in method wrapper_nll_loss_forward)
I tried adding .to(device) to the criterion and the optimizer, but that doesn't resolve the error.
The tutorial that uses the same code seems to run properly.
At the time of writing, CUDA is not available on my machine. That shouldn't matter, though, since the if condition will then choose the CPU:
>>> torch.cuda.is_available()
False
Where could the error be, then?
You forgot to move the labels to the device in the loop; only the images are moved, so the targets handed to CrossEntropyLoss are still on the CPU.
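As a sketch, the training loop from the question would then read:

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)  # this line was missing
        y_pred = model(images)
        loss = criterion(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # note: the original snippet also never calls optimizer.step()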
I am trying to code a GNN example problem as shown in the given link: https://towardsdatascience.com/hands-on-graph-neural-networks-with-pytorch-pytorch-geometric-359487e221a8
I am using a 2016 MacBook Pro without an Nvidia graphics card.
The example code uses the CUDA toolkit. Can I somehow modify the code and run it on my current laptop? I have made the dataset small enough that it does not require heavy computation and can run on my machine.
The part of the code that is giving an error is as follows:
def train():
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        label = data.y.to(device)
        loss = crit(output, label)
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)

device = torch.device('cuda')
model = Net().to(device)  # Net = a class inherited from torch.nn.Module
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
crit = torch.nn.BCELoss()
train_loader = DataLoader(train_dataset, batch_size=batch_size)

for epoch in range(num_epochs):
    train()
The error is as follows
AssertionError: Torch not compiled with CUDA enabled
You are using:
device = torch.device('cuda')
If you want to use the CPU, please change it to:
device = torch.device('cpu')
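Alternatively, since the rest of the code already uses .to(device), a two-line change (a common pattern, not specific to this tutorial) lets the same script run on either kind of machine:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)  # unchanged; everything else keeps using .to(device)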
I've cloned my GitHub repo into Google Colab and I'm trying to load data using PyTorch's DataLoader.
global gpu, device
if torch.cuda.is_available():
    gpu = True
    device = 'cuda:0'
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print("Using GPU")
else:
    gpu = False
    device = 'cpu'
    print("Using CPU")
data_transforms = transforms.Compose([
    # transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.3112, 0.2636, 0.2047], [0.2419, 0.1972, 0.1554])
])
train_path = '/content/convLSTM/code/data/train/'
val_path = '/content/convLSTM/code/data/val/'
test_path = '/content/convLSTM/code/data/test/'
train_data = datasets.ImageFolder(root=train_path, transform=data_transforms)
val_data = datasets.ImageFolder(root=val_path, transform=data_transforms)
test_data = datasets.ImageFolder(root=test_path, transform=data_transforms)
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=18,
    num_workers=4,
    shuffle=False,
    pin_memory=True
)

val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=18,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=18,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)
for batch_idx, (data, target) in enumerate(train_loader):
    print(batch_idx)
    if batch_idx == 3:
        break
I'm getting the following error when I run the last for loop:
RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
I tried num_workers = 1 instead of 4, but the error persists. I'm not using any multiprocessing myself.
I also tried it without setting torch.set_default_tensor_type('torch.cuda.FloatTensor'), but the error persists.
Python : 3.6.8 | PyTorch : 1.3.1
What seems to be the problem?
Not sure if you fixed it already, but just in case someone else reads this: using n workers activates PyTorch multiprocessing. To disable it you need the default number of workers, which is 0, not 1.
Try setting num_workers to 0 or using the Torch Multiprocessing submodule.
Just try putting num_workers=0
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)
This solved the problem in a Kaggle notebook.
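If multiple workers are really needed while keeping torch.set_default_tensor_type('torch.cuda.FloatTensor'), another option (a sketch, assuming your PyTorch version supports the multiprocessing_context argument of DataLoader) is to start the workers with the 'spawn' method that the error message asks for:

train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=18,
    num_workers=4,
    shuffle=False,
    pin_memory=True,
    multiprocessing_context='spawn'  # workers are spawned instead of forked, so CUDA is not re-initialized in a forked process
)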