A Complete Illustrated Guide to Installing and Configuring PyTorch from Scratch on Windows 10


Below is the full walkthrough for installing and configuring PyTorch from scratch on Windows 10:

Step 1: Install Anaconda

First, install a Python distribution; I recommend Anaconda. Download the Windows installer from the official site and accept the default options during installation.
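
After installation finishes, open the Anaconda Prompt and confirm that conda is available:

conda --version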

Step 2: Create a virtual environment

To avoid dependency conflicts between projects, create a separate Python environment for each project. The following command creates a virtual environment named myenv:

conda create -n myenv python=3.7

Step 3: Activate the virtual environment

Once the environment is created, it must be activated before use. Activate it from the command line with:

conda activate myenv
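
If activation succeeded, the environment name (myenv) appears at the start of the prompt, and you can confirm the interpreter version:

python --version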

Step 4: Install PyTorch

PyTorch is a popular deep learning framework. Install it with:

conda install pytorch torchvision torchaudio cpuonly -c pytorch

Here, -c pytorch selects the official PyTorch channel, and the cpuonly package pulls in the CPU-only build. For a CUDA-enabled build, omit cpuonly and specify a CUDA package instead.
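
For example, a CUDA build might be installed as follows. Treat this line as a sketch: the cudatoolkit version must match your GPU driver, and the selector at https://pytorch.org/get-started/locally/ generates the exact command for your setup.

conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch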

Step 5: Test PyTorch

Once PyTorch is installed, verify the installation with the following code:

import torch

x = torch.randn(3, 3)
print(x)

If it prints a 3×3 tensor of random values, the installation succeeded.
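
If you installed a CUDA build, you can also check whether PyTorch can see the GPU:

import torch
print(torch.cuda.is_available())  # True if the CUDA build and driver are working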

Example 1: Image classification with PyTorch

The following uses an image classification task as an example to show how to carry out a deep learning task with PyTorch.

First, prepare some data. We use the CIFAR-10 dataset, which contains images from 10 classes; torchvision can download it automatically. The following code downloads and loads the dataset:

import torch
import torchvision
import torchvision.transforms as transforms

# convert images to tensors and normalize each channel to [-1, 1]
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# note: on Windows, DataLoader workers require the `if __name__ == '__main__':`
# guard when run as a script; alternatively set num_workers=0
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
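
As a quick sanity check, you can pull one mini-batch and inspect the tensor shapes:

# one batch of 4 CIFAR images: images (4, 3, 32, 32), labels (4,)
images, labels = next(iter(trainloader))
print(images.shape, labels.shape)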

Next, define a convolutional neural network model:

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)        # 3 input channels -> 6 feature maps, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)         # 2x2 max pooling halves the spatial size
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 maps of 5x5 remain after two conv+pool stages
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)           # 10 output classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)             # flatten for the fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
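
To verify that the layer dimensions line up, feed a dummy CIFAR-sized batch through the untrained network; the output should have shape (1, 10):

out = net(torch.randn(1, 3, 32, 32))
print(out.shape)  # torch.Size([1, 10])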

After defining the model, train it with the following code:

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print the average loss every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

After training completes, evaluate the model on the test set:

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)  # class with the highest score
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
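
A network that has learned anything useful should score well above the 10% random-guess baseline. To reuse the trained weights later, save them with:

torch.save(net.state_dict(), 'cifar_net.pt')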

Example 2: Text classification with PyTorch

The following uses a text classification task as an example to show another deep learning workload in PyTorch.

First, prepare some data. We use the IMDB movie review dataset, which contains two classes of reviews, positive and negative; torchtext can download it automatically. The following code downloads and loads the dataset:
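
Note: the code below uses the legacy torchtext API (Field, LabelField, BucketIterator). That API moved to torchtext.legacy in torchtext 0.9 and was removed in 0.12, so on a recent install you may need to change the imports to torchtext.legacy.data or pin an older release, for example:

pip install torchtext==0.8.1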

import torch
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, BucketIterator

# use the GPU if one is available; this device is reused throughout the example
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

TEXT = Field(sequential=True, lower=True, include_lengths=True, batch_first=True)
LABEL = LabelField(sequential=False)

train_data, test_data = IMDB.splits(TEXT, LABEL)

train_data, valid_data = train_data.split()

TEXT.build_vocab(train_data, min_freq=2)
LABEL.build_vocab(train_data)

# sort_within_batch=True is required because the model below packs sequences
# with pack_padded_sequence, which expects lengths in descending order
train_iter, valid_iter, test_iter = BucketIterator.splits(
    (train_data, valid_data, test_data), batch_size=128,
    sort_within_batch=True, device=device)

Next, define a recurrent neural network model:

import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                           bidirectional=bidirectional, dropout=dropout)
        # the LSTM is bidirectional, so the concatenated final state is 2 * hidden_dim wide
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        # pack the padded batch so the LSTM skips padding tokens
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths.cpu(), batch_first=True)
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # concatenate the last forward (hidden[-2]) and backward (hidden[-1]) hidden states
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        output = self.fc(hidden)
        return output

After defining the model, set the hyperparameters and train it with the following code:

import torch.optim as optim

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 2          # two classes: negative and positive
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
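
As an optional sanity check, you can count the model's trainable parameters (the embedding table dominates the total):

print(f'{sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')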

optimizer = optim.Adam(model.parameters())

criterion = nn.CrossEntropyLoss()

# move the model and loss function to the device defined with the iterators above
model = model.to(device)
criterion = criterion.to(device)

def binary_accuracy(preds, y):
    # preds are (batch, 2) logits; accuracy is the fraction of argmax
    # predictions that match the integer labels y
    rounded_preds = torch.argmax(preds, 1)
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:
        text, text_lengths = batch.text
        optimizer.zero_grad()
        predictions = model(text, text_lengths)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            predictions = model(text, text_lengths)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iter, criterion)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

model.load_state_dict(torch.load('tut2-model.pt'))

test_loss, test_acc = evaluate(model, test_iter, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

After training, the following code runs the model on a new sentence and returns the positive and negative probabilities:

import spacy

# spaCy 3 removed the 'en' shorthand; download the model first with:
#   python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

def predict_sentiment(model, sentence):
    model.eval()

    # tokenize and convert tokens to vocabulary indices
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    length = [len(indexed)]

    # add a batch dimension: shape (1, seq_len) because batch_first=True
    tensor = torch.LongTensor(indexed).unsqueeze(0).to(device)
    length_tensor = torch.LongTensor(length)

    prediction = torch.softmax(model(tensor, length_tensor), dim=1)
    pos_prob = prediction[:, 1].item()
    neg_prob = prediction[:, 0].item()
    return pos_prob, neg_prob
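
For example (the exact numbers depend on your trained weights, and LABEL.vocab determines which index means "positive", so check LABEL.vocab.stoi if the two probabilities look swapped):

pos, neg = predict_sentiment(model, "This film is absolutely wonderful!")
print(f'positive: {pos:.3f}, negative: {neg:.3f}')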

That concludes the complete guide to installing and configuring PyTorch from scratch on Windows 10. I hope it helps.