PyG的安装与基本使用方法

1 新建conda环境

conda create -n pygod python=3.8
conda activate pygod

2 安装Pytorch并测试torch、cuda版本，以及cuda是否可用

我安装的是 v1.12.0 版本，对应的 CUDA 版本选的是 11.6

pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 -f https://download.pytorch.org/whl/torch_stable.html

测试：

python -c "import torch; print(torch.__version__)"
python -c "import torch; print(torch.version.cuda)"
python -c "import torch; print(torch.cuda.is_available())"

3 安装pytorch-geometric

一定要对应好pytorch和cuda版本号,官网直接一条命令搞定。

pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu116.html

4 测试pytorch-geometric是否安装成功

import torch
from torch_geometric.data import Data
# Three-node path 0 - 1 - 2; each link appears once per direction, and each
# column of edge_index is one directed edge.
endpoints_a = [0, 1, 1, 2]
endpoints_b = [1, 0, 2, 1]
edge_index = torch.tensor([endpoints_a, endpoints_b], dtype=torch.long)

# One scalar feature per node.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Printing the Data object is the install check; the tutorial shows the
# expected output right after this snippet.
data = Data(x=x, edge_index=edge_index)
print(data)

Data(x=[3, 1], edge_index=[2, 4])

5 pyg基本使用方法

参考官方教程

import os
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.datasets import Planetoid
import torch_geometric.nn as pyg_nn


# load dataset
def get_data(folder="node_classify/cora", data_name="cora"):
    """Return the Planetoid dataset called ``data_name``.

    Args:
        folder: Directory passed to ``Planetoid`` as its ``root``.
        data_name: Dataset name passed to ``Planetoid`` as its ``name``.

    Returns:
        The constructed ``Planetoid`` dataset object.
    """
    return Planetoid(root=folder, name=data_name)


# create the graph cnn model
class GraphCNN(nn.Module):
    """Two-layer graph convolutional network built from ``GCNConv`` layers.

    Args:
        in_c: Number of input channels per node.
        hid_c: Number of channels produced by the first convolution.
        out_c: Number of channels produced by the second convolution.
    """

    def __init__(self, in_c, hid_c, out_c):
        super().__init__()
        self.conv1 = pyg_nn.GCNConv(in_channels=in_c, out_channels=hid_c)
        self.conv2 = pyg_nn.GCNConv(in_channels=hid_c, out_channels=out_c)

    def forward(self, data):
        """Return per-node log-probabilities for the graph in ``data``.

        Args:
            data: Object exposing ``x`` (node features, [N, C]) and
                ``edge_index`` (edge list, [2, E]).

        Returns:
            Tensor of shape [N, out_c] holding log-softmax scores taken
            over dimension 1.
        """
        node_features, edges = data.x, data.edge_index

        # First convolution followed by ReLU: [N, C] -> [N, hid_c].
        hidden = F.relu(self.conv1(x=node_features, edge_index=edges))

        # Second convolution yields the raw class scores: [N, out_c].
        logits = self.conv2(x=hidden, edge_index=edges)

        return F.log_softmax(logits, dim=1)


def main():
    """Train a ``GraphCNN`` on the dataset from ``get_data`` and print test accuracy.

    The model is trained for 200 full-batch epochs with Adam (lr=1e-3) on
    the nodes selected by ``train_mask``, printing the loss every epoch.
    Afterwards the accuracy on the nodes selected by ``test_mask`` is
    printed. Runs on CUDA when available, otherwise on the CPU.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # decide which GPU to use
    cora_dataset = get_data()

    my_net = GraphCNN(
        in_c=cora_dataset.num_node_features,
        hid_c=12,
        out_c=cora_dataset.num_classes,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    my_net = my_net.to(device)
    # Train and evaluate on the first graph of the dataset.
    data = cora_dataset[0].to(device)

    optimizer = torch.optim.Adam(my_net.parameters(), lr=1e-3)

    # model train
    my_net.train()
    for epoch in range(200):
        optimizer.zero_grad()

        output = my_net(data)
        # The model emits log-probabilities, so NLL loss is the matching
        # criterion; only the training nodes contribute to the loss.
        loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        print("Epoch", epoch + 1, "Loss", loss.item())

    # model test
    my_net.eval()
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time without changing the predictions.
    with torch.no_grad():
        _, prediction = my_net(data).max(dim=1)

    target = data.y

    test_correct = prediction[data.test_mask].eq(target[data.test_mask]).sum().item()
    test_number = data.test_mask.sum().item()

    print("Accuracy of Test Samples: ", test_correct / test_number)


# Run the training demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

第一次运行会自动下载cora数据集

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!
Output exceeds the size limit. Open the full output data in a text editor
Epoch 1 Loss 1.9414448738098145
Epoch 2 Loss 1.9322998523712158
Epoch 3 Loss 1.9232864379882812
Epoch 4 Loss 1.9142569303512573
Epoch 5 Loss 1.9053010940551758
Epoch 6 Loss 1.8961549997329712
Epoch 7 Loss 1.8865703344345093
Epoch 8 Loss 1.8765546083450317
Epoch 9 Loss 1.8660269975662231
Epoch 10 Loss 1.8549989461898804
Epoch 11 Loss 1.8434016704559326
Epoch 12 Loss 1.8311491012573242
Epoch 13 Loss 1.8183444738388062
Epoch 14 Loss 1.804998755455017
Epoch 15 Loss 1.7911924123764038
Epoch 16 Loss 1.7771121263504028
Epoch 17 Loss 1.7628358602523804
Epoch 18 Loss 1.748426079750061
Epoch 19 Loss 1.7338117361068726
Epoch 20 Loss 1.7189439535140991
Epoch 21 Loss 1.7039512395858765
Epoch 22 Loss 1.6888483762741089
Epoch 23 Loss 1.6736129522323608
Epoch 24 Loss 1.6582697629928589
Epoch 25 Loss 1.6427830457687378
...
Epoch 198 Loss 0.1903388947248459
Epoch 199 Loss 0.18813340365886688
Epoch 200 Loss 0.1859591007232666
Accuracy of Test Samples:  0.756

6 创建自己的数据集

参考官方教程

import torch
import numpy as np
import torch_geometric
from torch_geometric.data import InMemoryDataset
from torch_geometric.data import Data
from torch_geometric.data import DataLoader


# create a toy dataset
def toy_dataset(num_nodes, num_node_features, num_edges):
    """Build one random toy graph.

    Args:
        num_nodes: Number of nodes in the graph.
        num_node_features: Feature dimension of every node.
        num_edges: Number of (randomly drawn) edges.

    Returns:
        A ``Data`` object whose ``x`` has shape
        [num_nodes, num_node_features] (float64, as produced by
        ``np.random.randn``) and whose ``edge_index`` has shape
        [2, num_edges] (int64) with entries in ``0 .. num_nodes - 1``.
    """
    x = np.random.randn(num_nodes, num_node_features)  # node features

    # ``high`` is exclusive in np.random.randint, so it must be num_nodes
    # (not num_nodes - 1) for the last node to be reachable.
    # np.int64 replaces the ``np.long`` alias, which newer NumPy releases
    # no longer provide.
    edge_index = np.random.randint(
        low=0, high=num_nodes, size=[2, num_edges], dtype=np.int64
    )  # [2, num_edges]

    data = Data(x=torch.from_numpy(x), edge_index=torch.from_numpy(edge_index))

    return data


# In Memory Dataset
class PyGToyDataset(InMemoryDataset):
    """In-memory dataset of randomly generated toy graphs.

    Args:
        save_root: Root directory handed to ``InMemoryDataset``.
        transform: Optional transform forwarded to ``InMemoryDataset``.
        pre_transform: Optional transform applied to every graph once,
            inside ``process``, before the collated data is saved.
    """

    def __init__(self, save_root, transform=None, pre_transform=None):
        super().__init__(save_root, transform, pre_transform)
        # Load from the full path inside the processed directory. A bare
        # file name would resolve against the current working directory.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Names of the raw files this dataset declares."""
        return ["origin_dataset"]

    @property
    def processed_file_names(self):
        """Names of the processed files produced by ``process``."""
        return ["toy_dataset.pt"]

    def download(self):
        """Do nothing: the data is generated in ``process``, not downloaded."""
        pass

    def process(self):
        """Generate the toy graphs, collate them, and save the result."""
        # 100 samples, each sample is a graph with 32 nodes and 42 edges,
        # each node has a feature dimension of 3.
        data_list = [
            toy_dataset(num_nodes=32, num_node_features=3, num_edges=42)
            for _ in range(100)
        ]

        # Honor the pre_transform accepted by __init__ instead of silently
        # ignoring it.
        if self.pre_transform is not None:
            data_list = [self.pre_transform(graph) for graph in data_list]

        data_save, data_slices = self.collate(data_list)
        # (data_save, data_slices) --> <processed_dir>/toy_dataset.pt
        # Saving to processed_paths[0] puts the file inside the processed
        # directory, so later instantiations can find and reuse it.
        torch.save((data_save, data_slices), self.processed_paths[0])


if __name__ == "__main__":
    # Instantiate the toy dataset under the "toy" root directory and show
    # its first sample (each sample is one graph).
    dataset = PyGToyDataset(save_root="toy")
    first_graph = dataset[0]
    print(first_graph)

    # Walk the dataset in shuffled mini-batches of five graphs each.
    # NOTE(review): newer PyG releases expose DataLoader from
    # torch_geometric.loader; confirm the import location for your version.
    for mini_batch in DataLoader(dataset, batch_size=5, shuffle=True):
        print(mini_batch)

7 各种卷积的使用方法

参考链接

其他

参考自知乎文章：https://zhuanlan.zhihu.com/p/381204915

posted @ 2022-09-30 19:48 Sharycxc 阅读(1108) 评论(0) 收藏举报

刷新页面返回顶部

sharycxc

PyG的安装与基本使用方法

1 新建conda环境

2 安装Pytorch并测试torch、cuda版本，以及cuda是否可用

3 安装pytorch-geometric

4 测试pytorch-geometric是否安装成功

5 pyg基本使用方法

6 创建自己的数据集

7 各种卷积的使用方法

其他

公告