CS224W Study Notes
colab1
torch.ones — all ones
torch.zeros — all zeros
torch.rand — uniform random values in [0, 1)
x.shape — the tensor's shape
x.dtype — the tensor's data type
zeros = torch.zeros(3, 4, dtype=torch.float32)  # create with an explicit dtype
zeros = zeros.type(torch.long)  # convert to another dtype
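A quick, self-contained sketch of these basics (illustrative only):

import torch

ones = torch.ones(2, 3)                         # 2x3 tensor of ones
rand = torch.rand(2, 3)                         # uniform samples in [0, 1)
print(rand.shape, rand.dtype)                   # torch.Size([2, 3]) torch.float32
zeros = torch.zeros(3, 4, dtype=torch.float32)  # create with an explicit dtype
zeros = zeros.type(torch.long)                  # cast float32 -> int64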
Gradient descent with torch:
# (SGD comes from torch.optim; `accuracy` is a helper defined elsewhere in the colab)
def train(emb, loss_fn, sigmoid, train_label, train_edge):
# TODO: Train the embedding layer here. You can also change epochs and
# learning rate. In general, you need to implement:
# (1) Get the embeddings of the nodes in train_edge
# (2) Dot product the embeddings between each node pair
# (3) Feed the dot product result into sigmoid
# (4) Feed the sigmoid output into the loss_fn
# (5) Print both loss and accuracy of each epoch
# (6) Update the embeddings using the loss and optimizer
# (as a sanity check, the loss should decrease during training)
epochs = 500
learning_rate = 0.1
optimizer = SGD(emb.parameters(), lr=learning_rate, momentum=0.9)
for i in range(epochs):
############# Your code here ############
embeddings = emb(train_edge)  # shape [2, num_edges, emb_dim]
# dot product between the two endpoint embeddings of every edge
dot_pro = torch.sum(embeddings[0] * embeddings[1], dim=1)
pred = sigmoid(dot_pro)
loss = loss_fn(pred, train_label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
accu = accuracy(pred, train_label)
print(f"Epoch {i+1}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accu:.4f}")
#########################################
loss_fn = nn.BCELoss()
sigmoid = nn.Sigmoid()
print(pos_edge_index.shape)
# Generate the positive and negative labels
pos_label = torch.ones(pos_edge_index.shape[1], )
neg_label = torch.zeros(neg_edge_index.shape[1], )
# Concat positive and negative labels into one tensor
train_label = torch.cat([pos_label, neg_label], dim=0)
print(train_label.shape)
# Concat positive and negative edges into one tensor
# Since the network is very small, we do not split the edges into val/test sets
train_edge = torch.cat([pos_edge_index, neg_edge_index], dim=1)
print(train_edge.shape)
train(emb, loss_fn, sigmoid, train_label, train_edge)
Each epoch performs:
optimizer.zero_grad()
loss.backward()
optimizer.step()
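As a minimal, self-contained illustration of this three-step pattern (a toy example, not from the colab):

import torch

w = torch.tensor(1.0, requires_grad=True)
optimizer = torch.optim.SGD([w], lr=0.05)
for _ in range(100):
    loss = (w * 3.0 - 6.0) ** 2  # minimized at w = 2
    optimizer.zero_grad()        # clear gradients from the previous step
    loss.backward()              # accumulate d(loss)/dw into w.grad
    optimizer.step()             # apply the update
print(w)  # tensor close to 2.0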
colab2
Building a GCN with torch:
Declare a class GCN(torch.nn.Module) that applies the various nn functions, convolution layers, and BatchNorm to the tensors; the training procedure afterwards is the same as in colab1.
The optimizer is torch.optim.Adam. A minimal skeleton of this pattern is sketched below.
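A minimal skeleton of this pattern (a sketch; the real model below adds BatchNorm, dropout, and a configurable number of layers):

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class TinyGCN(torch.nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super().__init__()
        self.conv1 = GCNConv(in_dim, hid_dim)
        self.conv2 = GCNConv(hid_dim, out_dim)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)

Training then pairs this with torch.optim.Adam(model.parameters(), lr=...) and the same zero_grad/backward/step loop as colab1.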
from torch_geometric.datasets import TUDataset
import torch
import os
if 'IS_GRADESCOPE_ENV' not in os.environ:
root = './enzymes'
name = 'ENZYMES'
# The ENZYMES dataset
pyg_dataset= TUDataset(root, name)
# You will find that there are 600 graphs in this dataset
print(pyg_dataset)
def get_num_classes(pyg_dataset):
# TODO: Implement a function that takes a PyG dataset object
# and returns the number of classes for that dataset.
num_classes = 0
############# Your code here ############
## (~1 line of code)
## Note
## 1. Colab autocomplete functionality might be useful.
num_classes = pyg_dataset.num_classes
#########################################
return num_classes
def get_num_features(pyg_dataset):
# TODO: Implement a function that takes a PyG dataset object
# and returns the number of features for that dataset.
num_features = 0
############# Your code here ############
## (~1 line of code)
## Note
## 1. Colab autocomplete functionality might be useful.
num_features = pyg_dataset.num_features
#########################################
return num_features
if 'IS_GRADESCOPE_ENV' not in os.environ:
num_classes = get_num_classes(pyg_dataset)
num_features = get_num_features(pyg_dataset)
print("{} dataset has {} classes".format(name, num_classes))
print("{} dataset has {} features".format(name, num_features))
def get_graph_class(pyg_dataset, idx):
# TODO: Implement a function that takes a PyG dataset object,
# an index of a graph within the dataset, and returns the class/label
# of the graph (as an integer).
label = -1
############# Your code here ############
## (~1 line of code)
label = pyg_dataset[idx].y.item()
#########################################
return label
# Here pyg_dataset is a dataset for graph classification
if 'IS_GRADESCOPE_ENV' not in os.environ:
graph_0 = pyg_dataset[0]
print(graph_0)
idx = 100
label = get_graph_class(pyg_dataset, idx)
print('Graph with index {} has label {}'.format(idx, label))
def get_graph_num_edges(pyg_dataset, idx):
# TODO: Implement a function that takes a PyG dataset object,
# the index of a graph in the dataset, and returns the number of
# edges in the graph (as an integer). You should not count an edge
# twice if the graph is undirected. For example, in an undirected
# graph G, if two nodes v and u are connected by an edge, this edge
# should only be counted once.
num_edges = 0
############# Your code here ############
## Note:
## 1. You can't return the data.num_edges directly
## 2. We assume the graph is undirected
## 3. Look at the PyG dataset built in functions
## (~4 lines of code)
graph = pyg_dataset[idx]
# edge_index stores each undirected edge in both directions, so halve the count
num_edges = graph.num_edges // 2
#########################################
return num_edges
if 'IS_GRADESCOPE_ENV' not in os.environ:
idx = 200
num_edges = get_graph_num_edges(pyg_dataset, idx)
print('Graph with index {} has {} edges'.format(idx, num_edges))
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
if 'IS_GRADESCOPE_ENV' not in os.environ:
dataset_name = 'ogbn-arxiv'
# Load the dataset and transform it to sparse tensor
dataset = PygNodePropPredDataset(name=dataset_name,
transform=T.ToSparseTensor())
print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))
# Extract the graph
data = dataset[0]
print(data)
def graph_num_features(data):
# TODO: Implement a function that takes a PyG data object,
# and returns the number of features in the graph (as an integer).
num_features = 0
############# Your code here ############
## (~1 line of code)
num_features = data.num_features
#########################################
return num_features
if 'IS_GRADESCOPE_ENV' not in os.environ:
num_features = graph_num_features(data)
print('The graph has {} features'.format(num_features))
import torch
import pandas as pd
import torch.nn.functional as F
print(torch.__version__)
# The PyG built-in GCNConv
from torch_geometric.nn import GCNConv
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
if 'IS_GRADESCOPE_ENV' not in os.environ:
dataset_name = 'ogbn-arxiv'
dataset = PygNodePropPredDataset(name=dataset_name,
transform=T.ToSparseTensor())
data = dataset[0]
# Make the adjacency matrix symmetric
data.adj_t = data.adj_t.to_symmetric()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# If you use GPU, the device should be cuda
print('Device: {}'.format(device))
data = data.to(device)
split_idx = dataset.get_idx_split()
train_idx = split_idx['train'].to(device)
class GCN(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, num_layers,
dropout, return_embeds=False):
# TODO: Implement a function that initializes self.convs,
# self.bns, and self.softmax.
super(GCN, self).__init__()
# A list of GCNConv layers
self.convs = torch.nn.ModuleList()
# A list of 1D batch normalization layers
self.bns = torch.nn.ModuleList()
# The log softmax layer
self.softmax = None
############# Your code here ############
## Note:
## 1. You should use torch.nn.ModuleList for self.convs and self.bns
## 2. self.convs has num_layers GCNConv layers
## 3. self.bns has num_layers - 1 BatchNorm1d layers
## 4. You should use torch.nn.LogSoftmax for self.softmax
## 5. The parameters you can set for GCNConv include 'in_channels' and
## 'out_channels'. For more information please refer to the documentation:
## https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#torch_geometric.nn.conv.GCNConv
## 6. The only parameter you need to set for BatchNorm1d is 'num_features'
## For more information please refer to the documentation:
## https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html
## (~10 lines of code)
self.num_layers = num_layers
for i in range(num_layers-1):
self.convs.append(GCNConv(input_dim if i == 0 else hidden_dim,hidden_dim))
self.bns.append(torch.nn.BatchNorm1d(hidden_dim))
self.convs.append(GCNConv(hidden_dim,output_dim))
self.softmax = torch.nn.LogSoftmax(dim=1)
#########################################
# Probability of an element getting zeroed
self.dropout = dropout
# Skip classification layer and return node embeddings
self.return_embeds = return_embeds
def reset_parameters(self):
for conv in self.convs:
conv.reset_parameters()
for bn in self.bns:
bn.reset_parameters()
def forward(self, x, adj_t):
# TODO: Implement a function that takes the feature tensor x and
# edge_index tensor adj_t and returns the output tensor as
# shown in the figure.
out = None
############# Your code here ############
## Note:
## 1. Construct the network as shown in the figure
## 2. torch.nn.functional.relu and torch.nn.functional.dropout are useful
## For more information please refer to the documentation:
## https://pytorch.org/docs/stable/nn.functional.html
## 3. Don't forget to set F.dropout training to self.training
## 4. If return_embeds is True, then skip the last softmax layer
## (~7 lines of code)
#########################################
for i in range(self.num_layers - 1):
    x = self.convs[i](x, adj_t)
    x = self.bns[i](x)
    x = F.relu(x)
    x = F.dropout(x, self.dropout, training=self.training)
out = self.convs[-1](x, adj_t)
if not self.return_embeds:
    out = self.softmax(out)
return out
def train(model, data, train_idx, optimizer, loss_fn):
# TODO: Implement a function that trains the model by
# using the given optimizer and loss_fn.
model.train()
loss = 0
############# Your code here ############
## Note:
## 1. Zero grad the optimizer
## 2. Feed the data into the model
## 3. Slice the model output and label by train_idx
## 4. Feed the sliced output and label to loss_fn
## (~4 lines of code)
optimizer.zero_grad()
out = model(data.x, data.adj_t)
out = out[train_idx]
label = data.y[train_idx].reshape(-1)  # flatten the [N, 1] labels to [N]
loss = loss_fn(out, label)
#########################################
loss.backward()
optimizer.step()
return loss.item()
# Test function here
@torch.no_grad()
def test(model, data, split_idx, evaluator, save_model_results=False):
# TODO: Implement a function that tests the model by
# using the given split_idx and evaluator.
model.eval()
# The output of model on all data
out = None
############# Your code here ############
## (~1 line of code)
## Note:
## 1. No index slicing here
out = model(data.x, data.adj_t)
#########################################
y_pred = out.argmax(dim=-1, keepdim=True)
train_acc = evaluator.eval({
'y_true': data.y[split_idx['train']],
'y_pred': y_pred[split_idx['train']],
})['acc']
valid_acc = evaluator.eval({
'y_true': data.y[split_idx['valid']],
'y_pred': y_pred[split_idx['valid']],
})['acc']
test_acc = evaluator.eval({
'y_true': data.y[split_idx['test']],
'y_pred': y_pred[split_idx['test']],
})['acc']
if save_model_results:
print ("Saving Model Predictions")
data = {}
data['y_pred'] = y_pred.view(-1).cpu().detach().numpy()
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('ogbn-arxiv_node.csv', sep=',', index=False)
return train_acc, valid_acc, test_acc
# Please do not change the args
if 'IS_GRADESCOPE_ENV' not in os.environ:
args = {
'device': device,
'num_layers': 3,
'hidden_dim': 256,
'dropout': 0.5,
'lr': 0.01,
'epochs': 500,
}
args
if 'IS_GRADESCOPE_ENV' not in os.environ:
model = GCN(data.num_features, args['hidden_dim'],
dataset.num_classes, args['num_layers'],
args['dropout']).to(device)
evaluator = Evaluator(name='ogbn-arxiv')
# Please do not change these args
# Training should take <10min using GPU runtime
import copy
if 'IS_GRADESCOPE_ENV' not in os.environ:
# reset the parameters to initial random value
model.reset_parameters()
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = F.nll_loss
best_model = None
best_valid_acc = 0
for epoch in range(1, 1 + args["epochs"]):
loss = train(model, data, train_idx, optimizer, loss_fn)
result = test(model, data, split_idx, evaluator)
train_acc, valid_acc, test_acc = result
if valid_acc > best_valid_acc:
best_valid_acc = valid_acc
best_model = copy.deepcopy(model)
print(f'Epoch: {epoch:02d}, '
f'Loss: {loss:.4f}, '
f'Train: {100 * train_acc:.2f}%, '
f'Valid: {100 * valid_acc:.2f}% '
f'Test: {100 * test_acc:.2f}%')
The graph-level pipeline below uses global_mean_pool but is otherwise much the same as the node-level one above; a toy sketch of the pooling call follows.
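What global_mean_pool does (a toy sketch): it averages node embeddings per graph, guided by the batch vector that maps each node to its graph id.

import torch
from torch_geometric.nn import global_mean_pool

x = torch.arange(12, dtype=torch.float).view(6, 2)  # 6 nodes, 2 features each
batch = torch.tensor([0, 0, 0, 1, 1, 1])            # nodes 0-2 in graph 0, nodes 3-5 in graph 1
print(global_mean_pool(x, batch))                   # shape [2, 2]: one mean row per graph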
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.data import DataLoader
from tqdm.notebook import tqdm
import torch
import os
if 'IS_GRADESCOPE_ENV' not in os.environ:
# Load the dataset
dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device: {}'.format(device))
split_idx = dataset.get_idx_split()
# Check task type
print('Task type: {}'.format(dataset.task_type))
# Load the dataset splits into corresponding dataloaders
# We will train the graph classification task on a batch of 32 graphs
# Shuffle the order of graphs for training set
if 'IS_GRADESCOPE_ENV' not in os.environ:
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True, num_workers=0)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False, num_workers=0)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False, num_workers=0)
if 'IS_GRADESCOPE_ENV' not in os.environ:
# Please do not change the args
args = {
'device': device,
'num_layers': 5,
'hidden_dim': 256,
'dropout': 0.5,
'lr': 0.001,
'epochs': 30,
}
args
from ogb.graphproppred.mol_encoder import AtomEncoder
from torch_geometric.nn import global_add_pool, global_mean_pool
### GCN to predict graph property
class GCN_Graph(torch.nn.Module):
def __init__(self, hidden_dim, output_dim, num_layers, dropout):
super(GCN_Graph, self).__init__()
# Load encoders for Atoms in molecule graphs
self.node_encoder = AtomEncoder(hidden_dim)
# Node embedding model
# Note that the input_dim and output_dim are set to hidden_dim
self.gnn_node = GCN(hidden_dim, hidden_dim,
hidden_dim, num_layers, dropout, return_embeds=True)
self.pool = None
############# Your code here ############
## Note:
## 1. Initialize self.pool as a global mean pooling layer
## For more information please refer to the documentation:
## https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#global-pooling-layers
self.pool = global_mean_pool
#########################################
# Output layer
self.linear = torch.nn.Linear(hidden_dim, output_dim)
def reset_parameters(self):
self.gnn_node.reset_parameters()
self.linear.reset_parameters()
def forward(self, batched_data):
# TODO: Implement a function that takes as input a
# mini-batch of graphs (torch_geometric.data.Batch) and
# returns the predicted graph property for each graph.
#
# NOTE: Since we are predicting graph level properties,
# your output will be a tensor with dimension equaling
# the number of graphs in the mini-batch
# Extract important attributes of our mini-batch
x, edge_index, batch = batched_data.x, batched_data.edge_index, batched_data.batch
embed = self.node_encoder(x)
out = None
############# Your code here ############
## Note:
## 1. Construct node embeddings using existing GCN model
## 2. Use the global pooling layer to aggregate features for each individual graph
## For more information please refer to the documentation:
## https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#global-pooling-layers
## 3. Use a linear layer to predict each graph's property
## (~3 lines of code)
node_embeddings = self.gnn_node(embed, edge_index)
pooled = self.pool(node_embeddings, batch)
out = self.linear(pooled)
#########################################
return out
def train(model, device, data_loader, optimizer, loss_fn):
# TODO: Implement a function that trains your model by
# using the given optimizer and loss_fn.
model.train()
loss = 0
for step, batch in enumerate(tqdm(data_loader, desc="Iteration")):
batch = batch.to(device)
if batch.x.shape[0] == 1 or batch.batch[-1] == 0:
pass
else:
## ignore nan targets (unlabeled) when computing training loss.
is_labeled = batch.y == batch.y
############# Your code here ############
## Note:
## 1. Zero grad the optimizer
## 2. Feed the data into the model
## 3. Use `is_labeled` mask to filter output and labels
## 4. You may need to change the type of label to torch.float32
## 5. Feed the output and label to the loss_fn
## (~3 lines of code)
optimizer.zero_grad()
output = model(batch)
# mask out unlabeled (NaN) targets and cast labels to float for the BCE loss
loss = loss_fn(output[is_labeled], batch.y[is_labeled].float())
#########################################
loss.backward()
optimizer.step()
return loss.item()
# The evaluation function
def eval(model, device, loader, evaluator, save_model_results=False, save_file=None):
model.eval()
y_true = []
y_pred = []
for step, batch in enumerate(tqdm(loader, desc="Iteration")):
batch = batch.to(device)
if batch.x.shape[0] == 1:
pass
else:
with torch.no_grad():
pred = model(batch)
y_true.append(batch.y.view(pred.shape).detach().cpu())
y_pred.append(pred.detach().cpu())
y_true = torch.cat(y_true, dim = 0).numpy()
y_pred = torch.cat(y_pred, dim = 0).numpy()
input_dict = {"y_true": y_true, "y_pred": y_pred}
if save_model_results:
print ("Saving Model Predictions")
# Create a pandas dataframe with two columns:
# y_pred | y_true
data = {}
data['y_pred'] = y_pred.reshape(-1)
data['y_true'] = y_true.reshape(-1)
df = pd.DataFrame(data=data)
# Save to csv
df.to_csv('ogbg-molhiv_graph_' + save_file + '.csv', sep=',', index=False)
return evaluator.eval(input_dict)
if 'IS_GRADESCOPE_ENV' not in os.environ:
model = GCN_Graph(args['hidden_dim'],
dataset.num_tasks, args['num_layers'],
args['dropout']).to(device)
evaluator = Evaluator(name='ogbg-molhiv')
# Please do not change these args
# Training should take <10min using GPU runtime
import copy
if 'IS_GRADESCOPE_ENV' not in os.environ:
model.reset_parameters()
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = torch.nn.BCEWithLogitsLoss()
best_model = None
best_valid_acc = 0
for epoch in range(1, 1 + args["epochs"]):
print('Training...')
loss = train(model, device, train_loader, optimizer, loss_fn)
print('Evaluating...')
train_result = eval(model, device, train_loader, evaluator)
val_result = eval(model, device, valid_loader, evaluator)
test_result = eval(model, device, test_loader, evaluator)
train_acc, valid_acc, test_acc = train_result[dataset.eval_metric], val_result[dataset.eval_metric], test_result[dataset.eval_metric]
if valid_acc > best_valid_acc:
best_valid_acc = valid_acc
best_model = copy.deepcopy(model)
print(f'Epoch: {epoch:02d}, '
f'Loss: {loss:.4f}, '
f'Train: {100 * train_acc:.2f}%, '
f'Valid: {100 * valid_acc:.2f}% '
f'Test: {100 * test_acc:.2f}%')
As an aside, GCN just aggregates the neighbors' information into the current node and normalizes by the degrees of the two endpoints; pretty naive.
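Written out, one GCN layer is

$$ h_v^{(l+1)} = \sigma\Big( \sum_{u \in N(v) \cup \{v\}} \frac{1}{\sqrt{d_u}\sqrt{d_v}} \, W^{(l)} h_u^{(l)} \Big) $$

where $d_u, d_v$ are the (self-loop-augmented) degrees of the two endpoints.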
colab3
GraphSAGE just adds an extra Linear layer and a skip connection; its update rule is sketched below.
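The update rule, matching the lin_l / lin_r layers in the code below:

$$ h_v^{(k)} = W_l \, h_v^{(k-1)} + W_r \cdot \mathrm{mean}_{u \in N(v)}\, h_u^{(k-1)}, \qquad h_v^{(k)} \leftarrow \frac{h_v^{(k)}}{\lVert h_v^{(k)} \rVert_2} \ \text{(if normalize is set)} $$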
We still use GNNStack, and GraphSage now inherits from MessagePassing. MessagePassing provides a propagate function; we override the message and aggregate functions, which propagate calls internally (see the minimal skeleton below).
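The contract in a nutshell (a minimal sketch, not the full GraphSage below): propagate gathers x_j per edge, calls message on it, then calls aggregate to reduce messages per destination node.

import torch_scatter
from torch_geometric.nn.conv import MessagePassing

class MeanConv(MessagePassing):
    def forward(self, x, edge_index):
        # kwargs passed to propagate become available in message/aggregate
        return self.propagate(edge_index, x=x)

    def message(self, x_j):
        return x_j  # x_j: source-node features per edge, shape [E, D]

    def aggregate(self, inputs, index, dim_size=None):
        # mean-reduce the [E, D] messages into [N, D] by destination index
        return torch_scatter.scatter(inputs, index, dim=self.node_dim, reduce='mean')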
# Install torch geometric
import os
import torch_geometric
torch_geometric.__version__
import torch
import torch_scatter
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils
from torch import Tensor
from typing import Union, Tuple, Optional
from torch_geometric.typing import (OptPairTensor, Adj, Size, NoneType,
OptTensor)
from torch.nn import Parameter, Linear
from torch_sparse import SparseTensor, set_diag
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax
class GNNStack(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, args, emb=False):
super(GNNStack, self).__init__()
conv_model = self.build_conv_model(args.model_type)
self.convs = nn.ModuleList()
self.convs.append(conv_model(input_dim, hidden_dim))
assert (args.num_layers >= 1), 'Number of layers is not >=1'
for l in range(args.num_layers-1):
self.convs.append(conv_model(args.heads * hidden_dim, hidden_dim))
# post-message-passing
self.post_mp = nn.Sequential(
nn.Linear(args.heads * hidden_dim, hidden_dim), nn.Dropout(args.dropout),
nn.Linear(hidden_dim, output_dim))
self.dropout = args.dropout
self.num_layers = args.num_layers
self.emb = emb
def build_conv_model(self, model_type):
if model_type == 'GraphSage':
return GraphSage
elif model_type == 'GAT':
# When applying GAT with num heads > 1, you need to modify the
# input and output dimension of the conv layers (self.convs),
# to ensure that the input dim of the next layer is num heads
# multiplied by the output dim of the previous layer.
# HINT: In case you want to play with multiheads, you need to change the for-loop that builds up self.convs to be
# self.convs.append(conv_model(hidden_dim * num_heads, hidden_dim)),
# and also the first nn.Linear(hidden_dim * num_heads, hidden_dim) in post-message-passing.
return GAT
def forward(self, data):
x, edge_index, batch = data.x, data.edge_index, data.batch
for i in range(self.num_layers):
x = self.convs[i](x, edge_index)
x = F.relu(x)
x = F.dropout(x, p=self.dropout,training=self.training)
x = self.post_mp(x)
if self.emb:
return x
return F.log_softmax(x, dim=1)
def loss(self, pred, label):
return F.nll_loss(pred, label)
class GraphSage(MessagePassing):
def __init__(self, in_channels, out_channels, normalize = True,
bias = False, **kwargs):
super(GraphSage, self).__init__(**kwargs)
self.in_channels = in_channels
self.out_channels = out_channels
self.normalize = normalize
self.lin_l = None
self.lin_r = None
############################################################################
# TODO: Your code here!
# Define the layers needed for the message and update functions below.
# self.lin_l is the linear transformation that you apply to embedding
# for central node.
# self.lin_r is the linear transformation that you apply to aggregated
# message from neighbors.
# Don't forget the bias!
# Our implementation is ~2 lines, but don't worry if you deviate from this.
self.lin_l = nn.Linear(in_channels,out_channels,bias = bias)
self.lin_r = nn.Linear(in_channels,out_channels,bias = bias)
############################################################################
self.reset_parameters()
def reset_parameters(self):
self.lin_l.reset_parameters()
self.lin_r.reset_parameters()
def forward(self, x, edge_index, size = None):
""""""
out = None
############################################################################
# TODO: Your code here!
# Implement message passing, as well as any post-processing (our update rule).
# 1. Call the propagate function to conduct the message passing.
# 1.1 See the description of propagate above or the following link for more information:
# https://pytorch-geometric.readthedocs.io/en/latest/notes/create_gnn.html
# 1.2 We will only use the representation for neighbor nodes (x_j), so by default
# we pass the same representation for central and neighbor nodes as x=(x, x).
# 2. Update our node embedding with skip connection from the previous layer.
# 3. If normalize is set, do L-2 normalization (defined in
# torch.nn.functional)
#
# Our implementation is ~5 lines, but don't worry if you deviate from this.
out = self.propagate(edge_index, x=x)
out = self.lin_l(x) + self.lin_r(out)  # skip connection from the previous layer
if self.normalize:
    out = F.normalize(out, p=2, dim=1)
############################################################################
return out
def message(self, x_j):
out = None
############################################################################
# TODO: Your code here!
# Implement your message function here.
# Hint: Look at the formulation of the mean aggregation function, focusing on
# what message each neighboring node passes.
#
# Our implementation is ~1 lines, but don't worry if you deviate from this.
out = x_j
############################################################################
return out
def aggregate(self, inputs, index, dim_size = None):
out = None
# The axis along which to index number of nodes.
node_dim = self.node_dim
############################################################################
# TODO: Your code here!
# Implement your aggregate function here.
# See here as how to use torch_scatter.scatter:
# https://pytorch-scatter.readthedocs.io/en/latest/functions/scatter.html#torch_scatter.scatter
#
# Our implementation is ~1 lines, but don't worry if you deviate from this.
out = torch_scatter.scatter(inputs, index, dim=node_dim, reduce='mean')
############################################################################
return out
import torch.optim as optim
def build_optimizer(args, params):
weight_decay = args.weight_decay
filter_fn = filter(lambda p : p.requires_grad, params)
if args.opt == 'adam':
optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
elif args.opt == 'sgd':
optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay)
elif args.opt == 'rmsprop':
optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)
elif args.opt == 'adagrad':
optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)
if args.opt_scheduler == 'none':
return None, optimizer
elif args.opt_scheduler == 'step':
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
elif args.opt_scheduler == 'cos':
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
return scheduler, optimizer
import time
import networkx as nx
import numpy as np
import torch
import torch.optim as optim
from tqdm import trange
import pandas as pd
import copy
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
import torch_geometric.nn as pyg_nn
import matplotlib.pyplot as plt
def train(dataset, args):
print("Node task. test set size:", np.sum(dataset[0]['test_mask'].numpy()))
print()
test_loader = loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)
# build model
model = GNNStack(dataset.num_node_features, args.hidden_dim, dataset.num_classes,
args)
scheduler, opt = build_optimizer(args, model.parameters())
# train
losses = []
test_accs = []
best_acc = 0
best_model = None
for epoch in trange(args.epochs, desc="Training", unit="Epochs"):
total_loss = 0
model.train()
for batch in loader:
opt.zero_grad()
pred = model(batch)
label = batch.y
pred = pred[batch.train_mask]
label = label[batch.train_mask]
loss = model.loss(pred, label)
loss.backward()
opt.step()
total_loss += loss.item() * batch.num_graphs
total_loss /= len(loader.dataset)
losses.append(total_loss)
if epoch % 10 == 0:
test_acc = test(test_loader, model)
test_accs.append(test_acc)
if test_acc > best_acc:
best_acc = test_acc
best_model = copy.deepcopy(model)
else:
test_accs.append(test_accs[-1])
return test_accs, losses, best_model, best_acc, test_loader
def test(loader, test_model, is_validation=False, save_model_preds=False, model_type=None):
test_model.eval()
correct = 0
# Note that Cora is only one graph!
for data in loader:
with torch.no_grad():
# max(dim=1) returns values, indices tuple; only need indices
pred = test_model(data).max(dim=1)[1]
label = data.y
mask = data.val_mask if is_validation else data.test_mask
# node classification: only evaluate on nodes in test set
pred = pred[mask]
label = label[mask]
if save_model_preds:
print ("Saving Model Predictions for Model Type", model_type)
data = {}
data['pred'] = pred.view(-1).cpu().detach().numpy()
data['label'] = label.view(-1).cpu().detach().numpy()
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('CORA-Node-' + model_type + '.csv', sep=',', index=False)
correct += pred.eq(label).sum().item()
total = 0
for data in loader.dataset:
total += torch.sum(data.val_mask if is_validation else data.test_mask).item()
return correct / total
class objectview(object):
def __init__(self, d):
self.__dict__ = d
if 'IS_GRADESCOPE_ENV' not in os.environ:
for args in [
{'model_type': 'GraphSage', 'dataset': 'cora', 'num_layers': 2, 'heads': 1, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 500, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},
]:
args = objectview(args)
for model in ['GraphSage']:
args.model_type = model
# Match the dimension.
if model == 'GAT':
args.heads = 2
else:
args.heads = 1
if args.dataset == 'cora':
dataset = Planetoid(root='/tmp/cora', name='Cora')
else:
raise NotImplementedError("Unknown dataset")
test_accs, losses, best_model, best_acc, test_loader = train(dataset, args)
print("Maximum test set accuracy: {0}".format(max(test_accs)))
print("Minimum loss: {0}".format(min(losses)))
# Run test for our best model to save the predictions!
test(test_loader, best_model, is_validation=False, save_model_preds=True, model_type=model)
print()
plt.title(dataset.name)
plt.plot(losses, label="training loss" + " - " + args.model_type)
plt.plot(test_accs, label="test accuracy" + " - " + args.model_type)
plt.legend()
plt.show()
colab4
GAT uses multi-head attention, and message passing has to carry a few extra arguments along; it took a bit of time to figure out. The per-head attention rule is sketched below.
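Per head, the attention implemented below is

$$ e_{vu} = \mathrm{LeakyReLU}\big( \vec a_l^{\top} W_l h_u + \vec a_r^{\top} W_r h_v \big), \qquad \alpha_{vu} = \mathrm{softmax}_{u \in N(v)}(e_{vu}), \qquad h_v' = \sum_{u \in N(v)} \alpha_{vu} \, W_l h_u $$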
# Install torch geometric
import os
import torch_geometric
import torch
import torch_scatter
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils
from torch import Tensor
from typing import Union, Tuple, Optional
from torch_geometric.typing import (OptPairTensor, Adj, Size, NoneType,
OptTensor)
from torch.nn import Parameter, Linear
from torch_sparse import SparseTensor, set_diag
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, degree
class GNNStack(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, args, emb=False):
super(GNNStack, self).__init__()
conv_model = self.build_conv_model(args.model_type)
self.convs = nn.ModuleList()
self.convs.append(conv_model(input_dim, hidden_dim))
assert (args.num_layers >= 1), 'Number of layers is not >=1'
for l in range(args.num_layers-1):
self.convs.append(conv_model(args.heads * hidden_dim, hidden_dim))
# post-message-passing
self.post_mp = nn.Sequential(
nn.Linear(args.heads * hidden_dim, hidden_dim), nn.Dropout(args.dropout),
nn.Linear(hidden_dim, output_dim))
self.dropout = args.dropout
self.num_layers = args.num_layers
self.emb = emb
def build_conv_model(self, model_type):
if model_type == 'GraphSage':
return GraphSage
elif model_type == 'GAT':
# When applying GAT with num heads > 1, you need to modify the
# input and output dimension of the conv layers (self.convs),
# to ensure that the input dim of the next layer is num heads
# multiplied by the output dim of the previous layer.
# HINT: In case you want to play with multiheads, you need to change the for-loop that builds up self.convs to be
# self.convs.append(conv_model(hidden_dim * num_heads, hidden_dim)),
# and also the first nn.Linear(hidden_dim * num_heads, hidden_dim) in post-message-passing.
return GAT
def forward(self, data):
x, edge_index, batch = data.x, data.edge_index, data.batch
for i in range(self.num_layers):
x = self.convs[i](x, edge_index)
x = F.relu(x)
x = F.dropout(x, p=self.dropout,training=self.training)
x = self.post_mp(x)
if self.emb:
return x
return F.log_softmax(x, dim=1)
def loss(self, pred, label):
return F.nll_loss(pred, label)
class GAT(MessagePassing):
def __init__(self, in_channels, out_channels, heads = 2,
negative_slope = 0.2, dropout = 0., **kwargs):
super(GAT, self).__init__(node_dim=0, **kwargs)
self.in_channels = in_channels
self.out_channels = out_channels
self.heads = heads
self.negative_slope = negative_slope
self.dropout = dropout
self.lin_l = None
self.lin_r = None
self.att_l = None
self.att_r = None
############################################################################
# TODO: Your code here!
# Define the layers needed for the message functions below.
# self.lin_l is the linear transformation that you apply to embeddings
# BEFORE message passing.
#
# Pay attention to dimensions of the linear layers, since we're using
# multi-head attention.
# Our implementation is ~1 lines, but don't worry if you deviate from this.
self.lin_l = nn.Linear(in_channels, heads * out_channels)
############################################################################
self.lin_r = self.lin_l
############################################################################
# TODO: Your code here!
# Define the attention parameters \overrightarrow{a_l/r}^T in the above intro.
# You have to deal with multi-head scenarios.
# Use nn.Parameter instead of nn.Linear
# Our implementation is ~2 lines, but don't worry if you deviate from this.
# one shared attention vector per head, broadcast over all nodes
# (do not hardcode the number of nodes in the graph here)
self.att_l = nn.Parameter(torch.Tensor(1, heads, out_channels))
self.att_r = nn.Parameter(torch.Tensor(1, heads, out_channels))
############################################################################
self.reset_parameters()
def reset_parameters(self):
nn.init.xavier_uniform_(self.lin_l.weight)
nn.init.xavier_uniform_(self.lin_r.weight)
nn.init.xavier_uniform_(self.att_l)
nn.init.xavier_uniform_(self.att_r)
def forward(self, x, edge_index, size = None):
H, C = self.heads, self.out_channels
############################################################################
# TODO: Your code here!
# Implement message passing, as well as any pre- and post-processing (our update rule).
# 1. First apply linear transformation to node embeddings, and split that
# into multiple heads. We use the same representations for source and
# target nodes, but apply different linear weights (W_l and W_r)
# 2. Calculate alpha vectors for central nodes (alpha_l) and neighbor nodes (alpha_r).
# 3. Call propagate function to conduct the message passing.
# 3.1 Remember to pass alpha = (alpha_l, alpha_r) as a parameter.
# 3.2 See there for more information: https://pytorch-geometric.readthedocs.io/en/latest/notes/create_gnn.html
# 4. Transform the output back to the shape of [N, H * C].
# Our implementation is ~5 lines, but don't worry if you deviate from this.
xl = self.lin_l(x).reshape(-1, H, C)
xr = self.lin_r(x).reshape(-1, H, C)
alpha_l = (xl * self.att_l).sum(dim=-1)
alpha_r = (xr * self.att_r).sum(dim=-1)
out = self.propagate(edge_index, size=size, x=(xl, xr), alpha=(alpha_l, alpha_r))
out = out.view(-1, H * C)
############################################################################
return out
def message(self, x_j, alpha_j, alpha_i, index, ptr, size_i):
############################################################################
# TODO: Your code here!
# Implement your message function. Putting the attention in message
# instead of in update is a little tricky.
# 1. Calculate the final attention weights using alpha_i and alpha_j,
# and apply leaky Relu.
# 2. Calculate softmax over the neighbor nodes for all the nodes. Use
# torch_geometric.utils.softmax instead of the one in Pytorch.
# 3. Apply dropout to attention weights (alpha).
# 4. Multiply embeddings and attention weights. As a sanity check, the output
# should be of shape [E, H, C].
# 5. ptr (LongTensor, optional): If given, computes the softmax based on
# sorted inputs in CSR representation. You can simply pass it to softmax.
# Our implementation is ~4-5 lines, but don't worry if you deviate from this.
alpha = alpha_i + alpha_j
alpha = F.leaky_relu(alpha, self.negative_slope)
alpha = softmax(alpha, index, ptr, size_i)  # normalize over each node's incoming edges
alpha = F.dropout(alpha, p=self.dropout, training=self.training)
out = x_j * alpha.reshape(-1, self.heads, 1)  # shape [E, H, C]
############################################################################
return out
def aggregate(self, inputs, index, dim_size = None):
############################################################################
# TODO: Your code here!
# Implement your aggregate function here.
# See here as how to use torch_scatter.scatter: https://pytorch-scatter.readthedocs.io/en/latest/_modules/torch_scatter/scatter.html
# Pay attention to "reduce" parameter is different from that in GraphSage.
# Our implementation is ~1 lines, but don't worry if you deviate from this.
out = torch_scatter.scatter(inputs, index, dim=0, dim_size=dim_size, reduce='sum')
out = out.reshape(-1, self.heads * self.out_channels)
############################################################################
return out
import torch.optim as optim
def build_optimizer(args, params):
weight_decay = args.weight_decay
filter_fn = filter(lambda p : p.requires_grad, params)
if args.opt == 'adam':
optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
elif args.opt == 'sgd':
optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay)
elif args.opt == 'rmsprop':
optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)
elif args.opt == 'adagrad':
optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)
if args.opt_scheduler == 'none':
return None, optimizer
elif args.opt_scheduler == 'step':
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
elif args.opt_scheduler == 'cos':
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
return scheduler, optimizer
import time
import networkx as nx
import numpy as np
import torch
import torch.optim as optim
from tqdm import trange
import pandas as pd
import copy
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
import torch_geometric.nn as pyg_nn
import matplotlib.pyplot as plt
def train(dataset, args):
print("Node task. test set size:", np.sum(dataset[0]['test_mask'].numpy()))
print()
test_loader = loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)
# build model
model = GNNStack(dataset.num_node_features, args.hidden_dim, dataset.num_classes,
args)
scheduler, opt = build_optimizer(args, model.parameters())
# train
losses = []
test_accs = []
best_acc = 0
best_model = None
for epoch in trange(args.epochs, desc="Training", unit="Epochs"):
total_loss = 0
model.train()
for batch in loader:
opt.zero_grad()
pred = model(batch)
label = batch.y
pred = pred[batch.train_mask]
label = label[batch.train_mask]
loss = model.loss(pred, label)
loss.backward()
opt.step()
total_loss += loss.item() * batch.num_graphs
total_loss /= len(loader.dataset)
losses.append(total_loss)
if epoch % 10 == 0:
test_acc = test(test_loader, model)
test_accs.append(test_acc)
if test_acc > best_acc:
best_acc = test_acc
best_model = copy.deepcopy(model)
else:
test_accs.append(test_accs[-1])
return test_accs, losses, best_model, best_acc, test_loader
def test(loader, test_model, is_validation=False, save_model_preds=False, model_type=None):
test_model.eval()
correct = 0
# Note that Cora is only one graph!
for data in loader:
with torch.no_grad():
# max(dim=1) returns values, indices tuple; only need indices
pred = test_model(data).max(dim=1)[1]
label = data.y
mask = data.val_mask if is_validation else data.test_mask
# node classification: only evaluate on nodes in test set
pred = pred[mask]
label = label[mask]
if save_model_preds:
print ("Saving Model Predictions for Model Type", model_type)
data = {}
data['pred'] = pred.view(-1).cpu().detach().numpy()
data['label'] = label.view(-1).cpu().detach().numpy()
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('CORA-Node-' + model_type + '.csv', sep=',', index=False)
correct += pred.eq(label).sum().item()
total = 0
for data in loader.dataset:
total += torch.sum(data.val_mask if is_validation else data.test_mask).item()
return correct / total
class objectview(object):
def __init__(self, d):
self.__dict__ = d
if 'IS_GRADESCOPE_ENV' not in os.environ:
for args in [
{'model_type': 'GAT', 'dataset': 'cora', 'num_layers': 2, 'heads': 1, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 500, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},
]:
args = objectview(args)
for model in ['GAT']:
args.model_type = model
# Match the dimension.
if model == 'GAT':
args.heads = 2
else:
args.heads = 1
if args.dataset == 'cora':
dataset = Planetoid(root='/tmp/cora', name='Cora')
else:
raise NotImplementedError("Unknown dataset")
test_accs, losses, best_model, best_acc, test_loader = train(dataset, args)
print("Maximum test set accuracy: {0}".format(max(test_accs)))
print("Minimum loss: {0}".format(min(losses)))
# Run test for our best model to save the predictions!
test(test_loader, best_model, is_validation=False, save_model_preds=True, model_type=model)
print()
plt.title(dataset.name)
plt.plot(losses, label="training loss" + " - " + args.model_type)
plt.plot(test_accs, label="test accuracy" + " - " + args.model_type)
plt.legend()
plt.show()
colab5
This colab uses their self-written deepSNAP library to implement a two-layer HAN; the semantic-level attention it uses is sketched below.
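The semantic-level (message-type) attention implemented in HeteroGNNWrapperConv.aggregate below: given per-message-type embeddings $h^m$,

$$ w_m = \frac{1}{|V|} \sum_{v \in V} q^{\top} \tanh\big( W h_v^{m} + b \big), \qquad \alpha_m = \frac{\exp(w_m)}{\sum_{m'} \exp(w_{m'})}, \qquad h_v = \sum_m \alpha_m \, h_v^{m} $$

where $W$, $b$, $q$ are the two linear layers of self.attn_proj.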
# import torch
# print(torch.__version__)
# import torch_geometric
# print(torch_geometric.__version__)
# import networkx as nx
# from networkx.algorithms.community import greedy_modularity_communities
# import matplotlib.pyplot as plt
# import copy
# import os
# import torch
# G = nx.karate_club_graph()
# community_map = {}
# for node in G.nodes(data=True):
# if node[1]["club"] == "Mr. Hi":
# community_map[node[0]] = 0
# else:
# community_map[node[0]] = 1
# node_color = []
# color_map = {0: 0, 1: 1}
# node_color = [color_map[community_map[node]] for node in G.nodes()]
# pos = nx.spring_layout(G)
# plt.figure(figsize=(7, 7))
# def assign_node_types(G, community_map):
# # TODO: Implement a function that takes in a NetworkX graph
# # G and community map assignment (mapping node id --> "n0"/"n1" label)
# # and adds 'node_type' as a node_attribute in G.
# ############# Your code here ############
# ## (~2 line of code) It's alright if you take up more lines!
# ## Note
# ## 1. Look up NetworkX `nx.classes.function.set_node_attributes`
# ## 2. Look above for the two node type values!
# new_map = community_map.copy()
# for key in new_map:
# if not new_map[key]:
# new_map[key] = 'n0'
# else:
# new_map[key] = 'n1'
# nx.set_node_attributes(G, new_map, 'node_type')
# #########################################
# def assign_node_labels(G, community_map):
# # TODO: Implement a function that takes in a NetworkX graph
# # G and community map assignment (mapping node id --> 0/1 label)
# # and adds 'node_label' as a node_attribute in G.
# ############# Your code here ############
# ## (~2 line of code) It's alright if you take up more lines!
# ## Note
# ## 1. Look up NetworkX `nx.classes.function.set_node_attributes`
# nx.set_node_attributes(G, community_map, 'node_label')
# #########################################
# def assign_node_features(G):
# # TODO: Implement a function that takes in a NetworkX graph
# # G and adds 'node_feature' as a node_attribute in G. Each node
# # in the graph has the same feature vector - a torchtensor with
# # data [1., 1., 1., 1., 1.]
# ############# Your code here ############
# ## (~2 line of code) It's alright if you take up more lines!
# ## Note
# ## 1. Look up NetworkX `nx.classes.function.set_node_attributes`
# nx.set_node_attributes(G, torch.tensor([1., 1., 1., 1., 1.]), 'node_feature')
# #########################################
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# assign_node_types(G, community_map)
# assign_node_labels(G, community_map)
# assign_node_features(G)
# # Explore node properties for the node with id: 20
# node_id = 20
# print (f"Node {node_id} has properties:", G.nodes(data=True)[node_id])
# def assign_edge_types(G, community_map):
# # TODO: Implement a function that takes in a NetworkX graph
# # G and community map assignment (mapping node id --> 0/1 label)
# # and adds 'edge_type' as a edge_attribute in G.
# ############# Your code here ############
# ## (~5 line of code) It's alright if you take up more lines!
# ## Note
# ## 1. Create an edge assignment dict following rules above
# ## 2. Look up NetworkX `nx.classes.function.set_edge_attributes`
# edge_map = {}
# for edge in G.edges(data=True):
# if not community_map[edge[0]] and not community_map[edge[1]]:
# edge_map[edge[0], edge[1]] = 'e0'
# elif community_map[edge[0]] and community_map[edge[1]]:
# edge_map[edge[0], edge[1]] = 'e1'
# else:
# edge_map[edge[0], edge[1]] = 'e2'
# nx.set_edge_attributes(G, edge_map, 'edge_type')
# #########################################
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# assign_edge_types(G, community_map)
# # Explore edge properties for a sampled edge and check the corresponding
# # node types
# edge_idx = 15
# n1 = 0
# n2 = 31
# edge = list(G.edges(data=True))[edge_idx]
# print (f"Edge ({edge[0]}, {edge[1]}) has properties:", edge[2])
# print (f"Node {n1} has properties:", G.nodes(data=True)[n1])
# print (f"Node {n2} has properties:", G.nodes(data=True)[n2])
# from pylab import show
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# edge_color = {}
# for edge in G.edges():
# n1, n2 = edge
# edge_color[edge] = community_map[n1] if community_map[n1] == community_map[n2] else 2
# if community_map[n1] == community_map[n2] and community_map[n1] == 0:
# edge_color[edge] = 'blue'
# elif community_map[n1] == community_map[n2] and community_map[n1] == 1:
# edge_color[edge] = 'red'
# else:
# edge_color[edge] = 'green'
# G_orig = copy.deepcopy(G)
# nx.classes.function.set_edge_attributes(G, edge_color, name='color')
# colors = nx.get_edge_attributes(G,'color').values()
# labels = nx.get_node_attributes(G, 'node_type')
# plt.figure(figsize=(8, 8))
# nx.draw(G, pos=pos, cmap=plt.get_cmap('coolwarm'), node_color=node_color, edge_color=colors, labels=labels, font_color='white')
# # show()
# from deepsnap.hetero_graph import HeteroGraph
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# hete = HeteroGraph(G_orig)
# def get_nodes_per_type(hete):
# # TODO: Implement a function that takes a DeepSNAP dataset object
# # and return the number of nodes per `node_type`.
# num_nodes_n0 = 0
# num_nodes_n1 = 0
# ############# Your code here ############
# ## (~2 line of code)
# ## Note
# ## 1. Colab autocomplete functionality might be useful. Explore the attributes of HeteroGraph class.
# num_nodes_n0 = hete.num_nodes('n0')
# num_nodes_n1 = hete.num_nodes('n1')
# #########################################
# return num_nodes_n0, num_nodes_n1
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# num_nodes_n0, num_nodes_n1 = get_nodes_per_type(hete)
# print("Node type n0 has {} nodes".format(num_nodes_n0))
# print("Node type n1 has {} nodes".format(num_nodes_n1))
# def get_num_message_edges(hete):
# # TODO: Implement this function that takes a DeepSNAP dataset object
# # and return the number of edges for each message type.
# # You should return a list of tuples as
# # (message_type, num_edge)
# message_type_edges = []
# ############# Your code here ############
# ## (~2 line of code)
# ## Note
# ## 1. Colab autocomplete functionality might be useful. Explore the attributes of HeteroGraph class.
# message_type_edges.append((('n0','e0','n0'), hete.num_edges(('n0','e0','n0'))))
# message_type_edges.append((('n1','e1','n1'), hete.num_edges(('n1','e1','n1'))))
# message_type_edges.append((('n0','e2','n1'), hete.num_edges(('n0','e2','n1'))))
# #########################################
# return message_type_edges
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# message_type_edges = get_num_message_edges(hete)
# for (message_type, num_edges) in message_type_edges:
# print("Message type {} has {} edges".format(message_type, num_edges))
# from deepsnap.dataset import GraphDataset
# def compute_dataset_split_counts(datasets):
# # TODO: Implement a function that takes a dict of datasets in the form
# # {'train': dataset_train, 'val': dataset_val, 'test': dataset_test}
# # and returns a dict mapping dataset names to the number of labeled
# # nodes used for supervision in that respective dataset.
# data_set_splits = {}
# ############# Your code here ############
# ## (~3 line of code)
# ## Note
# ## 1. The DeepSNAP `node_label_index` dictionary will be helpful.
# ## 2. Remember to count both node_types
# ## 3. Remember each dataset only has one graph that we need to access
# ## (i.e. dataset[0])
# for key in datasets:
# data_set_splits[key] = len(datasets[key][0].node_label_index['n0']) + len(datasets[key][0].node_label_index['n1'])
# #########################################
# return data_set_splits
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# dataset = GraphDataset([hete], task='node')
# # Splitting the dataset
# dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
# datasets = {'train': dataset_train, 'val': dataset_val, 'test': dataset_test}
# data_set_splits = compute_dataset_split_counts(datasets)
# for dataset_name, num_nodes in data_set_splits.items():
# print("{} dataset has {} nodes".format(dataset_name, num_nodes))
# from deepsnap.dataset import GraphDataset
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# dataset = GraphDataset([hete], task='node')
# # Splitting the dataset
# dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
# titles = ['Train', 'Validation', 'Test']
# for i, dataset in enumerate([dataset_train, dataset_val, dataset_test]):
# n0 = hete._convert_to_graph_index(dataset[0].node_label_index['n0'], 'n0').tolist()
# n1 = hete._convert_to_graph_index(dataset[0].node_label_index['n1'], 'n1').tolist()
# plt.figure(figsize=(7, 7))
# plt.title(titles[i])
# nx.draw(G_orig, pos=pos, node_color="grey", edge_color=colors, labels=labels, font_color='white')
# nx.draw_networkx_nodes(G_orig.subgraph(n0), pos=pos, node_color="blue")
# nx.draw_networkx_nodes(G_orig.subgraph(n1), pos=pos, node_color="red")
import copy
import torch
import deepsnap
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
from sklearn.metrics import f1_score
from deepsnap.hetero_gnn import forward_op
from deepsnap.hetero_graph import HeteroGraph
from torch_sparse import SparseTensor, matmul
import os
class HeteroGNNConv(pyg_nn.MessagePassing):
def __init__(self, in_channels_src, in_channels_dst, out_channels):
super(HeteroGNNConv, self).__init__(aggr="mean")
self.in_channels_src = in_channels_src
self.in_channels_dst = in_channels_dst
self.out_channels = out_channels
# To simplify implementation, please initialize both self.lin_dst
# and self.lin_src out_features to out_channels
self.lin_dst = None
self.lin_src = None
self.lin_update = None
############# Your code here #############
## (~3 lines of code)
## Note:
## 1. Initialize the 3 linear layers.
## 2. Think through the connection between the mathematical
## definition of the update rule and torch linear layers!
self.lin_dst = nn.Linear(in_channels_dst, out_channels)
self.lin_src = nn.Linear(in_channels_src, out_channels)
self.lin_update = nn.Linear(out_channels*2, out_channels)
##########################################
def forward(
self,
node_feature_src,
node_feature_dst,
edge_index,
size=None
):
############# Your code here #############
## (~1 line of code)
## Note:
## 1. Unlike Colabs 3 and 4, we just need to call self.propagate with
## proper/custom arguments.
return self.propagate(edge_index, size=size, node_feature_src=node_feature_src, node_feature_dst=node_feature_dst)
##########################################
def message_and_aggregate(self, edge_index, node_feature_src):
out = None
############# Your code here #############
## (~1 line of code)
## Note:
## 1. Different from what we implemented in Colabs 3 and 4, we use message_and_aggregate
## to combine the previously seperate message and aggregate functions.
## The benefit is that we can avoid materializing x_i and x_j
## to make the implementation more efficient.
## 2. To implement efficiently, refer to PyG documentation for message_and_aggregate
## and sparse-matrix multiplication:
## https://pytorch-geometric.readthedocs.io/en/latest/notes/sparse_tensor.html
## 3. Here edge_index is torch_sparse SparseTensor. Although interesting, you
## do not need to deeply understand SparseTensor represenations!
## 4. Conceptually, think through how the message passing and aggregation
## expressed mathematically can be expressed through matrix multiplication.
# sparse-matrix multiply: aggregates src features into each dst node in one shot
out = matmul(edge_index, node_feature_src, reduce=self.aggr)
##########################################
return out
def update(self, aggr_out, node_feature_dst):
############# Your code here #############
## (~4 lines of code)
## Note:
## 1. The update function is called after message_and_aggregate
## 2. Think through the one-one connection between the mathematical update
## rule and the 3 linear layers defined in the constructor.
aggr_out = self.lin_src(aggr_out)                   # transform aggregated src messages
node_feature_dst = self.lin_dst(node_feature_dst)   # transform dst node features
aggr_out = torch.cat((node_feature_dst, aggr_out), dim=1)
aggr_out = self.lin_update(aggr_out)
##########################################
return aggr_out
class HeteroGNNWrapperConv(deepsnap.hetero_gnn.HeteroConv):
def __init__(self, convs, args, aggr="mean"):
super(HeteroGNNWrapperConv, self).__init__(convs, None)
self.aggr = aggr
# Map the index and message type
self.mapping = {}
# A numpy array that stores the final attention probability
self.alpha = None
self.attn_proj = None
if self.aggr == "attn":
############# Your code here #############
## (~1 line of code)
## Note:
## 1. Initialize self.attn_proj, where self.attn_proj should include
## two linear layers. Note, make sure you understand
## which part of the equation self.attn_proj captures.
## 2. You should use nn.Sequential for self.attn_proj
## 3. nn.Linear and nn.Tanh are useful.
## 4. You can model a weight vector (rather than matrix) by using:
## nn.Linear(some_size, 1, bias=False).
## 5. The first linear layer should have out_features as args['attn_size']
## 6. You can assume we only have one "head" for the attention.
## 7. We recommend you to implement the mean aggregation first. After
## the mean aggregation works well in the training, then you can
## implement this part.
self.attn_proj = nn.Sequential(
    nn.Linear(args['hidden_size'], args['attn_size'], bias=True),
    nn.Tanh(),
    nn.Linear(args['attn_size'], 1, bias=False),
)
##########################################
def reset_parameters(self):
super(HeteroGNNWrapperConv, self).reset_parameters()
if self.aggr == "attn":
    for layer in self.attn_proj.children():
        if hasattr(layer, 'reset_parameters'):
            layer.reset_parameters()
def forward(self, node_features, edge_indices):
message_type_emb = {}
for message_key, message_type in edge_indices.items():
src_type, edge_type, dst_type = message_key
node_feature_src = node_features[src_type]
node_feature_dst = node_features[dst_type]
edge_index = edge_indices[message_key]
message_type_emb[message_key] = (
self.convs[message_key](
node_feature_src,
node_feature_dst,
edge_index,
)
)
node_emb = {dst: [] for _, _, dst in message_type_emb.keys()}
mapping = {}
for (src, edge_type, dst), item in message_type_emb.items():
mapping[len(node_emb[dst])] = (src, edge_type, dst)
node_emb[dst].append(item)
self.mapping = mapping
for node_type, embs in node_emb.items():
if len(embs) == 1:
node_emb[node_type] = embs[0]
else:
node_emb[node_type] = self.aggregate(embs)
return node_emb
def aggregate(self, xs):
# TODO: Implement this function that aggregates all message type results.
# Here, xs is a list of tensors (embeddings) with respect to message
# type aggregation results.
if self.aggr == "mean":
############# Your code here #############
## (~2 lines of code)
## Note:
## 1. Explore the function parameter `xs`!
x = torch.mean(torch.stack(xs), dim=0)
return x
##########################################
elif self.aggr == "attn":
N = xs[0].shape[0] # Number of nodes for that node type
M = len(xs) # Number of message types for that node type
x = torch.cat(xs, dim=0).view(M, N, -1) # M * N * D
z = self.attn_proj(x).view(M, N) # M * N
z = z.mean(1) # M
alpha = torch.softmax(z, dim=0) # M
# Store the attention result to self.alpha as np array
self.alpha = alpha.view(-1).data.cpu().numpy()
alpha = alpha.view(M, 1, 1)
x = x * alpha
return x.sum(dim=0)
def generate_convs(hetero_graph, conv, hidden_size, first_layer=False):
# TODO: Implement this function that returns a dictionary of `HeteroGNNConv`
# layers where the keys are message types. `hetero_graph` is deepsnap `HeteroGraph`
# object and the `conv` is the `HeteroGNNConv`.
convs = {}
############# Your code here #############
## (~9 lines of code)
## Note:
## 1. See the hints above!
## 2. conv is of type `HeteroGNNConv`
for message_key in hetero_graph.message_types:
src_type, edge_type, dst_type = message_key
if first_layer:
convs[message_key] = conv(hetero_graph.num_node_features(src_type), hetero_graph.num_node_features(dst_type), hidden_size)
else:
convs[message_key] = conv(hidden_size, hidden_size, hidden_size)
##########################################
return convs
class HeteroGNN(torch.nn.Module):
def __init__(self, hetero_graph, args, aggr="mean"):
super(HeteroGNN, self).__init__()
self.aggr = aggr
self.hidden_size = args['hidden_size']
self.convs1 = None
self.convs2 = None
self.bns1 = nn.ModuleDict()
self.bns2 = nn.ModuleDict()
self.relus1 = nn.ModuleDict()
self.relus2 = nn.ModuleDict()
self.post_mps = nn.ModuleDict()
############# Your code here #############
## (~10 lines of code)
## Note:
## 1. For self.convs1 and self.convs2, call generate_convs at first and then
## pass the returned dictionary of `HeteroGNNConv` to `HeteroGNNWrapperConv`.
## 2. For self.bns, self.relus and self.post_mps, the keys are node_types.
## `deepsnap.hetero_graph.HeteroGraph.node_types` will be helpful.
## 3. Initialize all batchnorms to torch.nn.BatchNorm1d(hidden_size, eps=1).
## 4. Initialize all relus to nn.LeakyReLU().
## 5. For self.post_mps, each value in the ModuleDict is a linear layer
## where the `out_features` is the number of classes for that node type.
## `deepsnap.hetero_graph.HeteroGraph.num_node_labels(node_type)` will be
## useful.
self.convs1 = HeteroGNNWrapperConv(generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, True), args, aggr=self.aggr)
self.convs2 = HeteroGNNWrapperConv(generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size), args, aggr=self.aggr)
for node_type in hetero_graph.node_types:
self.bns1[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1)
self.bns2[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1)
self.relus1[node_type] = nn.LeakyReLU()
self.relus2[node_type] = nn.LeakyReLU()
self.post_mps[node_type] = nn.Linear(self.hidden_size, hetero_graph.num_node_labels(node_type))
##########################################
def forward(self, node_feature, edge_index):
# TODO: Implement the forward function. Notice that `node_feature` is
# a dictionary of tensors where keys are node types and values are
# corresponding feature tensors. The `edge_index` is a dictionary of
# tensors where keys are message types and values are corresponding
# edge index tensors (with respect to each message type).
x = node_feature
############# Your code here #############
## (~7 lines of code)
## Note:
## 1. `deepsnap.hetero_gnn.forward_op` can be helpful.
x = self.convs1(x, edge_index)
x = forward_op(x, self.bns1)
x = forward_op(x, self.relus1)
x = self.convs2(x, edge_index)
x = forward_op(x, self.bns2)
x = forward_op(x, self.relus2)
x = forward_op(x, self.post_mps)
##########################################
return x
def loss(self, preds, y, indices):
loss = 0
loss_func = F.cross_entropy
############# Your code here #############
## (~3 lines of code)
## Note:
## 1. For each node type in preds, accumulate computed loss to `loss`
## 2. The loss needs to be computed with respect to the given indices
## 3. preds is a dictionary of model predictions keyed by node_type.
## 4. indices is a dictionary of labeled supervision nodes keyed
## by node_type
for node_type in preds:
loss += loss_func(preds[node_type][indices[node_type]], y[node_type][indices[node_type]])
##########################################
return loss
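A note on forward_op used in forward above: it dispatches each node type's tensor to the module registered for that type (its definition appears in the HGT section below). A minimal sketch of the same mapping, with hypothetical node types and toy shapes:
import torch
import torch.nn as nn

x = {"paper": torch.randn(4, 8), "author": torch.randn(3, 8)}  # toy per-type features
mods = nn.ModuleDict({"paper": nn.Linear(8, 2), "author": nn.Linear(8, 2)})
out = {key: mods[key](x[key]) for key in x}  # the same per-type dispatch forward_op performs
print({key: val.shape for key, val in out.items()})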
import pandas as pd
def train(model, optimizer, hetero_graph, train_idx):
model.train()
optimizer.zero_grad()
preds = model(hetero_graph.node_feature, hetero_graph.edge_index)
loss = None
############# Your code here #############
## Note:
## 1. Compute the loss here
## 2. `deepsnap.hetero_graph.HeteroGraph.node_label` is useful
loss = model.loss(preds, hetero_graph.node_label, train_idx)
##########################################
loss.backward()
optimizer.step()
return loss.item()
def test(model, graph, indices, best_model=None, best_val=0, save_preds=False, agg_type=None):
model.eval()
accs = []
for i, index in enumerate(indices):
preds = model(graph.node_feature, graph.edge_index)
num_node_types = 0
micro = 0
macro = 0
for node_type in preds:
idx = index[node_type]
pred = preds[node_type][idx]
pred = pred.max(1)[1]
label_np = graph.node_label[node_type][idx].cpu().numpy()
pred_np = pred.cpu().numpy()
micro += f1_score(label_np, pred_np, average='micro')
macro += f1_score(label_np, pred_np, average='macro')
num_node_types += 1
# Averaging f1 score might not make sense, but in our example we only
# have one node type
micro /= num_node_types
macro /= num_node_types
accs.append((micro, macro))
# Only save the test set predictions and labels!
if save_preds and i == 2:
print ("Saving Heterogeneous Node Prediction Model Predictions with Agg:", agg_type)
print()
data = {}
data['pred'] = pred_np
data['label'] = label_np
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('ACM-Node-' + agg_type + 'Agg.csv', sep=',', index=False)
if accs[1][0] > best_val:
best_val = accs[1][0]
best_model = copy.deepcopy(model)
return accs, best_model, best_val
# Please do not change the following parameters
args = {
'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
'hidden_size': 64,
'epochs': 100,
'weight_decay': 1e-5,
'lr': 0.003,
'attn_size': 32,
}
if 'IS_GRADESCOPE_ENV' not in os.environ:
print("Device: {}".format(args['device']))
# Load the data
data = torch.load("acm.pkl")
# Message types
message_type_1 = ("paper", "author", "paper")
message_type_2 = ("paper", "subject", "paper")
# Dictionary of edge indices
edge_index = {}
edge_index[message_type_1] = data['pap']
edge_index[message_type_2] = data['psp']
# Dictionary of node features
node_feature = {}
node_feature["paper"] = data['feature']
# Dictionary of node labels
node_label = {}
node_label["paper"] = data['label']
# Load the train, validation and test indices
train_idx = {"paper": data['train_idx'].to(args['device'])}
val_idx = {"paper": data['val_idx'].to(args['device'])}
test_idx = {"paper": data['test_idx'].to(args['device'])}
# Construct a deepsnap tensor backend HeteroGraph
hetero_graph = HeteroGraph(
node_feature=node_feature,
node_label=node_label,
edge_index=edge_index,
directed=True
)
print(f"ACM heterogeneous graph: {hetero_graph.num_nodes()} nodes, {hetero_graph.num_edges()} edges")
# Node feature and node label to device
for key in hetero_graph.node_feature:
hetero_graph.node_feature[key] = hetero_graph.node_feature[key].to(args['device'])
for key in hetero_graph.node_label:
hetero_graph.node_label[key] = hetero_graph.node_label[key].to(args['device'])
# Edge_index to sparse tensor and to device
for key in hetero_graph.edge_index:
edge_index = hetero_graph.edge_index[key]
adj = SparseTensor(row=edge_index[0], col=edge_index[1], sparse_sizes=(hetero_graph.num_nodes('paper'), hetero_graph.num_nodes('paper')))
hetero_graph.edge_index[key] = adj.t().to(args['device'])
print(hetero_graph.edge_index[message_type_1])
print(hetero_graph.edge_index[message_type_2])
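Storing each relation as a transposed SparseTensor means neighbor aggregation reduces to a sparse-dense matrix product on the device. A toy sketch (hypothetical 3-node graph) of what a layer can then do with it:
import torch
from torch_sparse import SparseTensor, matmul

edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]])  # src -> dst edges on a toy graph
adj_t = SparseTensor(row=edge_index[0], col=edge_index[1], sparse_sizes=(3, 3)).t()
x = torch.randn(3, 8)                   # toy node features
out = matmul(adj_t, x, reduce="mean")   # mean-aggregates each node's in-neighbors
print(out.shape)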
# if 'IS_GRADESCOPE_ENV' not in os.environ:
# best_model = None
# best_val = 0
# model = HeteroGNN(hetero_graph, args, aggr="mean").to(args['device'])
# optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
# for epoch in range(args['epochs']):
# loss = train(model, optimizer, hetero_graph, train_idx)
# accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
# print(
# f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
# f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
# f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
# f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
# )
# best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx], save_preds=True, agg_type="Mean")
# print(
# f"Best model: "
# f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
# f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
# f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
# )
if 'IS_GRADESCOPE_ENV' not in os.environ:
best_model = None
best_val = 0
output_size = hetero_graph.num_node_labels('paper')
model = HeteroGNN(hetero_graph, args, aggr="attn").to(args['device'])
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
for epoch in range(args['epochs']):
loss = train(model, optimizer, hetero_graph, train_idx)
accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
print(
f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
)
best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx], save_preds=True, agg_type="Attention")
print(
f"Best model: "
f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
)
if 'IS_GRADESCOPE_ENV' not in os.environ:
if model.convs1.alpha is not None and model.convs2.alpha is not None:
for idx, message_type in model.convs1.mapping.items():
print(f"Layer 1 has attention {model.convs1.alpha[idx]} on message type {message_type}")
for idx, message_type in model.convs2.mapping.items():
print(f"Layer 2 has attention {model.convs2.alpha[idx]} on message type {message_type}")
*HGT
I implemented HGT on top of the lab5 framework; the whole implementation lives in class HGTConv(pyg_nn.MessagePassing). The results are mediocre..
import copy
import torch
import deepsnap
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric as pyg
import torch_geometric.nn as pyg_nn
from sklearn.metrics import f1_score
from deepsnap.hetero_graph import HeteroGraph
from torch_sparse import SparseTensor, matmul
import os
def forward_op(x, module_dict, *args, **kwargs):
if not isinstance(x, dict):
raise ValueError("The input x should be a dictionary.")
res = {}
if not isinstance(module_dict, dict) and not isinstance(module_dict, nn.ModuleDict):
for key in x:
res[key] = module_dict(x[key], *args, **kwargs)
else:
for key in x:
res[key] = module_dict[key](x[key], *args, **kwargs)
return res
class HGTConv(pyg_nn.MessagePassing):
def __init__(self, in_channels, out_channels, args, node_types, edge_types, msg_keys, aggr="add"):
super(HGTConv, self).__init__(aggr=aggr)
self.in_channels = in_channels
self.out_channels = out_channels
self.args = args
self.head = 4
self.sqrtd = 1 / np.sqrt(out_channels // self.head)  # 1/sqrt(d_head), the attention scaling factor
self.src_lins = nn.ModuleDict()
self.msg_lins = nn.ModuleDict()
self.attn_edges = nn.ModuleDict()
self.msg_edges = nn.ModuleDict()
self.dst_lins = nn.ModuleDict()
self.msg_keys = msg_keys
self.mu = nn.Parameter(torch.ones(len(msg_keys)))
self.mapping = {}
self.node_mapping = {}
self.skip = nn.Parameter(torch.ones(len(node_types)))
self.out_lins = nn.ModuleDict()
for key in msg_keys:
self.mapping[key] = len(self.mapping)
for key in node_types:
self.node_mapping[key] = len(self.node_mapping)
self.dropout = nn.Dropout(0.2)
self.norms = nn.ModuleDict()
for node_type in node_types:
self.src_lins[node_type] = nn.Linear(in_channels, out_channels)
self.dst_lins[node_type] = nn.Linear(in_channels, out_channels)
self.msg_lins[node_type] = nn.Linear(in_channels, out_channels)
self.out_lins[node_type] = nn.Linear(out_channels, out_channels)
self.norms[node_type] = nn.LayerNorm(out_channels)
for edge_type in edge_types:
self.attn_edges[edge_type] = nn.Linear(out_channels // self.head, out_channels // self.head, bias=False)
self.msg_edges[edge_type] = nn.Linear(out_channels // self.head, out_channels // self.head, bias=False)
def forward(self, node_feature, edge_index):
res = {}
for message_type in edge_index:
src_type, edge_type, dst_type = message_type
src_node_feature = node_feature[src_type]
dst_node_feature = node_feature[dst_type]
edge_index_type = edge_index[message_type]
out = self.propagate(edge_index_type, node_feature=dst_node_feature, src_node_feature=src_node_feature, message_type=message_type)
# Sum per-relation results when several message types share a destination type,
# so that (paper, author, paper) is not silently overwritten by (paper, subject, paper)
res[dst_type] = out if dst_type not in res else res[dst_type] + out
return res
def message(self, edge_index_i, edge_index_j, node_feature_i, src_node_feature_j, message_type):
src_type, edge_type, dst_type = message_type
msgs = self.msg_lins[src_type](src_node_feature_j).reshape(-1, self.head, self.out_channels // self.head)
msgs = self.msg_edges[edge_type](msgs)
src_vec = self.src_lins[src_type](src_node_feature_j).reshape(-1, self.head, self.out_channels // self.head)
dst_vec = self.dst_lins[dst_type](node_feature_i).reshape(-1, self.head, self.out_channels // self.head)
attn = (self.attn_edges[edge_type](src_vec)*dst_vec).sum(dim=-1) * self.sqrtd * self.mu[self.mapping[message_type]]
attn = pyg.utils.softmax(attn, edge_index_i).reshape(-1, self.head, 1)
res = (msgs * attn).reshape(-1, self.out_channels)
return res
def update(self, aggr_out, node_feature, message_type):
src_type, edge_type, dst_type = message_type
# Target-specific output transform followed by a gated residual connection
aggr_out = nn.functional.gelu(aggr_out)
out = self.dropout(self.out_lins[dst_type](aggr_out))
# Learnable per-node-type gate between the new message and the input feature
alpha = torch.sigmoid(self.skip[self.node_mapping[dst_type]])
if node_feature.shape[-1] == out.shape[-1]:
out = alpha * out + (1 - alpha) * node_feature
# Apply LayerNorm once, after the residual
return self.norms[dst_type](out)
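The key step in message above is pyg.utils.softmax, which normalizes edge scores within each destination node's neighborhood rather than over the whole tensor. A tiny sketch with hypothetical scores:
import torch
from torch_geometric.utils import softmax

scores = torch.tensor([1.0, 2.0, 0.5, 0.5])  # one attention score per edge
dst = torch.tensor([0, 0, 1, 1])             # destination node of each edge
alpha = softmax(scores, dst)                 # sums to 1 within each destination's group
print(alpha)  # roughly [0.27, 0.73, 0.50, 0.50]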
class HeteroGNN(torch.nn.Module):
def __init__(self, hetero_graph, msg_keys, args, aggr="add"):
super(HeteroGNN, self).__init__()
self.aggr = aggr
self.hidden_size = args['hidden_size']
self.convs1 = None
self.convs2 = None
self.post_mps = nn.ModuleDict()
input_d = hetero_graph.num_node_features(hetero_graph.node_types[0])
for node_type in hetero_graph.node_types:
assert(input_d == hetero_graph.num_node_features(node_type))
self.lin1 = nn.Linear(input_d, self.hidden_size)
self.convs1 = HGTConv(self.hidden_size, self.hidden_size, args, hetero_graph.node_types, hetero_graph.edge_types, msg_keys)
self.convs2 = HGTConv(self.hidden_size, self.hidden_size, args, hetero_graph.node_types, hetero_graph.edge_types, msg_keys)
for node_type in hetero_graph.node_types:
self.post_mps[node_type] = nn.Linear(self.hidden_size, hetero_graph.num_node_labels(node_type))
def forward(self, node_feature, edge_index):
# TODO: Implement the forward function. Notice that `node_feature` is
# a dictionary of tensors where keys are node types and values are
# corresponding feature tensors. The `edge_index` is a dictionary of
# tensors where keys are message types and values are corresponding
# edge index tensors (with respect to each message type).
x = node_feature
x = forward_op(x, self.lin1)
############# Your code here #############
## (~7 lines of code)
## Note:
## 1. `deepsnap.hetero_gnn.forward_op` can be helpful.
x = self.convs1(x, edge_index)
x = self.convs2(x, edge_index)
x = forward_op(x, self.post_mps)
##########################################
return x
def loss(self, preds, y, indices):
loss = 0
loss_func = F.cross_entropy
############# Your code here #############
## (~3 lines of code)
## Note:
## 1. For each node type in preds, accumulate computed loss to `loss`
## 2. The loss needs to be computed with respect to the given indices
## 3. preds is a dictionary of model predictions keyed by node_type.
## 4. indices is a dictionary of labeled supervision nodes keyed
## by node_type
for node_type in preds:
loss += loss_func(preds[node_type][indices[node_type]], y[node_type][indices[node_type]])
##########################################
return loss
import pandas as pd
def train(model, optimizer, hetero_graph, train_idx):
model.train()
optimizer.zero_grad()
preds = model(hetero_graph.node_feature, hetero_graph.edge_index)
loss = None
############# Your code here #############
## Note:
## 1. Compute the loss here
## 2. `deepsnap.hetero_graph.HeteroGraph.node_label` is useful
loss = model.loss(preds, hetero_graph.node_label, train_idx)
##########################################
loss.backward()
optimizer.step()
return loss.item()
def test(model, graph, indices, best_model=None, best_val=0, save_preds=False, agg_type=None):
model.eval()
accs = []
for i, index in enumerate(indices):
preds = model(graph.node_feature, graph.edge_index)
num_node_types = 0
micro = 0
macro = 0
for node_type in preds:
idx = index[node_type]
pred = preds[node_type][idx]
pred = pred.max(1)[1]
label_np = graph.node_label[node_type][idx].cpu().numpy()
pred_np = pred.cpu().numpy()
micro += f1_score(label_np, pred_np, average='micro')
macro += f1_score(label_np, pred_np, average='macro')
num_node_types += 1
# Averaging f1 score might not make sense, but in our example we only
# have one node type
micro /= num_node_types
macro /= num_node_types
accs.append((micro, macro))
# Only save the test set predictions and labels!
if save_preds and i == 2:
print ("Saving Heterogeneous Node Prediction Model Predictions with Agg:", agg_type)
print()
data = {}
data['pred'] = pred_np
data['label'] = label_np
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('ACM-Node-' + agg_type + 'Agg.csv', sep=',', index=False)
if accs[1][0] > best_val:
best_val = accs[1][0]
best_model = copy.deepcopy(model)
return accs, best_model, best_val
# Parameters for the HGT run (lr tuned up from the template's 0.003)
args = {
'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
'hidden_size': 64,
'epochs': 100,
'weight_decay': 1e-5,
'lr': 0.045,
'attn_size': 32,
}
if 'IS_GRADESCOPE_ENV' not in os.environ:
print("Device: {}".format(args['device']))
# Load the data
data = torch.load("acm.pkl")
# Message types
message_type_1 = ("paper", "author", "paper")
message_type_2 = ("paper", "subject", "paper")
# Dictionary of edge indices
edge_index = {}
edge_index[message_type_1] = data['pap']
edge_index[message_type_2] = data['psp']
# Dictionary of node features
node_feature = {}
node_feature["paper"] = data['feature']
# Dictionary of node labels
node_label = {}
node_label["paper"] = data['label']
# Load the train, validation and test indices
train_idx = {"paper": data['train_idx'].to(args['device'])}
val_idx = {"paper": data['val_idx'].to(args['device'])}
test_idx = {"paper": data['test_idx'].to(args['device'])}
# Construct a deepsnap tensor backend HeteroGraph
hetero_graph = HeteroGraph(
node_feature=node_feature,
node_label=node_label,
edge_index=edge_index,
directed=True
)
print(f"ACM heterogeneous graph: {hetero_graph.num_nodes()} nodes, {hetero_graph.num_edges()} edges")
# Node feature and node label to device
for key in hetero_graph.node_feature:
hetero_graph.node_feature[key] = hetero_graph.node_feature[key].to(args['device'])
for key in hetero_graph.node_label:
hetero_graph.node_label[key] = hetero_graph.node_label[key].to(args['device'])
# Edge_index to sparse tensor and to device
for key in hetero_graph.edge_index:
edge_index = hetero_graph.edge_index[key]
adj = SparseTensor(row=edge_index[0], col=edge_index[1], sparse_sizes=(hetero_graph.num_nodes('paper'), hetero_graph.num_nodes('paper')))
hetero_graph.edge_index[key] = adj.t().to(args['device'])
print(hetero_graph.edge_index[message_type_1])
print(hetero_graph.edge_index[message_type_2])
if 'IS_GRADESCOPE_ENV' not in os.environ:
best_model = None
best_val = 0
output_size = hetero_graph.num_node_labels('paper')
msg_keys = list(hetero_graph.edge_index.keys())
model = HeteroGNN(hetero_graph, msg_keys, args, aggr="add").to(args['device'])
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
for epoch in range(args['epochs']):
loss = train(model, optimizer, hetero_graph, train_idx)
accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
print(
f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
)
best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx], save_preds=True, agg_type="HGT")
print(
f"Best model: "
f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
)