代码拉取完成,页面将自动刷新
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# 导入相关包
from utils import DGraphFin
from utils.utils import prepare_folder
from utils.evaluator import Evaluator
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch_geometric.transforms as T
import numpy as np
from torch_geometric.data import Data
import os
# Fix the RNG seed so repeated runs start from the same state.
torch.manual_seed(666)
# Pick the compute device: GPU 0 when CUDA is usable, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Report whether CUDA is available.
print(torch.cuda.is_available())
# In[ ]:
# 定义网络模型
# 导入需要的包
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv # GraphSAGE convolution layer
# Model Definition
class GraphSAGE(torch.nn.Module):
    """Two-layer GraphSAGE node classifier.

    The forward pass applies a ``SAGEConv`` layer, ReLU, dropout, and a second
    ``SAGEConv`` layer, and returns the log-softmax over the last dimension.

    Args:
        in_channels: Input size passed to the first convolution.
        hidden_channels: Output size of the first convolution and input size
            of the second.
        out_channels: Output size of the second convolution.
        dropout: Dropout probability used between the two convolutions; it is
            only applied while the module is in training mode.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout):
        super().__init__()
        # Attribute names are part of the saved state_dict keys; keep them.
        self.sageConv1 = SAGEConv(in_channels, hidden_channels)
        self.sageConv2 = SAGEConv(hidden_channels, out_channels)
        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise the parameters of both convolution layers."""
        self.sageConv1.reset_parameters()
        self.sageConv2.reset_parameters()

    def forward(self, x, edge_index):
        """Return log-probabilities for the given features and graph.

        Args:
            x: Node feature tensor fed to the first convolution.
            edge_index: Graph connectivity forwarded unchanged to both
                convolutions (the script passes ``data.adj_t`` here).

        Returns:
            Tensor of log-softmax scores computed over the last dimension.
        """
        x = self.sageConv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.sageConv2(x, edge_index)
        return F.log_softmax(x, dim=-1)
def train(model, data, train_idx, optimizer):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module invoked as ``model(data.x, data.adj_t)``. Its output is
            fed to ``F.nll_loss``, so it should produce log-probabilities.
        data: Object exposing ``x``, ``adj_t`` and ``y``.
        train_idx: Index used to select the training rows from both the model
            output and ``data.y``.
        optimizer: Optimizer whose gradients are cleared before the backward
            pass and which is stepped once afterwards.

    Returns:
        The loss on the selected training rows as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    # The forward pass covers the whole graph; only training rows enter the loss.
    out = model(data.x, data.adj_t)[train_idx]
    loss = F.nll_loss(out, data.y[train_idx])
    loss.backward()
    optimizer.step()
    return loss.item()
def test(model, data, split_idx, evaluator):
    """Evaluate the model on the training and validation splits.

    The model is switched to evaluation mode and run once on the whole graph
    with gradient tracking disabled.

    Args:
        model: Module invoked as ``model(data.x, data.adj_t)``; its output is
            treated as log-probabilities.
        data: Object exposing ``x``, ``adj_t`` and ``y``.
        split_idx: Mapping that provides the ``'train'`` and ``'valid'``
            indices used to select rows.
        evaluator: Object whose ``eval(y_true, y_pred)`` returns a mapping
            containing an ``'auc'`` entry.

    Returns:
        A tuple ``(eval_results, losses, y_pred)``: the AUC per split, the NLL
        loss per split, and the exponentiated model output for every row.
    """
    with torch.no_grad():
        model.eval()
        out = model(data.x, data.adj_t)
        # exp() turns the log-probabilities back into probabilities.
        y_pred = out.exp()
        losses, eval_results = dict(), dict()
        for key in ['train', 'valid']:
            node_id = split_idx[key]
            losses[key] = F.nll_loss(out[node_id], data.y[node_id]).item()
            eval_results[key] = evaluator.eval(data.y[node_id], y_pred[node_id])['auc']
    return eval_results, losses, y_pred
def predict(data, node_id):
    """
    Load the saved GraphSAGE checkpoint and predict class probabilities.

    :param data: graph object exposing ``x`` (features) and ``adj_t``
    :param node_id: int, index of the node to predict
    :return: tensor, probabilities of class 0 and class 1 for the selected
        node (the original note gives the size as torch.size[1,2]; the actual
        shape follows from how ``node_id`` indexes the output)
    """
    # Take the input width from the data instead of hard-coding 20, mirroring
    # how the training model is constructed in this script.
    model = GraphSAGE(in_channels=data.x.size(-1), hidden_channels=128, out_channels=2, dropout=0)
    # Checkpoint written by the training loop when the validation loss improved.
    checkpoint_path = os.path.join(save_dir, 'model_graphSAGE.pt')
    # The model lives on the CPU, so map the stored tensors there; this keeps
    # loading working on machines without CUDA.
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    with torch.no_grad():
        model.eval()
        out = model(data.x, data.adj_t)[node_id]
        # The network outputs log-probabilities; exp() recovers probabilities.
        y_pred = out.exp()
    return y_pred
# In[ ]:
path = './datasets/632d74d4e2843a53167ee9a1-momodel/'  # dataset root directory
save_dir = './results/'  # directory where model checkpoints are saved
dataset_name = 'DGraph'
dataset = DGraphFin(root=path, name=dataset_name, transform=T.ToSparseTensor())

nlabels = dataset.num_classes
if dataset_name in ['DGraph']:
    nlabels = 2  # only classes 0 and 1 need to be predicted in this experiment

data = dataset[0]
data.adj_t = data.adj_t.to_symmetric()  # turn the directed graph into an undirected one

if dataset_name in ['DGraph']:
    # Standardise the features: subtract the mean and divide by the standard
    # deviation, both computed along dimension 0.
    x = data.x
    x = (x - x.mean(0)) / x.std(0)
    data.x = x
if data.y.dim() == 2:
    # Drop the trailing dimension so the labels are one-dimensional.
    data.y = data.y.squeeze(1)

# Train / validation / test split taken from the dataset.
split_idx = {'train': data.train_mask, 'valid': data.valid_mask, 'test': data.test_mask}
train_idx = split_idx['train']

result_dir = prepare_folder(dataset_name, 'GraphSAGE')

# Inspect the data dimensions.
print(data)
print(data.x.shape)  # features
print(data.y.shape)  # labels
# In[ ]:
# Build the GraphSAGE classifier; the input width follows the feature matrix.
model = GraphSAGE(
    in_channels=data.x.size(-1),
    hidden_channels=128,
    out_channels=nlabels,
    dropout=0,
)
print('Model GraphSAGE initialized')

# Metric used by the evaluator.
eval_metric = 'auc'
evaluator = Evaluator(eval_metric)

# Number of training epochs.
epochs = 200
# In[ ]:
# Train the model.
import gc
gc.collect()
print(sum(p.numel() for p in model.parameters()))  # total number of parameters

model.reset_parameters()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-7)

# Make sure the checkpoint directory exists before the first save; without
# this torch.save fails when the directory has not been created yet.
os.makedirs(save_dir, exist_ok=True)
best_model_path = os.path.join(save_dir, 'model_graphSAGE.pt')

min_valid_loss = 1e8
# Per-epoch history, exported later as the training log.
Epochs = []
Loss = []
Train_AUC = []
Valid_AUC = []

for epoch in range(1, epochs + 1):
    loss = train(model, data, train_idx, optimizer)
    eval_results, losses, out = test(model, data, split_idx, evaluator)
    train_eval, valid_eval = eval_results['train'], eval_results['valid']
    train_loss, valid_loss = losses['train'], losses['valid']

    if valid_loss < min_valid_loss:
        min_valid_loss = valid_loss
        # Keep the weights with the lowest validation loss seen so far.
        torch.save(model.state_dict(), best_model_path)

    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_eval:.3f}%, '  # AUC is multiplied by 100 to land in the 0-100 range
          f'Valid: {100 * valid_eval:.3f}% ')
    # Metric lines in a JSON-like format, one per metric.
    print('{{"metric": "Loss", "value": {:.4f}, "epoch": {} }}'.format(loss, epoch))
    print('{{"metric": "Train AUC", "value": {:.4f}, "epoch": {} }}'.format(100 * train_eval, epoch))
    print('{{"metric": "Valid AUC", "value": {:.4f}, "epoch": {} }}'.format(100 * valid_eval, epoch))

    Epochs.append(epoch)
    Loss.append(loss)
    Train_AUC.append(train_eval)
    Valid_AUC.append(valid_eval)
# In[ ]:
# Collect the per-epoch history into a table and export it as an Excel file.
# The columns are declared in their intended order; inserting both AUC columns
# at position 2 previously placed Valid_AUC ahead of Train_AUC.
dataLogger = pd.DataFrame({
    "Epochs": Epochs,
    "Loss": Loss,
    "Train_AUC": Train_AUC,
    "Valid_AUC": Valid_AUC,
})
dataLogger.to_excel('GraphSAGE_trainingLog.xlsx', float_format="%.4f", index=False)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。