# master

# 分支 (1)

# 管理

# 管理

# master

# neural-networks-with-r
# /
# Chapter 7 神经网络案例——高级主题.R

# Clear every object (including hidden, dot-prefixed ones) from the workspace.
# The argument is spelled out as `all.names` instead of relying on partial
# matching of `all`.
rm(list = ls(all.names = TRUE))

# Work from the project directory (machine-specific absolute path).
setwd("C:\\Users\\lenovo\\Desktop\\R\\神经网络_R语言实现")

# Install data.table only when it is missing, so re-running the script does
# not trigger a reinstall every time.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}

# 7.1 Integrating TensorFlow with R
# TensorFlow is an open-source numerical computing library that Google
# provides for machine intelligence.
# It contains everything needed to build deep learning models and gives
# developers a black-box interface to program against.
# The Keras API for TensorFlow provides a high-level interface for neural
# networks.

# In TensorFlow, the nodes of a graph represent mathematical operations and
# the edges represent the multidimensional data arrays (tensors) passed
# between them.
# TensorFlow was originally developed by Google's machine intelligence team
# for research on machine learning and deep neural networks, and has since
# been open-sourced.
# When configured appropriately, TensorFlow makes use of GPU processing.

# Install the tensorflow package from CRAN, but only when it is missing so
# that re-running the script does not reinstall it each time.
if (!requireNamespace("tensorflow", quietly = TRUE)) {
  install.packages("tensorflow")
}

# Use install_tensorflow() to install TensorFlow itself.
library(tensorflow)
install_tensorflow()

# Confirm that the installation works.
# Session was removed in TensorFlow 2.0, so the constant is printed directly.
# (The package is already attached above; a second library() call is not
# needed.)
hello <- tf$constant("Hello, TensorFlow!")
hello

# 7.2 Integrating Keras with R
# Keras is an open-source neural-network library written in Python.
# It can run on top of MxNet, TensorFlow, or Theano.

# Install the helper packages only when they are missing (stringr is a
# dependency of the install workflow below).
for (pkg in c("devtools", "installr", "stringr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(stringr)
library(installr)
install.Rtools()

# Put the Rtools binaries on PATH via ~/.Renviron.
# The entry is appended to whatever the file already contains (and only when
# it is not there yet); writing the file from scratch would silently discard
# any environment variables the user had configured.
renviron_path <- "~/.Renviron"
rtools_path_entry <- 'PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"'
renviron_lines <- if (file.exists(renviron_path)) {
  readLines(renviron_path, warn = FALSE)
} else {
  character(0)
}
if (!(rtools_path_entry %in% renviron_lines)) {
  writeLines(c(renviron_lines, rtools_path_entry), con = renviron_path)
}

# Shows where `make` is found; an empty string means it is not on PATH.
Sys.which("make")

devtools::install_github("rstudio/keras", force = TRUE)

library(keras)
data <- dataset_mnist()

# 7.3 Using MNIST HWR in R

# Handwriting recognition (HWR) is a commonly used procedure in modern
# technology.
# An image of handwritten text can be detected offline from a piece of paper
# by optical scanning (optical character recognition, OCR) or by intelligent
# word recognition.

# Technically, handwriting recognition is the ability of a computer to accept
# and interpret intelligible handwritten input from paper documents,
# photographs, touch screens, and other devices.

# HWR is carried out through various techniques that usually require OCR.
# A complete script recognition system can additionally handle formatting,
# perform correct character segmentation, and find the most plausible words.

# MNIST (Modified National Institute of Standards and Technology) is a large
# database.
# It contains 70000 data samples and is a subset of the larger NIST dataset.

# Load and start h2o.
library(h2o)

h2o.init(nthreads = -1, max_mem_size = "3G")

# Set the working directory.
setwd("C:\\Users\\lenovo\\Desktop\\R\\神经网络_R语言实现\\Chapter 07")

# Load the training set and list its column names.
# The data frames are deliberately NOT attach()ed: both files yield the same
# default column names (header = FALSE), so attaching both would mask one
# with the other, and every later step indexes the data frames explicitly.
train_mnist <- read.csv("mnist_train_100.csv", header = FALSE)
names(train_mnist)

# Load the test set and list its column names.
test_mnist <- read.csv("mnist_test_10.csv", header = FALSE)
names(test_mnist)

# Take the 10th training sample, drop its first column (the label), and lay
# the remaining pixel intensities out row by row in a 28-row matrix.
pixel_values <- unlist(train_mnist[10, -1])
m <- matrix(pixel_values, nrow = 28, byrow = TRUE)

# Draw the matrix as a greyscale image to inspect the result.
grey_palette <- grey.colors(255)
image(m, col = grey_palette)

# Rotate a matrix by 90 degrees clockwise: reverse the row order, then
# transpose. Used below so that image() shows the digits upright.
#
# x: a matrix (anything as.matrix() accepts, e.g. a data frame).
# Returns a matrix with ncol(x) rows and nrow(x) columns.
#
# Rows are reversed by direct indexing with drop = FALSE rather than
# apply(x, 2, rev): apply() collapses a single-row input to a plain vector,
# which made the result come back with the wrong orientation.
rotate <- function(x) {
  x <- as.matrix(x)
  t(x[rev(seq_len(nrow(x))), , drop = FALSE])
}
# Show the rotated version of the sample digit.
image(rotate(m), col = grey.colors(255))

# Do the same for the first 6 rows of the training set, in a 2 x 3 grid.
# par() returns the previous settings, which are kept for restoring later.
old_par <- par(mfrow = c(2, 3))

# A plain loop is used because the plots are pure side effects; lapply()
# at top level would also print a list of NULL return values.
for (i in 1:6) {
  digit <- matrix(unlist(train_mnist[i, -1]),
                  nrow = 28,
                  byrow = TRUE)
  # The first column holds the label; show it under the plot.
  image(rotate(digit),
        col = grey.colors(255),
        xlab = train_mnist[i, 1])
}

# Restore the plotting layout that was active before the grid.
par(old_par)

# Exploratory look at the training data.
str(train_mnist)

# Column 1 is the digit label; every remaining column is a pixel predictor.
# The predictor range follows the data instead of a hard-coded 2:785.
y <- 1
x <- 2:ncol(train_mnist)

# Count how often each digit occurs in the training set.
table(train_mnist[, y])

# H2O decides between regression and classification from the type of the
# response column. The labels are read as integers, so they are converted to
# factors before upload; otherwise the network would be fitted as a
# regression on the digit value and `classification_stop` would not apply.
train_labelled <- train_mnist
train_labelled[[y]] <- as.factor(train_labelled[[y]])
test_labelled <- test_mnist
test_labelled[[y]] <- as.factor(test_labelled[[y]])

# Upload each data set to the H2O cluster once and reuse the frames.
train_hex <- as.h2o(train_labelled)
test_hex <- as.h2o(test_labelled)

# Build and train the model.
model <- h2o.deeplearning(
  x = x,
  y = y,
  training_frame = train_hex,
  model_id = "MNIST_deeplearning",
  seed = 405,
  activation = "RectifierWithDropout",
  l1 = 0.00001,
  input_dropout_ratio = 0.2,
  classification_stop = -1,
  epochs = 2000
)

summary(model)

# Inspect the scoring history to see how training evolved.
h2o.scoreHistory(model)

# Performance metrics on the test set (the name `preds` is kept from the
# original script; it holds metrics, not predictions).
preds <- h2o.performance(model, test_hex)

# Predict with the model.
newdata <- h2o.predict(model, test_hex)

# Put actual and predicted labels side by side to check the model.
# The first column of the prediction frame is the predicted label; it is
# converted back to an integer so the final matrix stays numeric.
predicted_digit <- as.integer(as.character(as.data.frame(newdata[, 1])[[1]]))
predictions <- data.frame(
  Number = seq_len(nrow(test_mnist)),
  Actual = test_mnist[, 1],
  Predicted = predicted_digit
)

# Finally, inspect the output.
as.matrix(predictions)

# 7.4 Building an LSTM on the iris dataset
# (section title kept from the original; the code below calls mx.mlp, i.e.
# it fits a multilayer perceptron.)

rm(list = ls(all.names = TRUE))
setwd("C:\\Users\\lenovo\\Desktop\\R\\神经网络_R语言实现\\Chapter 07")

# Install MxNet first, but only when it is missing. The two package
# repositories are tried in order; the original author noted that these
# downloads failed, so a prebuilt older Windows binary is the last fallback.
if (!requireNamespace("mxnet", quietly = TRUE)) {
  mxnet_repos <- c(
    "https://s3-us-west-2.amazonaws.com/apache-mxnet/R/CRAN",
    "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/"
  )
  for (repo_url in mxnet_repos) {
    cran <- getOption("repos")
    cran["dmlc"] <- repo_url
    options(repos = cran)
    install.packages("mxnet")
    if (requireNamespace("mxnet", quietly = TRUE)) {
      break
    }
  }
}

# Older version:
if (!requireNamespace("mxnet", quietly = TRUE)) {
  install.packages("https://github.com/jeremiedb/mxnet_winbin/raw/master/mxnet.zip", repos = NULL)
}

library(mxnet)

data(iris)

# Every 5th row is held out for testing; the rest is used for training.
# (Same split as recycling the mask `1:5 != 5`, written out explicitly.)
is_test <- seq_len(nrow(iris)) %% 5 == 0

# Predictors are all columns except the 5th (Species).
x <- iris[!is_test, -5]
# The softmax output layer has `out_node = 3` nodes and expects class indices
# starting at 0, so the factor codes 1..3 are shifted to 0..2 for training.
y <- as.integer(iris$Species)[!is_test] - 1L

train.x <- data.matrix(x)
train.y <- data.matrix(y)

test.x <- data.matrix(iris[is_test, -5])
# Test labels stay as the factor codes 1..3 so that they line up with the
# 1-based column index returned by max.col() below.
test.y <- as.integer(iris$Species)[is_test]

model <- mx.mlp(train.x, train.y, hidden_node = 10, out_node = 3,
                out_activation = "softmax",
                num.round = 20,
                array.batch.size = 15,
                learning.rate = 0.07,
                momentum = 0.9,
                eval.metric = mx.metric.accuracy)

preds <- predict(model, test.x)
# After transposing, max.col() picks the highest-scoring class for each
# sample; the index is 1-based, matching test.y.
pred.label <- max.col(t(preds))

test.y

pred.label

#7.5 使用自动编码器

#自动编码器利用神经网络来执行非线性降维
#它们通过通用函数逼近器找到数据的潜在特征
#以更好的方式呈现数据

#自动编码器尝试以不同的方式组合或压缩输入数据

# 7.6 Principal component analysis with H2O

# Principal component analysis (PCA) produces a new set of mutually
# uncorrelated variables called principal components.
# Each principal component is a linear combination of the original
# variables, and all components are orthogonal to one another, so they carry
# no redundant information.
# Taken together, the principal components form an orthogonal basis of the
# data space.
# The goal of PCA is to explain as much variance as possible with as few
# components as possible.
# It is a form of multidimensional scaling: the variables are linearly
# transformed into a lower-dimensional space that retains the maximum amount
# of information about them.
# The principal components are therefore combinations of the original
# variables after a linear transformation.

library(h2o)
h2o.init()

# Locate the example file shipped with the h2o package and upload it.
ausPath <- system.file("extdata", "australia.csv", package = "h2o")
australia.hex <- h2o.uploadFile(path = ausPath)
summary(australia.hex)

# Run PCA on the uploaded frame with h2o.prcomp.
pca_model <- h2o.prcomp(
  training_frame = australia.hex,
  k = 8,
  transform = "STANDARDIZE"
)

summary(pca_model)

# Plot the second row of the importance table, one bar per component.
variance_share <- as.numeric(pca_model@model$importance[2, ])
barplot(
  variance_share,
  main = "PCA model",
  xlab = "Pca component",
  ylab = "Proportion of Variance"
)
# Per the original author's run, the first two principal components explain
# roughly 70% of the variance.

# 7.7 Building an autoencoder with H2O

# An autoencoder is an ANN that learns efficient encodings without
# supervision.
# Its purpose is to learn an encoding for a set of data, typically for
# dimensionality reduction.
# Architecturally, the simplest autoencoder is a feedforward, non-recurrent
# neural network very similar to an MLP:
# it has an input layer, an output layer, and one or more hidden layers
# connecting them,
# but the output layer has the same number of nodes as the input layer so
# that it can reconstruct the input.

# Cluster movies by genre.

rm(list = ls(all.names = TRUE))
library(h2o)
# Connect to (or start) the H2O cluster so this section also runs on its
# own, as the PCA section does.
h2o.init()

setwd("C:\\Users\\lenovo\\Desktop\\R\\神经网络_R语言实现\\Chapter 07")

# Load the training dataset of movies.
movies <- read.csv("movies.csv", header = TRUE)
head(movies)

# Upload the data once and reuse the frame for training and for feature
# extraction.
movies_hex <- as.h2o(movies)

# Autoencoder on columns 2 and 3 with a single hidden layer of 2 units.
model <- h2o.deeplearning(
  x = 2:3,
  training_frame = movies_hex,
  hidden = c(2),
  autoencoder = TRUE,
  activation = "Tanh"
)

summary(model)

# Activations of the first hidden layer for every row.
features <- h2o.deepfeatures(model,
                             movies_hex,
                             layer = 1)

# Plot the first 10 rows (fewer if the file is shorter) in the 2-D feature
# space and label each point with the value from column 2 of `movies`.
shown_rows <- seq_len(min(10, nrow(movies)))
d <- as.matrix(features[shown_rows, ])
labels <- as.vector(movies[shown_rows, 2])
plot(d, pch = 2)
text(d, labels, pos = 3)


# 7.8 Detecting breast cancer with the darch package
rm(list = ls(all.names = TRUE))
setwd("C:\\Users\\lenovo\\Desktop\\R\\神经网络_R语言实现\\Chapter 07")
# darch could not be installed with install.packages("darch"); install it
# from GitHub instead:
# library(devtools)
# install_github("maddin79/darch")
library(mlbench)
library(darch)

data(BreastCancer)
summary(BreastCancer)

# Drop every row that contains a missing value.
data_cleaned <- na.omit(BreastCancer)
summary(data_cleaned)

# NOTE(review): `Class ~ .` uses every other column as a predictor,
# including any sample-identifier column the dataset carries — confirm that
# this is intended.
model <- darch(Class ~ .,
               data_cleaned,
               layers = c(10, 10, 1),
               darch.numEpochs = 50,
               darch.stopClassErr = 0,
               retainData = TRUE)

plot(model)

# Predict on the training data itself and compare with the true class.
predictions <- predict(model, newdata = data_cleaned, type = "class")

# Compare as character so the check does not depend on factor level sets.
actual_class <- as.character(data_cleaned$Class)
predicted_class <- as.character(predictions)

cat(paste("Incorrect classifications:",
          sum(predicted_class != actual_class)))
table(predictions, data_cleaned$Class)

library(gmodels)
CrossTable(x = data_cleaned$Class,
           y = predictions,
           prop.chisq = FALSE)

# Accuracy is computed from the predictions of this run. The original script
# hard-coded (443 + 238) / 683 from one recorded run, which goes stale as
# soon as the model is retrained.
Accuracy <- mean(predicted_class == actual_class)
Accuracy
# In the original author's run the classifier performed very well.