TA的每日心情 | 开心 2015-7-28 13:36 |
---|
签到天数: 11 天 [LV.3]偶尔看看II
超级版主
- 积分
- 342
|
- <!--StartFragment -->
- # Fit a CART classification tree on the response column 是否窃漏电, evaluate it
- # on a held-out split, and plot the ROC curve for the test set.
- library(ROCR)
- library(tree)
- # Load the modelling data and keep columns 3 to 6 for the analysis.
- qiediandata <- read.csv("f:\\data\\model.csv")
- data <- qiediandata[, 3:6]
- # The script uses column 4 of the subset as the observed class (assumed to be
- # 是否窃漏电 - confirm against the CSV). Convert it to a factor BEFORE the split
- # and BEFORE fitting: tree() builds a classification tree only for a factor
- # response, and predict(type = "class") needs a classification tree.
- data[, 4] <- as.factor(data[, 4])
- head(data)
- set.seed(1) # fix the random seed so the split is reproducible
- # Randomly assign about 80% of the rows to training (label 2) and about 20%
- # to testing (label 1).
- int <- sample(2, nrow(data), replace = TRUE, prob = c(0.2, 0.8))
- traindata <- data[int == 2, ]
- # Sanity check of the split ratio: nrow(traindata) / nrow(data)
- testdata <- data[int == 1, ]
- # CART decision tree: the response is modelled on all remaining columns
- # (that is what the dot after the tilde means).
- tree.model <- tree(是否窃漏电 ~ ., data = traindata)
- summary(tree.model)
- predict(tree.model, type = "class")
- tree.model$y
- # Kept under its original name for the rest of the script; it is the same
- # classification tree now that the response is a factor from the start.
- newtree.model <- tree.model
- newtree.model
- # Without type = "class", predict() returns the per-class probabilities;
- # with it, predict() returns the predicted class labels directly.
- predict(newtree.model)
- newtree.predictdata <- predict(newtree.model, type = "class")
- # Confusion matrix on the training set: actual vs. predicted.
- conf <- table(act = traindata[, 4], pre = newtree.predictdata)
- # Training accuracy: share of observations on the diagonal.
- sum(diag(conf)) / sum(conf)
- plot(tree.model)
- text(tree.model)
- # Evaluate the model on the held-out test data.
- testpr <- predict(newtree.model, newdata = testdata, type = "class")
- righ <- table(act = testdata[, 4], testpr)
- righ
- # ROC curve: score each test row with the probability of the second class.
- prefor <- prediction(predict(newtree.model, testdata)[, 2], testdata$是否窃漏电)
- prefor
- # tpr: true positive rate, fpr: false positive rate
- performan <- performance(prefor, "tpr", "fpr")
- performan
- plot(performan)
- # Fit a CART classification tree on the response column 是否窃漏电, evaluate it
- # on a held-out split, and plot the ROC curve for the test set.
- library(ROCR)
- library(tree)
- # Load the modelling data and keep columns 3 to 6 for the analysis.
- qiediandata <- read.csv("f:\\data\\model.csv")
- data <- qiediandata[, 3:6]
- # The script uses column 4 of the subset as the observed class (assumed to be
- # 是否窃漏电 - confirm against the CSV). Convert it to a factor BEFORE the split
- # and BEFORE fitting: tree() builds a classification tree only for a factor
- # response, and predict(type = "class") needs a classification tree.
- data[, 4] <- as.factor(data[, 4])
- head(data)
- set.seed(1) # fix the random seed so the split is reproducible
- # Randomly assign about 80% of the rows to training (label 2) and about 20%
- # to testing (label 1).
- int <- sample(2, nrow(data), replace = TRUE, prob = c(0.2, 0.8))
- traindata <- data[int == 2, ]
- # Sanity check of the split ratio: nrow(traindata) / nrow(data)
- testdata <- data[int == 1, ]
- # CART decision tree: the response is modelled on all remaining columns
- # (that is what the dot after the tilde means).
- tree.model <- tree(是否窃漏电 ~ ., data = traindata)
- summary(tree.model)
- predict(tree.model, type = "class")
- tree.model$y
- # Kept under its original name for the rest of the script; it is the same
- # classification tree now that the response is a factor from the start.
- newtree.model <- tree.model
- newtree.model
- # Without type = "class", predict() returns the per-class probabilities;
- # with it, predict() returns the predicted class labels directly.
- predict(newtree.model)
- newtree.predictdata <- predict(newtree.model, type = "class")
- # Confusion matrix on the training set: actual vs. predicted.
- conf <- table(act = traindata[, 4], pre = newtree.predictdata)
- # Training accuracy: share of observations on the diagonal.
- sum(diag(conf)) / sum(conf)
- plot(tree.model)
- text(tree.model)
- # Evaluate the model on the held-out test data.
- testpr <- predict(newtree.model, newdata = testdata, type = "class")
- righ <- table(act = testdata[, 4], testpr)
- righ
- # ROC curve: score each test row with the probability of the second class.
- prefor <- prediction(predict(newtree.model, testdata)[, 2], testdata$是否窃漏电)
- prefor
- # tpr: true positive rate, fpr: false positive rate
- performan <- performance(prefor, "tpr", "fpr")
- performan
- plot(performan)
复制代码
搜索
|
|