Binary classification

Input filename: data.csv
Data postprocessing code:
Training/test set split: random
Training set condition: t<0 (NOTE: not applied by the code below, which uses a random stratified split)
Split seed: 123
Split ratio: 0.7
Formula: y~.
Model predicts: probability
Classification probability threshold: 0.5
Random forest seed: 456
Methods compared: Baseline method, Logistic regression, Regression tree, Random forest
library(ROCR)
library(caTools)

# Load data ----
data <- read.csv('data.csv')

# Reproducible stratified 70/30 train/test split ----
set.seed(123)
split <- sample.split(data$y, SplitRatio = 0.7)
data.train <- data[split, ]
data.test <- data[!split, ]

# Baseline model: predict the training-set mean of y for every row ----
data.model <- mean(data.train$y)

# Training set evaluation ----
data.train.prob <- rep_len(data.model, nrow(data.train))
# Classify as positive when predicted probability reaches the 0.5 threshold
data.train.class <- as.integer(data.train.prob >= 0.5)
data.train.acc <- mean(data.train$y == data.train.class)
# Sensitivity: share of actual positives predicted positive
data.train.sen <- mean(data.train.class[data.train$y == 1] == 1)
# Specificity: share of actual negatives predicted negative
data.train.spe <- mean(data.train.class[data.train$y == 0] == 0)
train.pred <- prediction(data.train.prob, data.train$y)
data.train.auc <- performance(train.pred, 'auc')@y.values[[1]]

# Test set evaluation ----
data.test.prob <- rep_len(data.model, nrow(data.test))
data.test.class <- as.integer(data.test.prob >= 0.5)
data.test.acc <- mean(data.test$y == data.test.class)
data.test.sen <- mean(data.test.class[data.test$y == 1] == 1)
data.test.spe <- mean(data.test.class[data.test$y == 0] == 0)
test.pred <- prediction(data.test.prob, data.test$y)
data.test.auc <- performance(test.pred, 'auc')@y.values[[1]]
library(ROCR)
library(caTools)

# Load data ----
data <- read.csv('data.csv')

# Reproducible stratified 70/30 train/test split ----
set.seed(123)
split <- sample.split(data$y, SplitRatio = 0.7)
data.train <- data[split, ]
data.test <- data[!split, ]

# Logistic regression of y on all remaining columns ----
data.model <- glm(y ~ ., data = data.train, family = binomial)

# Training set evaluation ----
data.train.prob <- predict(data.model, type = 'response')
# Positive class when fitted probability reaches the 0.5 threshold
data.train.class <- as.integer(data.train.prob >= 0.5)
data.train.acc <- mean(data.train$y == data.train.class)
# Sensitivity: share of actual positives predicted positive
data.train.sen <- mean(data.train.class[data.train$y == 1] == 1)
# Specificity: share of actual negatives predicted negative
data.train.spe <- mean(data.train.class[data.train$y == 0] == 0)
train.pred <- prediction(data.train.prob, data.train$y)
data.train.auc <- performance(train.pred, 'auc')@y.values[[1]]

# Test set evaluation ----
data.test.prob <- predict(data.model, newdata = data.test, type = 'response')
data.test.class <- as.integer(data.test.prob >= 0.5)
data.test.acc <- mean(data.test$y == data.test.class)
data.test.sen <- mean(data.test.class[data.test$y == 1] == 1)
data.test.spe <- mean(data.test.class[data.test$y == 0] == 0)
test.pred <- prediction(data.test.prob, data.test$y)
data.test.auc <- performance(test.pred, 'auc')@y.values[[1]]
library(ROCR)
library(caTools)
library(rpart)

# Load data ----
data <- read.csv('data.csv')

# Reproducible stratified 70/30 train/test split ----
set.seed(123)
split <- sample.split(data$y, SplitRatio = 0.7)
data.train <- data[split, ]
data.test <- data[!split, ]

# Regression tree on all predictors (numeric y => anova method) ----
data.model <- rpart(y ~ ., data = data.train)

# Training set evaluation ----
data.train.prob <- predict(data.model)
# Positive class when predicted value reaches the 0.5 threshold
data.train.class <- as.integer(data.train.prob >= 0.5)
data.train.acc <- mean(data.train$y == data.train.class)
# Sensitivity: share of actual positives predicted positive
data.train.sen <- mean(data.train.class[data.train$y == 1] == 1)
# Specificity: share of actual negatives predicted negative
data.train.spe <- mean(data.train.class[data.train$y == 0] == 0)
train.pred <- prediction(data.train.prob, data.train$y)
data.train.auc <- performance(train.pred, 'auc')@y.values[[1]]

# Test set evaluation ----
data.test.prob <- predict(data.model, newdata = data.test)
data.test.class <- as.integer(data.test.prob >= 0.5)
data.test.acc <- mean(data.test$y == data.test.class)
data.test.sen <- mean(data.test.class[data.test$y == 1] == 1)
data.test.spe <- mean(data.test.class[data.test$y == 0] == 0)
test.pred <- prediction(data.test.prob, data.test$y)
data.test.auc <- performance(test.pred, 'auc')@y.values[[1]]
library(ROCR)
library(caTools)
library(randomForest)

# Load data ----
data <- read.csv('data.csv')

# Reproducible stratified 70/30 train/test split ----
set.seed(123)
split <- sample.split(data$y, SplitRatio = 0.7)
data.train <- data[split, ]
data.test <- data[!split, ]

# Random forest on all predictors ----
# NOTE(review): with numeric 0/1 y this fits a regression forest (randomForest
# warns about this); predictions are averages of tree outputs, used here as
# probabilities. Confirm y's type if classification votes were intended.
set.seed(456)
data.model <- randomForest(y ~ ., data = data.train)

# Training set evaluation ----
# Without newdata, predict() on a randomForest returns out-of-bag predictions.
data.train.prob <- predict(data.model)
# Positive class when predicted value reaches the 0.5 threshold
data.train.class <- as.integer(data.train.prob >= 0.5)
data.train.acc <- mean(data.train$y == data.train.class)
# Sensitivity: share of actual positives predicted positive
data.train.sen <- mean(data.train.class[data.train$y == 1] == 1)
# Specificity: share of actual negatives predicted negative
data.train.spe <- mean(data.train.class[data.train$y == 0] == 0)
train.pred <- prediction(data.train.prob, data.train$y)
data.train.auc <- performance(train.pred, 'auc')@y.values[[1]]

# Test set evaluation ----
data.test.prob <- predict(data.model, newdata = data.test)
data.test.class <- as.integer(data.test.prob >= 0.5)
data.test.acc <- mean(data.test$y == data.test.class)
data.test.sen <- mean(data.test.class[data.test$y == 1] == 1)
data.test.spe <- mean(data.test.class[data.test$y == 0] == 0)
test.pred <- prediction(data.test.prob, data.test$y)
data.test.auc <- performance(test.pred, 'auc')@y.values[[1]]