-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathBlogFeedbackPrediction1.R
More file actions
178 lines (106 loc) · 4.12 KB
/
BlogFeedbackPrediction1.R
File metadata and controls
178 lines (106 loc) · 4.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Packages needed further down in this script: Matrix() builds the constraint
# matrix and mosek() solves the optimisation problem.
library(Matrix)
library(Rmosek)

# Load the BlogFeedback training data; the CSV has no header row.
A <- read.csv(
  '/Users/shashankkumar/Studies/SecondSem/ML/Assignments/Assignment2/Dataset/BlogFeedback/blogData_train.csv',
  header = FALSE,
  sep = ","
)

# Data preparation for the experiment ----
# Name every column V1..Vn in one vectorised assignment (the previous loop
# only ever renamed the last column).
names(A) <- paste0("V", seq_len(ncol(A)))

# We will remove V55 and V60 features as they are derived from other columns
# of the dataset.

# Seed the generator before sub-sampling so the 5000-row working set is
# reproducible from run to run.
set.seed(101)
n_sample <- min(5000L, nrow(A))
data <- A[sample(seq_len(nrow(A)), n_sample, replace = FALSE), ]
target <- data["V281"]

# Basic features
Ax <- data[c("V51", "V52", "V53", "V54", "V56", "V57", "V58", "V59")]
scaled.A <- scale(Ax, center = TRUE)

# Changing target variable to 0 and 1
target$V281 <- ifelse(target$V281 > 0, 1, 0)
Ay <- target
basic_feature <- cbind(scaled.A, Ay)

# Correlation matrix of the scaled features and the binary target.
# It has to be computed after basic_feature exists.
# NOTE(review): the original position (before the V55/V60 remark) suggests it
# may have been meant for a wider column set - confirm.
cormat <- round(cor(basic_feature), 2)
cormat
# Splitting data into train and test ----
# Set the seed so that the same split can be reproduced in future runs.
set.seed(101)
# Select 80% of the rows (without replacement) as the training sample.
# The index vector is not called `sample`, so base::sample() is not masked.
train_idx <- sample.int(
  n = nrow(basic_feature),
  size = floor(0.8 * nrow(basic_feature)),
  replace = FALSE
)
train <- basic_feature[train_idx, ]
test <- basic_feature[-train_idx, ]
# Ad hoc solution: logistic regression of V281 on all other columns ----
LogisticRegression_Model_Exp1 <- glm(
  formula = V281 ~ .,
  data = train,
  family = binomial(link = 'logit')
)
summary(LogisticRegression_Model_Exp1)

# type = "response" returns predicted probabilities, which are on the same
# 0..1 scale as the binary label; the default ("link") returns log-odds.
# se.fit = TRUE keeps the list return value, so `$fit` is still available.
Log_pred_on_Train <- predict(
  LogisticRegression_Model_Exp1, train,
  type = "response", se.fit = TRUE
)
Log_pred_on_Test <- predict(
  LogisticRegression_Model_Exp1, test,
  type = "response", se.fit = TRUE
)

# Calculating rss and mse for the ad hoc solution.
# Residuals must use the labels of the rows that were actually predicted
# (train$V281 / test$V281), not the unsplit `target` data frame, whose length
# and row order do not match the predictions.
train_residual <- train$V281 - Log_pred_on_Train$fit
length(train_residual)
test_residual <- test$V281 - Log_pred_on_Test$fit
length(test_residual)

train_mse_adhoc <- mean(train_residual^2)
train_rss_adhoc <- sum(train_residual^2)
test_mse_adhoc <- mean(test_residual^2)
test_rss_adhoc <- sum(test_residual^2)

print(paste0("rss on train data for adhoc solution Logistic regression is ", train_rss_adhoc))
print(paste0("rss on test data for adhoc solution Logistic regression is ", test_rss_adhoc))
print(paste0("mse on train data for adhoc solution Logistic regression is ", train_mse_adhoc))
print(paste0("mse on test data for adhoc solution Logistic regression is ", test_mse_adhoc))
# Separating features and target ----
# The target is the last column; everything before it is a feature.
# seq_len(ncol(.) - 1) is used because `1:ncol(train)-1` parses as
# `(1:ncol(train)) - 1`, i.e. 0..(n-1), and only worked because index 0 is
# silently dropped. drop = FALSE keeps a data frame even with one feature.
feature_cols <- seq_len(ncol(train) - 1)
train_X <- train[, feature_cols, drop = FALSE]
train_Y <- train[, ncol(train)]
test_X <- test[, feature_cols, drop = FALSE]
test_Y <- test[, ncol(test)]

# Show only the first rows instead of dumping every row to the console.
head(train_X)
head(test_X)

# Convert data to matrix form so that it can be consumed by R-mosek.
train_X <- as.matrix(train_X)
train_Y <- as.matrix(train_Y)
test_X <- as.matrix(test_X)
test_Y <- as.matrix(test_Y)
# Ordinary Least Squares: minimizing (actual - predicted)^2
#
# obj.func(X, y, verb = 1)
#
# Builds a Mosek problem from the design matrix and hands it to mosek().
#
# Arguments:
#   X    numeric matrix (or object coercible with as.matrix), one row per
#        observation and one column per feature.
#   y    numeric vector or one-column matrix with one entry per row of X.
#   verb verbosity level forwarded to mosek() as opts$verbose.
#
# Returns:
#   The list returned by mosek(); callers in this script read the solution
#   from `$sol$bas$xx`.
#
# Relies on Matrix() and mosek() being available on the search path
# (presumably from the Matrix and Rmosek packages - confirm they are attached).
#
# NOTE(review): the closed-form OLS coefficients are used as the *linear
# objective* of the problem, whose feasible set is 0 <= X %*% x and
# min(X[, j]) <= x[j] <= max(X[, j]). That is a linear programme, not the
# least-squares objective named above - confirm this is the intended
# formulation before relying on the returned solution as regression weights.
obj.func <- function(X, y, verb = 1) {
  X <- as.matrix(X)
  n_obs <- nrow(X)

  # coefficients = inverse(X'X) (X'y), obtained by solving the normal
  # equations directly rather than forming the explicit inverse.
  ols_coef <- as.vector(solve(crossprod(X), crossprod(X, y)))

  # Problem definition in Mosek
  qp <- list()
  # Problem objective is to minimize
  qp$sense <- "min"
  # Objective coefficients
  qp$c <- ols_coef
  # Constraint matrix
  qp$A <- Matrix(X)
  # Constraint bounds: one [0, Inf) row constraint per observation
  qp$bc <- rbind(blc = rep(0, n_obs), buc = rep(Inf, n_obs))
  # Variable bounds: per-column minimum and maximum of X, for any number of
  # feature columns (previously hard-coded to exactly eight).
  qp$bx <- rbind(
    blx = unname(apply(X, 2, min)),
    bux = unname(apply(X, 2, max))
  )

  # Solving with mosek solver
  mosek(qp, opts = list(verbose = verb))
}
# Solve the optimisation problem on the training data
mosek_result <- obj.func(train_X, train_Y)
mosek_result

# Creating the equation using the coefficients obtained from solving the
# optimizing problem using rmosek
mosek_coef <- mosek_result$sol$bas$xx
typeof(mosek_coef)
# length() rather than dim(): the solution is used below as a coefficient
# vector, for which dim() would return NULL.
length(mosek_coef)
dim(test_X)

y_train_pred_mosek <- train_X %*% mosek_coef
head(y_train_pred_mosek)
y_test_pred_mosek <- test_X %*% mosek_coef
head(y_test_pred_mosek)

train_residual <- train_Y - y_train_pred_mosek
test_residual <- test_Y - y_test_pred_mosek

# The residual sum of squares must square the residuals before summing;
# a plain sum lets positive and negative errors cancel.
train_mse_mosek <- mean(train_residual^2)
train_rss_mosek <- sum(train_residual^2)
train_mse_mosek
train_rss_mosek

test_mse_mosek <- mean(test_residual^2)
test_rss_mosek <- sum(test_residual^2)
test_mse_mosek
test_rss_mosek