fixed errors

This commit is contained in:
caes 2017-02-09 23:12:09 -05:00
parent 561b57c2c9
commit 970e6e36d5
4 changed files with 109 additions and 20 deletions

Binary file not shown.

View File

@@ -77,3 +77,81 @@ length(test_data)
ncol(test_data) ncol(test_data)
nrow(test_data) nrow(test_data)
q() q()
qda.fit
fit.qda
qda.fit
auto
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
import(MASS)
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
import(class)
library(MASS)
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
qda.fit
> mean(qda.pred$class!=test_data$mpg01,na.rm=T)
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
qda.pred=predict(qda.fit,test_data,na.rm=T)
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
qda.fit=qda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data)
qda.pred=predict(qda.fit,test_data,na.rm=T)
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
qda.fit
qda.pred=predict(qda.fit,test_data,na.rm=T)
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,199)
glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01)
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,length(test_data)
glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01)
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,length(test_data))
glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01)
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,length(test_data))
glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01)
glm.pred
glm.pred=rep(0,length(test_data))
glm.pred
test_data
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,nrow(test_data))
glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01)
set.seed(1)
auto <- na.omit(auto)
train_bools <- (auto$year %% 2 == 0)
train_data = auto[train_bools,]
test_data = auto[!train_bools,]
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,]
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,]
train.mpg01 = auto$mpg01[train_bools]
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
mean(knn.pred != test_data$mpg0)
import(class)
library(class)
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
mean(knn.pred != test_data$mpg0)
q()

View File

@@ -196,7 +196,7 @@ Part B: Choose one of Questions 10 or 11
> qda.fit > qda.fit
Call: Call:
lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
Prior probabilities of groups: Prior probabilities of groups:
0 1 0 1
@@ -207,16 +207,11 @@ Part B: Choose one of Questions 10 or 11
0 131.96939 3579.827 6.755102 268.4082 0 131.96939 3579.827 6.755102 268.4082
1 77.96429 2313.598 4.071429 111.7188 1 77.96429 2313.598 4.071429 111.7188
Coefficients of linear discriminants: > qda.pred=predict(qda.fit,test_data,na.rm=T)
LD1
horsepower 0.0060634365
weight -0.0011442212
cylinders -0.6390942259
displacement 0.0004517291
***Test Data Error Rate: ***Test Data Error Rate:
> mean(qda.pred$class!=test_data$mpg01,na.rm=T) > mean(qda.pred$class!=test_data$mpg01,na.rm=T)
[1] 0.1428571 [1] 0.1483516
@@ -227,12 +222,12 @@ Part B: Choose one of Questions 10 or 11
> glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) > glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
> glm.probs=predict(glm.fit,test_data,type="response") > glm.probs=predict(glm.fit,test_data,type="response")
> glm.pred=rep(0,199) > glm.pred=rep(0,nrow(test_data))
> glm.pred[glm.probs>.5]=1 > glm.pred[glm.probs>.5]=1
***Test Data Error Rate: ***Test Data Error Rate:
> mean(glm.pred!=test_data$mpg01) > mean(glm.pred!=test_data$mpg01)
[1] 0.1407035 [1] 0.1373626
────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────
@@ -250,23 +245,29 @@ Part B: Choose one of Questions 10 or 11
> train_data = auto[train_bools,] > train_data = auto[train_bools,]
> test_data = auto[!train_bools,] > test_data = auto[!train_bools,]
> train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] > train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,]
> test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] > test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,]
> train.mpg01 = auto$mpg01[train_bools] > train.mpg01 = auto$mpg01[train_bools]
***Test Data Error Rates: ***Test Data Error Rates:
k = 1 k = 1
> knn.pred = knn(train.X,test.X,train.mpg01,k=1)
> mean(knn.pred != test_data$mpg01) > mean(knn.pred != test_data$mpg01)
[1] 0.1483516 [1] 0.1483516
k = 2 k = 2
> knn.pred = knn(train.X,test.X,train.mpg01,k=2)
> mean(knn.pred != test_data$mpg01) > mean(knn.pred != test_data$mpg01)
[1] 0.1593407 [1] 0.1593407
k = 3 k = 3
> knn.pred = knn(train.X,test.X,train.mpg01,k=3)
> mean(knn.pred != test_data$mpg01) > mean(knn.pred != test_data$mpg01)
[1] 0.1648352 [1] 0.1648352
k = 4 k = 4
> knn.pred = knn(train.X,test.X,train.mpg01,k=4)
> mean(knn.pred != test_data$mpg0) > mean(knn.pred != test_data$mpg0)
[1] 0.1813187 [1] 0.1923077
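k = 1 looks best: it has the lowest test error rate, and the error rate increases with k over the values tried (k = 1 to 4). k = 1 looks best: it has the lowest test error rate, and the error rate increases with k over the values tried (k = 1 to 4).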

View File

@@ -22,13 +22,13 @@ lda.fit=lda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_da
lda.pred=predict(lda.fit, test_data) lda.pred=predict(lda.fit, test_data)
mean(lda.pred$class!=test_data$mpg01,na.rm=T) mean(lda.pred$class!=test_data$mpg01,na.rm=T)
qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) qda.fit=qda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data)
qda.pred=predict(qda.fit,test_data,na.rm=T) qda.pred=predict(qda.fit,test_data,na.rm=T)
mean(qda.pred$class!=test_data$mpg01,na.rm=T) mean(qda.pred$class!=test_data$mpg01,na.rm=T)
glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial) glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
glm.probs=predict(glm.fit,test_data,type="response") glm.probs=predict(glm.fit,test_data,type="response")
glm.pred=rep(0,199) glm.pred=rep(0,nrow(test_data))
glm.pred[glm.probs>.5]=1 glm.pred[glm.probs>.5]=1
mean(glm.pred!=test_data$mpg01) mean(glm.pred!=test_data$mpg01)
@@ -39,8 +39,18 @@ train_data = auto[train_bools,]
test_data = auto[!train_bools,] test_data = auto[!train_bools,]
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,]
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,]
train.mpg01 = auto$mpg01[train_bools] train.mpg01 = auto$mpg01[train_bools]
knn.pred = knn(train.X,test.X,train.mpg01,k=1) knn.pred = knn(train.X,test.X,train.mpg01,k=1)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
mean(knn.pred != test_data$mpg01)
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
mean(knn.pred != test_data$mpg0)