diff --git a/hw3/.RData b/hw3/.RData index 75b0d6a..97b22f9 100644 Binary files a/hw3/.RData and b/hw3/.RData differ diff --git a/hw3/.Rhistory b/hw3/.Rhistory index 3a641a7..ee5aad7 100644 --- a/hw3/.Rhistory +++ b/hw3/.Rhistory @@ -77,3 +77,81 @@ length(test_data) ncol(test_data) nrow(test_data) q() +qda.fit +fit.qda +qda.fit +auto +qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) +import(MASS) +qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) +import(class) +library(MASS) +qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) +qda.fit +> mean(qda.pred$class!=test_data$mpg01,na.rm=T) +mean(qda.pred$class!=test_data$mpg01,na.rm=T) +qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) +qda.pred=predict(qda.fit,test_data,na.rm=T) +mean(qda.pred$class!=test_data$mpg01,na.rm=T) +qda.fit=qda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data) +qda.pred=predict(qda.fit,test_data,na.rm=T) +mean(qda.pred$class!=test_data$mpg01,na.rm=T) +qda.fit +qda.pred=predict(qda.fit,test_data,na.rm=T) +mean(qda.pred$class!=test_data$mpg01,na.rm=T) +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) +glm.probs=predict(glm.fit,test_data,type="response") +glm.pred=rep(0,199) +glm.pred[glm.probs>.5]=1 +mean(glm.pred!=test_data$mpg01) +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) +glm.probs=predict(glm.fit,test_data,type="response") +glm.pred=rep(0,length(test_data) +glm.pred[glm.probs>.5]=1 +mean(glm.pred!=test_data$mpg01) +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) +glm.probs=predict(glm.fit,test_data,type="response") +glm.pred=rep(0,length(test_data)) +glm.pred[glm.probs>.5]=1 +mean(glm.pred!=test_data$mpg01) +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + 
displacement,data=train_data,family=binomial) +glm.probs=predict(glm.fit,test_data,type="response") +glm.pred=rep(0,length(test_data)) +glm.pred[glm.probs>.5]=1 +mean(glm.pred!=test_data$mpg01) +glm.pred +glm.pred=rep(0,length(test_data)) +glm.pred +test_data +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) +glm.probs=predict(glm.fit,test_data,type="response") +glm.pred=rep(0,nrow(test_data)) +glm.pred[glm.probs>.5]=1 +mean(glm.pred!=test_data$mpg01) +set.seed(1) +auto <- na.omit(auto) +train_bools <- (auto$year %% 2 == 0) +train_data = auto[train_bools,] +test_data = auto[!train_bools,] +train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,] +test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,] +train.mpg01 = auto$mpg01[train_bools] +knn.pred = knn(train.X,test.X,train.mpg01,k=1) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=2) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=3) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=4) +mean(knn.pred != test_data$mpg0) +import(class) +library(class) +knn.pred = knn(train.X,test.X,train.mpg01,k=1) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=2) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=3) +mean(knn.pred != test_data$mpg01) +knn.pred = knn(train.X,test.X,train.mpg01,k=4) +mean(knn.pred != test_data$mpg0) +q() diff --git a/hw3/answers b/hw3/answers index a4e2011..56b2827 100644 --- a/hw3/answers +++ b/hw3/answers @@ -196,7 +196,7 @@ Part B: Choose one of Questions 10 or 11 > qda.fit Call: - lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) + qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) Prior probabilities of groups: 0 1 @@ -206,17 +206,12 @@ Part B: Choose one of 
Questions 10 or 11 horsepower weight cylinders displacement 0 131.96939 3579.827 6.755102 268.4082 1 77.96429 2313.598 4.071429 111.7188 - - Coefficients of linear discriminants: - LD1 - horsepower 0.0060634365 - weight -0.0011442212 - cylinders -0.6390942259 - displacement 0.0004517291 + + > qda.pred=predict(qda.fit,test_data,na.rm=T) ***Test Data Error Rate: > mean(qda.pred$class!=test_data$mpg01,na.rm=T) - [1] 0.1428571 + [1] 0.1483516 @@ -227,12 +222,12 @@ Part B: Choose one of Questions 10 or 11 > glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) > glm.probs=predict(glm.fit,test_data,type="response") - > glm.pred=rep(0,199) + > glm.pred=rep(0,nrow(test_data)) > glm.pred[glm.probs>.5]=1 ***Test Data Error Rate: > mean(glm.pred!=test_data$mpg01) - [1] 0.1407035 + [1] 0.1373626 ────────────────────────────────────────────────────────────────────────── @@ -249,24 +244,30 @@ Part B: Choose one of Questions 10 or 11 > train_bools <- (auto$year %% 2 == 0) > train_data = auto[train_bools,] > test_data = auto[!train_bools,] - - > train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] - > test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] + + > train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,] + > test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,] > train.mpg01 = auto$mpg01[train_bools] + ***Test Data Error Rates: k = 1 + > knn.pred = knn(train.X,test.X,train.mpg01,k=1) > mean(knn.pred != test_data$mpg01) [1] 0.1483516 k = 2 + > knn.pred = knn(train.X,test.X,train.mpg01,k=2) > mean(knn.pred != test_data$mpg01) [1] 0.1593407 k = 3 + > knn.pred = knn(train.X,test.X,train.mpg01,k=3) > mean(knn.pred != test_data$mpg01) [1] 0.1648352 k = 4 + > knn.pred = knn(train.X,test.X,train.mpg01,k=4) > mean(knn.pred != test_data$mpg0) - [1] 0.1813187 + [1] 0.1923077 + k = 
1 looks like the best, since the error rate increases with k. diff --git a/hw3/commands b/hw3/commands index 6991c04..9505819 100644 --- a/hw3/commands +++ b/hw3/commands @@ -22,13 +22,13 @@ lda.fit=lda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_da lda.pred=predict(lda.fit, test_data) mean(lda.pred$class!=test_data$mpg01,na.rm=T) -qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) +qda.fit=qda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data) qda.pred=predict(qda.fit,test_data,na.rm=T) mean(qda.pred$class!=test_data$mpg01,na.rm=T) -glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial) +glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) glm.probs=predict(glm.fit,test_data,type="response") -glm.pred=rep(0,199) +glm.pred=rep(0,nrow(test_data)) glm.pred[glm.probs>.5]=1 mean(glm.pred!=test_data$mpg01) @@ -39,8 +39,18 @@ train_data = auto[train_bools,] test_data = auto[!train_bools,] -train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] -test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] +train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,] +test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,] train.mpg01 = auto$mpg01[train_bools] knn.pred = knn(train.X,test.X,train.mpg01,k=1) +mean(knn.pred != test_data$mpg01) + +knn.pred = knn(train.X,test.X,train.mpg01,k=2) +mean(knn.pred != test_data$mpg01) + +knn.pred = knn(train.X,test.X,train.mpg01,k=3) +mean(knn.pred != test_data$mpg01) + +knn.pred = knn(train.X,test.X,train.mpg01,k=4) +mean(knn.pred != test_data$mpg01)