# Auto data: classify cars as above/below median mpg with KNN, QDA and
# logistic regression, training on even model years and testing on odd ones.
#
# Reconstructed from an interactive session so that the file can be run from
# top to bottom. Help lookups, bare object dumps, mid-session q() calls and
# attempts that only produced errors (nrows(), import(), an undefined train.Y,
# a pasted "> " prompt, an unbalanced rep() call) are left out; the last
# working version of each step is kept.
library(ISLR)
library(MASS)   # qda()
library(class)  # knn()

auto <- read.table("auto.data", header = TRUE, na.strings = "?")

# Binary response: 1 when mpg is above the median of all rows read, else 0.
# The vector is sized from the data rather than a hard-coded 397.
auto$mpg01 <- rep(0, nrow(auto))
auto$mpg01[auto$mpg > median(auto$mpg)] <- 1

# Worked example from ?knn, kept as a reference for the shapes knn() expects:
# one class label in `cl` per row of `train`.
train <- rbind(iris3[1:25, , 1], iris3[1:25, , 2], iris3[1:25, , 3])
test <- rbind(iris3[26:50, , 1], iris3[26:50, , 2], iris3[26:50, , 3])
cl <- factor(c(rep("s", 25), rep("c", 25), rep("v", 25)))
stopifnot(nrow(train) == length(cl))

# knn() does not accept missing values, and applying na.omit() to the
# predictor matrix alone leaves the label vector misaligned. Incomplete rows
# are therefore dropped from the whole data frame before it is split.
auto <- na.omit(auto)

train_bools <- auto$year %% 2 == 0
train_data <- auto[train_bools, ]
test_data <- auto[!train_bools, ]

# ---- KNN on horsepower, displacement, weight, acceleration ----
knn_predictors <- cbind(
  auto$horsepower, auto$displacement, auto$weight, auto$acceleration
)
train.X <- knn_predictors[train_bools, ]
test.X <- knn_predictors[!train_bools, ]
train.mpg01 <- auto$mpg01[train_bools]

# knn() breaks ties at random, so fix the seed to make the errors repeatable.
set.seed(1)
for (k in 1:4) {
  knn.pred <- knn(train.X, test.X, train.mpg01, k = k)
  # Compare against the test labels only (not all of auto$mpg01), and spell
  # the column name out instead of relying on `$mpg0` partial matching.
  cat("KNN k =", k, "test error:", mean(knn.pred != test_data$mpg01), "\n")
}

# ---- QDA ----
# Variant with acceleration in place of cylinders.
qda.fit <- qda(
  mpg01 ~ horsepower + weight + acceleration + displacement,
  data = train_data
)
qda.pred <- predict(qda.fit, test_data)
print(mean(qda.pred$class != test_data$mpg01))

# Variant with cylinders; this is the fit left in `qda.fit` / `qda.pred`.
qda.fit <- qda(
  mpg01 ~ horsepower + weight + cylinders + displacement,
  data = train_data
)
print(qda.fit)
qda.pred <- predict(qda.fit, test_data)
print(mean(qda.pred$class != test_data$mpg01))

# ---- Logistic regression ----
glm.fit <- glm(
  mpg01 ~ horsepower + weight + cylinders + displacement,
  data = train_data,
  family = binomial
)
glm.probs <- predict(glm.fit, test_data, type = "response")
# One prediction per test ROW: length(test_data) would count columns, and a
# literal 199 breaks as soon as the split changes.
glm.pred <- rep(0, nrow(test_data))
glm.pred[glm.probs > 0.5] <- 1
print(mean(glm.pred != test_data$mpg01))
# Part 1: logistic regression and KNN on the Auto data, this time with
#         cylinders among the KNN predictors.
# Part 2: validation-set estimates of the test error of a logistic regression
#         of default on income and balance (ISLR `Default` data).
#
# Reconstructed from an interactive session so that it runs top to bottom.
# Attempts that only produced errors are dropped: import(class),
# library(ISLr), an unterminated "binomial string, glm() without a family on
# a factor response, summary(glm), default[...] / Default[!1:6001, ],
# glm.fit() called with a formula, and a mid-session q().
library(class)  # knn()
library(boot)
library(MASS)
library(ISLR)   # Default

# ---- Part 1: Auto ----
# `auto`, including its mpg01 column, is created earlier in this file.
auto <- na.omit(auto)
train_bools <- auto$year %% 2 == 0
train_data <- auto[train_bools, ]
test_data <- auto[!train_bools, ]

glm.fit <- glm(
  mpg01 ~ horsepower + weight + cylinders + displacement,
  data = train_data,
  family = binomial
)
glm.probs <- predict(glm.fit, test_data, type = "response")
# nrow(), not length(): length() of a data frame is its number of columns,
# which left glm.pred far too short in the earlier attempts.
glm.pred <- rep(0, nrow(test_data))
glm.pred[glm.probs > 0.5] <- 1
print(mean(glm.pred != test_data$mpg01))

knn_predictors <- cbind(
  auto$horsepower, auto$displacement, auto$weight, auto$cylinders
)
train.X <- knn_predictors[train_bools, ]
test.X <- knn_predictors[!train_bools, ]
train.mpg01 <- auto$mpg01[train_bools]

# knn() breaks ties at random; seed before the run so results repeat.
set.seed(1)
for (k in 1:4) {
  knn.pred <- knn(train.X, test.X, train.mpg01, k = k)
  # Full column name: `$mpg0` only worked through partial matching.
  cat("KNN k =", k, "test error:", mean(knn.pred != test_data$mpg01), "\n")
}

# ---- Part 2: Default ----
data(Default)
set.seed(45)

# `default` is a factor, so the binomial family is required.
fit.glm <- glm(default ~ income + balance, data = Default, family = "binomial")
print(summary(fit.glm))

# Two train/test splits by row position. A negated index (`!1:6001`) selects
# nothing, so the test rows are given as explicit ranges up to the last row.
n_default <- nrow(Default)
train.default1 <- Default[1:6001, ]
train.default2 <- Default[1:5001, ]
test.default1 <- Default[6002:n_default, ]
test.default2 <- Default[5002:n_default, ]

fit.glm.default1 <- glm(
  default ~ income + balance,
  data = train.default1,
  family = "binomial"
)
fit.glm.default2 <- glm(
  default ~ income + balance,
  data = train.default2,
  family = "binomial"
)

# Split 1: classify as "Yes" when the fitted default probability exceeds 0.5.
fit.glm.default1.prob <- predict(
  fit.glm.default1, test.default1, type = "response"
)
fit.glm.default1.pred <- rep("No", nrow(test.default1))
fit.glm.default1.pred[fit.glm.default1.prob > 0.5] <- "Yes"
# Tabulate against the response column, not the whole test data frame.
print(table(fit.glm.default1.pred, test.default1$default))
# Misclassification rate computed from the data instead of retyping the
# confusion-table counts by hand.
print(mean(fit.glm.default1.pred != test.default1$default))

# Split 2: same procedure.
fit.glm.default2.prob <- predict(
  fit.glm.default2, test.default2, type = "response"
)
fit.glm.default2.pred <- rep("No", nrow(test.default2))
fit.glm.default2.pred[fit.glm.default2.prob > 0.5] <- "Yes"
print(table(fit.glm.default2.pred, test.default2$default))
print(mean(fit.glm.default2.pred != test.default2$default))

print(summary(fit.glm.default1))
print(summary(fit.glm.default2))
print(coefficients(fit.glm.default1))
# Bootstrap of the income and balance coefficients of the logistic regression
# default ~ income + balance. Uses the boot package, the `Default` data and
# `fit.glm.default1`, all of which are set up earlier in this file.
#
# Reconstructed from an interactive session: help lookups, a redefinition of
# boot.fn with unbalanced braces, and the closing q() are left out so that the
# file can be run from top to bottom.

# A coefficient vector takes a single subscript (`[1, 2]` is an error);
# positions 2:3 are the income and balance slopes.
print(fit.glm.default1$coefficients[2:3])

#' Logistic-regression slopes for one (re)sample of rows.
#'
#' @param Default Data frame with columns `default`, `income` and `balance`.
#' @param index Row indices to fit on; passed to `glm()` as `subset`.
#' @return Coefficients 2:3 of the fitted model (income and balance).
boot.fn <- function(Default, index) {
  model <- glm(
    default ~ income + balance,
    data = Default,
    family = "binomial",
    subset = index
  )
  # Return the coefficients of the model fitted above. The first draft
  # returned those of the global fit.glm.default1, so every resample gave the
  # same answer.
  coef(model)[2:3]
}

# Spot checks on contiguous row ranges. `seq(15:7000)` counts the elements of
# 15:7000 and so yields 1:6986; the ranges are written directly instead.
print(boot.fn(Default, 15:7000))
print(boot.fn(Default, 15:3500))

set.seed(56)
# R is the number of bootstrap replicates: a single count, not the vector
# 1:1000.
print(boot(Default, boot.fn, R = 1000))

# NOTE(review): presumably ratios of the bootstrap standard errors to the
# glm standard errors (mantissas only) for income and balance -- confirm
# against the printed output above.
print(4.68 / 7.06)
print(2.32 / 3.232)