# Classify cars as high or low gas mileage from the Auto data set.
#
# Reads "auto.data", derives a binary response `mpg01` (1 when a car's mpg is
# above the median mpg, 0 otherwise), splits the usable rows into a training
# half and a test half, and reports the test error of four classifiers built
# on horsepower, displacement, weight and acceleration:
# LDA, QDA, logistic regression and 1-nearest-neighbour.
#
# Objects left in the workspace: auto, training_indices, train_bools,
# train_data, test_data, lda.fit, lda.pred, lda.class, qda.fit, qda.pred,
# qda.class, glm.fit, glm.probs, glm.pred, train.X, test.X, train.mpg01,
# knn.pred.

library(MASS)   # lda(), qda()
library(class)  # knn()

# Load data ----

# "?" is the missing-value marker in the raw file.
auto <- read.table("auto.data", header = TRUE, na.strings = "?")

# Binary response ----

# 1 = above-median mpg, 0 = at or below the median. Computed on every row of
# the file, before any rows are dropped, so the cut-off reflects all cars.
auto$mpg01 <- as.numeric(auto$mpg > median(auto$mpg, na.rm = TRUE))

# Exploration ----

plot(auto)                          # pairwise scatterplots of all columns
boxplot(mpg ~ mpg01, data = auto)   # mpg distribution within each class

# Train/test split ----

predictors <- c("horsepower", "displacement", "weight", "acceleration")
mpg01_formula <- mpg01 ~ horsepower + weight + acceleration + displacement

# knn() rejects missing values and the other models would predict NA for
# such rows, so only rows complete in the modelling columns are kept.
model_data <- auto[complete.cases(auto[, c(predictors, "mpg01")]), ]

# A logical mask is used instead of the raw indices because negating
# integer indices with `!` yields all FALSE, not the complement.
# The split is random and unseeded: results vary from run to run.
training_indices <- sample(nrow(model_data), floor(nrow(model_data) / 2))
train_bools <- rep(FALSE, nrow(model_data))
train_bools[training_indices] <- TRUE

train_data <- model_data[train_bools, ]
test_data <- model_data[!train_bools, ]

print(summary(train_data))
print(summary(test_data))

# Linear discriminant analysis ----

lda.fit <- lda(mpg01_formula, data = train_data)
print(lda.fit)
plot(lda.fit)

lda.pred <- predict(lda.fit, test_data)
lda.class <- lda.pred$class
print(table(lda.class, test_data$mpg01))
print(mean(lda.class != test_data$mpg01))   # LDA test error rate

# Quadratic discriminant analysis ----

qda.fit <- qda(mpg01_formula, data = train_data)
print(qda.fit)

qda.pred <- predict(qda.fit, test_data)
qda.class <- qda.pred$class
print(table(qda.class, test_data$mpg01))
print(mean(qda.class != test_data$mpg01))   # QDA test error rate

# Logistic regression ----

glm.fit <- glm(mpg01_formula, data = train_data, family = binomial)
glm.probs <- predict(glm.fit, test_data, type = "response")

# Classify as 1 when the fitted probability exceeds 0.5.
glm.pred <- rep(0, nrow(test_data))
glm.pred[glm.probs > 0.5] <- 1
print(table(glm.pred, test_data$mpg01))
print(mean(glm.pred != test_data$mpg01))    # logistic test error rate

# K-nearest neighbours (k = 1) ----

train.X <- as.matrix(train_data[, predictors])
test.X <- as.matrix(test_data[, predictors])
train.mpg01 <- train_data$mpg01

# knn() breaks distance ties at random; seed so the result is repeatable
# for a given split.
set.seed(56)
knn.pred <- knn(train.X, test.X, train.mpg01, k = 1)
print(table(knn.pred, test_data$mpg01))
print(mean(knn.pred != test_data$mpg01))    # KNN test error rate