mirror of
				https://asciireactor.com/otho/cs-5821.git
				synced 2025-10-31 17:58:04 +00:00 
			
		
		
		
	finished hw3
This commit is contained in:
		
							parent
							
								
									b0c1b3ed08
								
							
						
					
					
						commit
						7d230c0b30
					
				
							
								
								
									
										
											BIN
										
									
								
								hw3/.RData
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								hw3/.RData
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										477
									
								
								hw3/.Rhistory
									
									
									
									
									
								
							
							
						
						
									
										477
									
								
								hw3/.Rhistory
									
									
									
									
									
								
							| @ -1,426 +1,79 @@ | ||||
| auto = read.table("auto.data",header=T,na.strings="?") | ||||
| length(x=auto$mpg) | ||||
| glm | ||||
| glm.pred | ||||
| help(rep) | ||||
| glm.pred=rep(FALSE,397) | ||||
| glm.pred | ||||
| medium(auto$mpg) | ||||
| median(auto$mpg) | ||||
| glm.pred[auto$mpg>median(auto$mpg)]=T | ||||
| glm.pred | ||||
| contour(auto) | ||||
| contour(glm.pred ~ auto$mpg) | ||||
| contour(glm.pred,auto$mpg) | ||||
| help(contour) | ||||
| contour(auto$mpg,auto$horsepower,glm.pred) | ||||
| glm.pred | ||||
| length(glm.pred) | ||||
| table(glm.pred,auto$mpg) | ||||
| table(glm.pred,auto$mpg,auto$horsepower) | ||||
| glm.pred=rep(0,397) | ||||
| glm.pred[auto$mpg>median(auto$mpg)]=1 | ||||
| glm.pred | ||||
| auto$mpg01=rep(0,397) | ||||
| auto$mpg01[auto$mpg>median(auto$mpg)]=1 | ||||
| auto$mpg01 | ||||
| auto$mpg01 | ||||
| auto$mpg01 | ||||
| plots(auto) | ||||
| plot(auto) | ||||
| boxplot(auto) | ||||
| boxplot.matrix(auto) | ||||
| help(boxplot) | ||||
| boxplot(auto$mpg01,auto) | ||||
| boxplot(auto$mpg,auto) | ||||
| boxplot(auto$mpg) | ||||
| boxplot(auto) | ||||
| boxplot(mpg01 ~ auto) | ||||
| boxplot(mpg01 ~) | ||||
| boxplot(auto$mpg01 ~ auto) | ||||
| attach(auto) | ||||
| boxplot(mpg01) | ||||
| boxplot(mpg01 ~ auto) | ||||
| boxplot(mpg01 ~ auto,auto) | ||||
| boxplot(mpg01 ~ auto,data = auto) | ||||
| help(plot.table) | ||||
| plot.table(auto) | ||||
| help(plot.table) | ||||
| plot(auto) | ||||
| plot(auto,t="box") | ||||
| help(plot.table) | ||||
| help(plot.table,plot.frame=1) | ||||
| help(plot.table) | ||||
| help(plot.table,frame.plot=1) | ||||
| help(plot.table) | ||||
| help(plot.table,frame.plot=is.num) | ||||
| help(plot.table) | ||||
| plot(auto,t="box",frame.plot=1) | ||||
| plot(auto,frame.plot=1) | ||||
| plot(auto,frame.plot=1) | ||||
| plot(auto,frame.plot=is.num) | ||||
| plot(auto,frame.plot=0) | ||||
| plot(auto,frame.plot="0") | ||||
| plot(auto,frame.plot="1") | ||||
| plot(auto,frame.plot=TRUE) | ||||
| plot(auto,frame.plot=FALSE) | ||||
| plot(auto,frame.plot=TRUE) | ||||
| plot(auto,frame.plot=T) | ||||
| plot(auto,frame.plot=1) | ||||
| boxplot(mpg~mpg01,auto) | ||||
| boxplot(mpg01 ~ mpg,auto) | ||||
| boxplot(mpg01 ~ *,auto) | ||||
| boxplot(mpg01 ~ ,auto) | ||||
| boxplot(mpg01 ~ auto,auto) | ||||
| boxplot(mpg01,auto) | ||||
| boxplot(auto) | ||||
| boxplot(auto,y=mpg01) | ||||
| boxplot(auto,y=mpg) | ||||
| boxplot(data = auto) | ||||
| boxplot(auto) | ||||
| help(for) | ||||
| plot(auto,frame.plot=1) | ||||
| plot(auto) | ||||
| names(auto) | ||||
| auto$name | ||||
| help(sample) | ||||
| x <- 1:12 | ||||
| x | ||||
| sample(x) | ||||
| help(sample) | ||||
| sample(x,replace=T) | ||||
| sample(x,replace=T) | ||||
| sample(x,replace=F) | ||||
| c | ||||
| x | ||||
| sample(x,replace=T) | ||||
| x | ||||
| help(sample) | ||||
| sample(x[x>9]) | ||||
| sample(x[x>8]) | ||||
| help(sample) | ||||
| x <- 1:10 | ||||
| sample(x[x>8]) | ||||
| sample(x[x>]) | ||||
| help(sample) | ||||
| help(sample) | ||||
| help(sample) | ||||
| sample(auto,size=length(mpg01)/2) | ||||
| x <- length(mpg01) | ||||
| sample(x,size=length(mpg01)/2) | ||||
| auto[sample(x,size=length(mpg01)/2)] | ||||
| auto$mpg[sample(x,size=length(mpg01)/2)] | ||||
| help(data.frame) | ||||
| data.frame( | ||||
| help(data.frame) | ||||
| auto[sample(x,size=length(mpg01)/2)] | ||||
| train = sample(x,size=length(mpg01)/2) | ||||
| train =  | ||||
| auto[train] | ||||
| auto$mpg[train] | ||||
| auto$mpg[train,] | ||||
| auto$mpg[train] | ||||
| auto$mpg[23] | ||||
| auto$mpg[228] | ||||
| auto$mpg[391] | ||||
| auto.test=auto[!train] | ||||
| auto.train=auto[train] | ||||
| auto.test | ||||
| summary(auto.test) | ||||
| train=(mpg<15) | ||||
| train | ||||
| train = (sample(x,size=length(mpg01)/2)) | ||||
| train | ||||
| head(auto) | ||||
| auto[,train[ | ||||
| auto[,train] | ||||
| train | ||||
| help(contains) | ||||
| auto[1,train] | ||||
| train | ||||
| auto[[,train]] | ||||
| auto[[1,train]] | ||||
| autoi | ||||
| head(auto) | ||||
| head(auto[sample(nrow(auto),397/2)]) | ||||
| head(auto[sample(nrow(auto),3)]) | ||||
| data = data.frame(auto) | ||||
| data | ||||
| head(data[sample(nrow(data),3)]) | ||||
| nrow(data) | ||||
| head(data[sample(ncol(data),3)]) | ||||
| head(data[sample(ncol(data),397/2)]) | ||||
| head(data[sample(ncol(data),3)]) | ||||
| head(data[sample(ncol(data),3)]) | ||||
| head(data[sample(ncol(data),3)]) | ||||
| head(data[sample(ncol(data),3)]) | ||||
| head(data[,sample(ncol(data),3)]) | ||||
| head(data[,sample(ncol(data),3)]) | ||||
| head(data[,sample(ncol(data),3)]) | ||||
| head(data[,sample(ncol(data),3)]) | ||||
| head(data[,sample(ncol(data),3)]) | ||||
| head(data[sample(ncol(data),3),]) | ||||
| head(data[sample(ncol(data),3),]) | ||||
| head(data[sample(ncol(data),3),]) | ||||
| head(data[sample(nrow(data),3),]) | ||||
| head(data[sample(nrow(data),397/2),]) | ||||
| head(data[sample(nrow(data),397/2),]) | ||||
| head(data[sample(nrow(data),397/2),]) | ||||
| head(data[sample(nrow(data),397/2),]) | ||||
| head(data[sample(nrow(data),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| head(auto[sample(nrow(auto),397/2),]) | ||||
| train = auto[sample(nrow(auto),397/2),] | ||||
| [sample(nrow(auto),397/2),] | ||||
| sample(nrow(auto),397/2) | ||||
| train sample(nrow(auto),397/2) | ||||
| train = sample(nrow(auto),397/2) | ||||
| autp[train,] | ||||
| auto[train,] | ||||
| train = sample(nrow(auto),397/2) | ||||
| head(auto[train,]) | ||||
| head(auto[!train,]) | ||||
| traindata = auto[train,] | ||||
| testdata = auto[!train,] | ||||
| testdata | ||||
| traindata | ||||
| length(traindata) | ||||
| length(traindata$mpg) | ||||
| 198*2 | ||||
| summary(testdata) | ||||
| testdata = auto[!train] | ||||
| testdata | ||||
| testdata = auto[!train,] | ||||
| train | ||||
| summary(train) | ||||
| names(train) | ||||
| head(traindata) | ||||
| 
 | ||||
| testdata = auto[!train,] | ||||
| testdata | ||||
| !train | ||||
| train | ||||
| ?sample | ||||
| sort(train) | ||||
| train_vals = train | ||||
| train = rep(false,397) | ||||
| train = rep(F,397) | ||||
| train | ||||
| help for | ||||
| ?for | ||||
| ?for | ||||
| help)for) | ||||
| help(for) | ||||
| help(for) | ||||
| help lapply() | ||||
| ?lapply | ||||
| sapply(train, | ||||
| ?sapply | ||||
| sapply(train, | ||||
| ?sapply | ||||
| train[train_vals]=T | ||||
| train | ||||
| traindata = auto[train,] | ||||
| traindata | ||||
| length(auto) | ||||
| length(traindata) | ||||
| length(traindata$mpg) | ||||
| testdata=auto[!train,] | ||||
| length(testdate$mpg) | ||||
| length(testdata$mpg) | ||||
| training_indices = sample(nrow(auto),397/2) | ||||
| train_bools = rep(F,length(auto$mpg)) | ||||
| train_bools[training_indices]=T | ||||
| head(train_bools) | ||||
| length(train_bools) | ||||
| library(ISLR) | ||||
| library(MASS) | ||||
| library(class) | ||||
| train_bools <- (auto$year %% 2 == 0) | ||||
| train_data = auto[train_bools,] | ||||
| test_data = auto[!train_bools,] | ||||
| summary(train_data) | ||||
| summary(test_data) | ||||
| lda.fit | ||||
| library(MASS) | ||||
| lda.fit | ||||
| lda() | ||||
| detach(auto) | ||||
| mpg01 | ||||
| mpg | ||||
| attach(test_data) | ||||
| mpg01 | ||||
| names() | ||||
| names(test_data) | ||||
| ldf.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data) | ||||
| detach(test_data) | ||||
| ldf.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data) | ||||
| lda.fit | ||||
| lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data) | ||||
| lda.fit | ||||
| summary(lda.fit) | ||||
| coefficients(lda.fit) | ||||
| plot(lda.fit) | ||||
| lda.pred=predict(lda.fit,test_data) | ||||
| lda.pred=predict(lda.fit, !training_bools) | ||||
| lda.pred=predict(lda.fit, !training_indices) | ||||
| test_data | ||||
| lda.pred=predict(lda.fit, test_data) | ||||
| lda.pred | ||||
| plot(lda.pred) | ||||
| names(lda.pred) | ||||
| lda.class=lda.pres$class | ||||
| lda.class=lda.pred$class | ||||
| table(lda.class,testdata) | ||||
| table(lda.class,test_data) | ||||
| length(lda.class) | ||||
| length(test_data) | ||||
| table(lda.class,test_data$mpg01) | ||||
| mean(lda.class==test_data$mpg01) | ||||
| sum(lda.pred$posterior[,1]>=.5) | ||||
| sum(lda.pred$posterior[,1]<.5) | ||||
| lda.pred$posterior[,1] | ||||
| sum(lda.pred$posterior<.5) | ||||
| lda.pred$posterior | ||||
| lda.pred$posterior<5 | ||||
| lda.pred$posterior<.5 | ||||
| sum(lda.pred$posterior<.5) | ||||
| sum(lda.pred$posterior<.5[,1]) | ||||
| sum(lda.pred$posterior<.5[1]) | ||||
| sum(lda.pred$posterior<.5[2]) | ||||
| lda.pred$posterior<.5[2] | ||||
| lda.pred$posterior<.5 | ||||
| lda.pred$posterior | ||||
| lda.pred$posterior[,1] | ||||
| lda.pred$posterior[1,] | ||||
| lda.pred$posterior[,2] | ||||
| lda.pred$posterior[,1] | ||||
| lda.pred$posterior[,1]>.5 | ||||
| sum(lda.pred$posterior[,1]>.5) | ||||
| sum.bool(lda.pred$posterior[,1]>.5) | ||||
| ?sum | ||||
| sum.bool(lda.pred$posterior[,1]>.5,na.rm=T) | ||||
| sum(lda.pred$posterior[,1]>.5,na.rm=T) | ||||
| sum(lda.pred$posterior[,1]>.5) | ||||
| sum(lda.pred$posterior[,1]>.5,na.rm=T) | ||||
| sum(lda.pred$posterior[,1]>=.5,na.rm=T) | ||||
| sum(lda.pred$posterior[,1]<.5,na.rm=T) | ||||
| mean(lda.pred$[,1]==test_data,na.rm=T) | ||||
| lda.pred | ||||
| lda.pred$class | ||||
| lda.pred$class==test_data$mpg01 | ||||
| mean(lda.pred$class==test_data$mpg01,na.rm=T) | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) | ||||
| lda.fit | ||||
| mean(lda.pred$class==test_data$mpg01,na.rm=T) | ||||
| lda.pred=predict(lda.fit, test_data) | ||||
| mean(lda.pred$class==test_data$mpg01,na.rm=T) | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| train_data == test_data | ||||
| train_data$mpg01 == test_data$mpg01 | ||||
| lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) | ||||
| lda.pred=predict(lda.fit, test_data) | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| lda.pred | ||||
| lda.pred$posterior[,1] | ||||
| summary(lda.fit) | ||||
| lda.fit | ||||
| lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data) | ||||
| lda.fit | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| lda.pred=predict(lda.fit, test_data) | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| head(lda.pred) | ||||
| lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) | ||||
| lda.pred=predict(lda.fit, test_data) | ||||
| head(lda.pred) | ||||
| mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) | ||||
| qda.fit | ||||
| qda.class=predict(qda.fit,test_data)$class | ||||
| qda.class=predict(qda.fit,test_data,na.rm=T)$class | ||||
| qda.class=predict(qda.fit,test_data)$class | ||||
| qda.class | ||||
| mean(qda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| qda.pred=predict(qda.fit,test_data) | ||||
| qda.pred=predict(qda.fit,test_data,na.rm=T) | ||||
| mean(qda.pred$class!=test_data$mpg01,na.rm=T) | ||||
| glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial) | ||||
| glm.probs=predict(glm.fit,test_data,type="response") | ||||
| glm.pred=rep(0,199) | ||||
| glm.pred[glm.probs>.5]=1 | ||||
| table(glm.pred,test_data$mpg01) | ||||
| mean(glm.pred!=test_data$mpg01) | ||||
| library(class) | ||||
| ?cbind | ||||
| help(knn) | ||||
| help(knn) | ||||
|      train <- rbind(iris3[1:25,,1], iris3[1:25,,2], iris3[1:25,,3]) | ||||
|      test <- rbind(iris3[26:50,,1], iris3[26:50,,2], iris3[26:50,,3]) | ||||
| train | ||||
| test | ||||
| ?knn | ||||
| knn.fit = knn(train_data,test_data,auto$mpg01[training_indices]) | ||||
| knn.fit = knn(train_data,test_data,auto$mpg01[training_indices],k=1) | ||||
| knn.fit = knn(train_data,test_data,auto$mpg01[training_indices],k=1) | ||||
| ?knn | ||||
| training_indices | ||||
| train_bools | ||||
| knn.fit = knn(train_data,test_data,auto$mpg01[train_bools],k=1) | ||||
| sdf = (mpg01<1) | ||||
| sdf = (auto$mpg01<1) | ||||
| sdf | ||||
| train_bools | ||||
| cbind(horsepower,displacement) | ||||
| cbind(train_data$horsepower,displacement) | ||||
| cbind(train_data$horsepower,train_data$displacement) | ||||
| cbind(auto$horsepower,auto$displacement)[train_bools] | ||||
| cbind(auto$horsepower,auto$displacement)[train_bools,] | ||||
| cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.mpg01 = auto[train_bools] | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| set.seed(56) | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
| ?cbind | ||||
| ?Knn | ||||
| ?knn | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| train.X = train.X[!is.na(train.X)] | ||||
| test.X = data.frame(test.X, | ||||
| train.mpg01 = train.mpg01[!is.na(train.mpg01)] | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
| length(train.mpg01) | ||||
| length(test.X) | ||||
| text.X | ||||
| test.X | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| length(test.X) | ||||
| test.X | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
|      cl <- factor(c(rep("s",25), rep("c",25), rep("v",25))) | ||||
| cl | ||||
| length(cl) | ||||
| length(train) | ||||
| nrows(train) | ||||
| nrow(train) | ||||
| train.X | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| train.X | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| test.X | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
| ?knn | ||||
| length(train.X) | ||||
| length(train.X[1,]) | ||||
| length(train.X[,1]) | ||||
| ?knn | ||||
| plot(auto) | ||||
| train.X = cbind(auto$horsepower,auto$displacement)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement)[!train_bools,] | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
| train.X | ||||
| test.X | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| train.mpg01 | ||||
| length(train.mpg01) | ||||
| nrow(train.X) | ||||
| knn(train.X,train.Y,train.mpg01,K=1) | ||||
| knn(train.X,train.Y,train.mpg01,k=1) | ||||
| knn(train.X,test.X,train.mpg01,k=1) | ||||
| train.X | ||||
| na.omit(train.X) | ||||
| ?na.omit | ||||
| na.omit(train.X) | ||||
| na.omit(train.X) | ||||
| knn(na.omit(train.X),test.X,train.mpg01,k=1) | ||||
| knn(na.omit(train.X),test.X,na.omit(train.mpg01),k=1) | ||||
| knn(na.omit(train.X),na.omit(test.X),na.omit(train.mpg01),k=1) | ||||
| train.mpg012 = na.omit(auto$mpg01)[train_bools] | ||||
| train.mpg012 | ||||
| train.mpg01 | ||||
| nrow(train) | ||||
| na.omit(auto) | ||||
| auto | ||||
| na.omit(auto) | ||||
| summary(auto) | ||||
| summary(na.omit(auto)) | ||||
| Auto = na.omit(auto) | ||||
| auto = na.omit(auto) | ||||
| ncol(auto) | ||||
| nrow(auto) | ||||
| auto <- na.omit(auto) | ||||
| train_bools <- (auto$year %% 2 == 0) | ||||
| train_data = auto[train_bools,] | ||||
| test_data = auto[!train_bools,] | ||||
| train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
| test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
| train.mpg01 = auto$mpg01[train_bools] | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=1) | ||||
| mean(knn.pred != auto$mpg01) | ||||
| mean(knn.pred != test_data$mpg01) | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=2) | ||||
| mean(knn.pred != test_data$mpg01) | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=3) | ||||
| mean(knn.pred != test_data$mpg01) | ||||
| knn.pred = knn(train.X,test.X,train.mpg01,k=4) | ||||
| mean(knn.pred != test_data$mpg0) | ||||
| knn.pred | ||||
| length(knn.pred) | ||||
| dim(knn.pred) | ||||
| length(test_data) | ||||
| ncol(test_data) | ||||
| nrow(test_data) | ||||
| q() | ||||
|  | ||||
							
								
								
									
										113
									
								
								hw3/answers
									
									
									
									
									
								
							
							
						
						
									
										113
									
								
								hw3/answers
									
									
									
									
									
								
							| @ -85,6 +85,7 @@ Part B: Choose one of Questions 10 or 11 | ||||
|     given car gets high or low gas mileage based on the Auto data | ||||
|     set. | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (a) Create a binary variable, mpg01 , that contains a 1 if mpg | ||||
|     contains a value above its median, and a 0 if mpg contains a | ||||
|     value below its median. You can compute the median using the | ||||
| @ -92,6 +93,9 @@ Part B: Choose one of Questions 10 or 11 | ||||
|     data.frame() function to create a single data set containing | ||||
|     both mpg01 and the other Auto variables. | ||||
| 
 | ||||
|         > auto$mpg01=rep(0,397) | ||||
|         > auto$mpg01[auto$mpg>median(auto$mpg)]=1 | ||||
| 
 | ||||
| > auto$mpg01 | ||||
|   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0 | ||||
|  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | ||||
| @ -106,6 +110,7 @@ Part B: Choose one of Questions 10 or 11 | ||||
| [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 | ||||
| 
 | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (b) Explore the data graphically in order to investigate the | ||||
|     association between mpg01 and the other features. Which of the | ||||
|     other features seem most likely to be useful in predicting mpg01 | ||||
| @ -119,6 +124,16 @@ Part B: Choose one of Questions 10 or 11 | ||||
|         Displacement is on the cusp and the other variables don't | ||||
|         have a terribly useful relationship with this median. | ||||
| 
 | ||||
|         The boxplots indicate that acceleration really isn't a great | ||||
|         predictor of mpg01, but displacement is. They also confirm | ||||
|         horsepower and weight as good predictors, and cylinders also | ||||
|         seems to be very strong, even though I didn't take that from | ||||
|         the scatter plots. | ||||
| 
 | ||||
|         I will use mpg01 ~ horsepower + weight + cylinders + displacement | ||||
| 
 | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (c) Split the data into a training set and a test set. | ||||
| 
 | ||||
|         Seems like a 50/50 random sampling is appropriate enough.  | ||||
| @ -127,87 +142,133 @@ Part B: Choose one of Questions 10 or 11 | ||||
|         > train_bools = rep(F,length(auto$mpg)) | ||||
|         > train_bools[training_indices]=T | ||||
|         > head(train_bools) | ||||
|         [1] FALSE  TRUE FALSE FALSE  TRUE FALSE | ||||
|         [1]  TRUE  TRUE  TRUE FALSE  TRUE FALSE | ||||
|         > length(train_bools) | ||||
|         [1] 397 | ||||
|         > train_data = auto[train_bools,] | ||||
|         > test_data = auto[!train_bools,] | ||||
|                   | ||||
|         I later replaced this random split: a solution I found online | ||||
|         split the data by model year instead, and I was having trouble | ||||
|         with the KNN model, so I followed their approach. I used: | ||||
| 
 | ||||
|         > train <- (auto$year %% 2 == 0) | ||||
| 
 | ||||
|         and then built train_data and test_data as before, using this | ||||
|         mask in place of train_bools. | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (d) Perform LDA on the training data in order to predict mpg01 | ||||
|     using the variables that seemed most associated with mpg01 in | ||||
|     (b). What is the test error of the model obtained? | ||||
| 
 | ||||
|         > lda.fit | ||||
|         Call: | ||||
|         lda(mpg01 ~ horsepower + weight + acceleration + displacement,  | ||||
|             data = train_data) | ||||
|         lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) | ||||
|          | ||||
|         Prior probabilities of groups: | ||||
|                 0         1  | ||||
|         0.5431472 0.4568528  | ||||
|         0.4666667 0.5333333  | ||||
|          | ||||
|         Group means: | ||||
|           horsepower   weight acceleration displacement | ||||
|         0  129.08411 3557.757     14.55981      269.729 | ||||
|         1   79.64444 2345.233     16.39222      116.800 | ||||
|           horsepower   weight cylinders displacement | ||||
|         0  131.96939 3579.827  6.755102     268.4082 | ||||
|         1   77.96429 2313.598  4.071429     111.7188 | ||||
|          | ||||
|         Coefficients of linear discriminants: | ||||
|                               LD1 | ||||
|         horsepower    0.005678626 | ||||
|         weight       -0.001137499 | ||||
|         acceleration -0.014950459 | ||||
|         displacement -0.007401647 | ||||
|                                LD1 | ||||
|         horsepower    0.0060634365 | ||||
|         weight       -0.0011442212 | ||||
|         cylinders    -0.6390942259 | ||||
|         displacement  0.0004517291 | ||||
| 
 | ||||
| 
 | ||||
|         Error Rate against test data: | ||||
| 
 | ||||
|      ***Test Data Error Rate: | ||||
|         > mean(lda.pred$class!=test_data$mpg01,na.rm=T) | ||||
|         [1] 0.1179487 | ||||
|         [1] 0.1428571 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (e) Perform QDA on the training data in order to predict mpg01 | ||||
|     using the variables that seemed most associated with mpg01 in | ||||
|     (b). What is the test error of the model obtained? | ||||
| 
 | ||||
|         > qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data) | ||||
|         > qda.fit | ||||
|         Call: | ||||
|         qda(mpg01 ~ horsepower + weight + acceleration + displacement,  | ||||
|             data = train_data) | ||||
|         lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data) | ||||
|          | ||||
|         Prior probabilities of groups: | ||||
|                 0         1  | ||||
|         0.5431472 0.4568528  | ||||
|         0.4666667 0.5333333  | ||||
|          | ||||
|         Group means: | ||||
|           horsepower   weight acceleration displacement | ||||
|         0  129.08411 3557.757     14.55981      269.729 | ||||
|         1   79.64444 2345.233     16.39222      116.800 | ||||
|           horsepower   weight cylinders displacement | ||||
|         0  131.96939 3579.827  6.755102     268.4082 | ||||
|         1   77.96429 2313.598  4.071429     111.7188 | ||||
|          | ||||
|         Coefficients of linear discriminants: | ||||
|                                LD1 | ||||
|         horsepower    0.0060634365 | ||||
|         weight       -0.0011442212 | ||||
|         cylinders    -0.6390942259 | ||||
|         displacement  0.0004517291 | ||||
| 
 | ||||
|         Error Rate: | ||||
|     ***Test Data Error Rate: | ||||
|         > mean(qda.pred$class!=test_data$mpg01,na.rm=T) | ||||
|         [1] 0.1025641 | ||||
|         [1] 0.1428571 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (f) Perform logistic regression on the training data in order to | ||||
|     predict mpg01 using the variables that seemed most associated | ||||
|     with mpg01 in (b). What is the test error of the model obtained? | ||||
| 
 | ||||
|         > glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial) | ||||
|         > glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial) | ||||
|         > glm.probs=predict(glm.fit,test_data,type="response") | ||||
|         > glm.pred=rep(0,199) | ||||
|         > glm.pred[glm.probs>.5]=1 | ||||
| 
 | ||||
|      ***Test Data Error Rate: | ||||
|         > mean(glm.pred!=test_data$mpg01) | ||||
|         [1] 0.120603 | ||||
|         [1] 0.1407035 | ||||
| 
 | ||||
| 
 | ||||
| ────────────────────────────────────────────────────────────────────────── | ||||
|     (g) Perform KNN on the training data, with several values of K, | ||||
|     in order to predict mpg01 . Use only the variables that seemed | ||||
|     most associated with mpg01 in (b). What test errors do you | ||||
|     obtain? Which value of K seems to perform the best on this data | ||||
|     set? | ||||
| 
 | ||||
|          | ||||
|        The knn method can't handle NA values, so I removed the rows | ||||
|        containing NAs and rebuilt the train/test split: | ||||
| 
 | ||||
|         > set.seed(1) | ||||
|         > auto <- na.omit(auto) | ||||
|         > train_bools <- (auto$year %% 2 == 0) | ||||
|         > train_data = auto[train_bools,] | ||||
|         > test_data = auto[!train_bools,] | ||||
| 
 | ||||
|         > train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,] | ||||
|         > test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,] | ||||
|         > train.mpg01 = auto$mpg01[train_bools] | ||||
| 
 | ||||
|      ***Test Data Error Rates: | ||||
|      k = 1 | ||||
|         > mean(knn.pred != test_data$mpg01) | ||||
|         [1] 0.1483516 | ||||
|      k = 2 | ||||
|         > mean(knn.pred != test_data$mpg01) | ||||
|         [1] 0.1593407 | ||||
|      k = 3 | ||||
|         > mean(knn.pred != test_data$mpg01) | ||||
|         [1] 0.1648352 | ||||
|      k = 4 | ||||
|         > mean(knn.pred != test_data$mpg0) | ||||
|         [1] 0.1813187 | ||||
| 
 | ||||
|         k = 1 performs best of the values tried (k = 1 to 4), since the | ||||
|         test error rate increases steadily with k. | ||||
| 
 | ||||
|   | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user