mirror of
https://asciireactor.com/otho/cs-5821.git
synced 2024-11-24 01:25:08 +00:00
finished hw3
This commit is contained in:
parent
b0c1b3ed08
commit
7d230c0b30
BIN
hw3/.RData
BIN
hw3/.RData
Binary file not shown.
477
hw3/.Rhistory
477
hw3/.Rhistory
@ -1,426 +1,79 @@
|
|||||||
auto = read.table("auto.data",header=T,na.strings="?")
|
auto = read.table("auto.data",header=T,na.strings="?")
|
||||||
length(x=auto$mpg)
|
|
||||||
glm
|
|
||||||
glm.pred
|
|
||||||
help(rep)
|
|
||||||
glm.pred=rep(FALSE,397)
|
|
||||||
glm.pred
|
|
||||||
medium(auto$mpg)
|
|
||||||
median(auto$mpg)
|
|
||||||
glm.pred[auto$mpg>median(auto$mpg)]=T
|
|
||||||
glm.pred
|
|
||||||
contour(auto)
|
|
||||||
contour(glm.pred ~ auto$mpg)
|
|
||||||
contour(glm.pred,auto$mpg)
|
|
||||||
help(contour)
|
|
||||||
contour(auto$mpg,auto$horsepower,glm.pred)
|
|
||||||
glm.pred
|
|
||||||
length(glm.pred)
|
|
||||||
table(glm.pred,auto$mpg)
|
|
||||||
table(glm.pred,auto$mpg,auto$horsepower)
|
|
||||||
glm.pred=rep(0,397)
|
|
||||||
glm.pred[auto$mpg>median(auto$mpg)]=1
|
|
||||||
glm.pred
|
|
||||||
auto$mpg01=rep(0,397)
|
auto$mpg01=rep(0,397)
|
||||||
auto$mpg01[auto$mpg>median(auto$mpg)]=1
|
auto$mpg01[auto$mpg>median(auto$mpg)]=1
|
||||||
auto$mpg01
|
library(ISLR)
|
||||||
auto$mpg01
|
library(MASS)
|
||||||
auto$mpg01
|
library(class)
|
||||||
plots(auto)
|
train_bools <- (auto$year %% 2 == 0)
|
||||||
plot(auto)
|
|
||||||
boxplot(auto)
|
|
||||||
boxplot.matrix(auto)
|
|
||||||
help(boxplot)
|
|
||||||
boxplot(auto$mpg01,auto)
|
|
||||||
boxplot(auto$mpg,auto)
|
|
||||||
boxplot(auto$mpg)
|
|
||||||
boxplot(auto)
|
|
||||||
boxplot(mpg01 ~ auto)
|
|
||||||
boxplot(mpg01 ~)
|
|
||||||
boxplot(auto$mpg01 ~ auto)
|
|
||||||
attach(auto)
|
|
||||||
boxplot(mpg01)
|
|
||||||
boxplot(mpg01 ~ auto)
|
|
||||||
boxplot(mpg01 ~ auto,auto)
|
|
||||||
boxplot(mpg01 ~ auto,data = auto)
|
|
||||||
help(plot.table)
|
|
||||||
plot.table(auto)
|
|
||||||
help(plot.table)
|
|
||||||
plot(auto)
|
|
||||||
plot(auto,t="box")
|
|
||||||
help(plot.table)
|
|
||||||
help(plot.table,plot.frame=1)
|
|
||||||
help(plot.table)
|
|
||||||
help(plot.table,frame.plot=1)
|
|
||||||
help(plot.table)
|
|
||||||
help(plot.table,frame.plot=is.num)
|
|
||||||
help(plot.table)
|
|
||||||
plot(auto,t="box",frame.plot=1)
|
|
||||||
plot(auto,frame.plot=1)
|
|
||||||
plot(auto,frame.plot=1)
|
|
||||||
plot(auto,frame.plot=is.num)
|
|
||||||
plot(auto,frame.plot=0)
|
|
||||||
plot(auto,frame.plot="0")
|
|
||||||
plot(auto,frame.plot="1")
|
|
||||||
plot(auto,frame.plot=TRUE)
|
|
||||||
plot(auto,frame.plot=FALSE)
|
|
||||||
plot(auto,frame.plot=TRUE)
|
|
||||||
plot(auto,frame.plot=T)
|
|
||||||
plot(auto,frame.plot=1)
|
|
||||||
boxplot(mpg~mpg01,auto)
|
|
||||||
boxplot(mpg01 ~ mpg,auto)
|
|
||||||
boxplot(mpg01 ~ *,auto)
|
|
||||||
boxplot(mpg01 ~ ,auto)
|
|
||||||
boxplot(mpg01 ~ auto,auto)
|
|
||||||
boxplot(mpg01,auto)
|
|
||||||
boxplot(auto)
|
|
||||||
boxplot(auto,y=mpg01)
|
|
||||||
boxplot(auto,y=mpg)
|
|
||||||
boxplot(data = auto)
|
|
||||||
boxplot(auto)
|
|
||||||
help(for)
|
|
||||||
plot(auto,frame.plot=1)
|
|
||||||
plot(auto)
|
|
||||||
names(auto)
|
|
||||||
auto$name
|
|
||||||
help(sample)
|
|
||||||
x <- 1:12
|
|
||||||
x
|
|
||||||
sample(x)
|
|
||||||
help(sample)
|
|
||||||
sample(x,replace=T)
|
|
||||||
sample(x,replace=T)
|
|
||||||
sample(x,replace=F)
|
|
||||||
c
|
|
||||||
x
|
|
||||||
sample(x,replace=T)
|
|
||||||
x
|
|
||||||
help(sample)
|
|
||||||
sample(x[x>9])
|
|
||||||
sample(x[x>8])
|
|
||||||
help(sample)
|
|
||||||
x <- 1:10
|
|
||||||
sample(x[x>8])
|
|
||||||
sample(x[x>])
|
|
||||||
help(sample)
|
|
||||||
help(sample)
|
|
||||||
help(sample)
|
|
||||||
sample(auto,size=length(mpg01)/2)
|
|
||||||
x <- length(mpg01)
|
|
||||||
sample(x,size=length(mpg01)/2)
|
|
||||||
auto[sample(x,size=length(mpg01)/2)]
|
|
||||||
auto$mpg[sample(x,size=length(mpg01)/2)]
|
|
||||||
help(data.frame)
|
|
||||||
data.frame(
|
|
||||||
help(data.frame)
|
|
||||||
auto[sample(x,size=length(mpg01)/2)]
|
|
||||||
train = sample(x,size=length(mpg01)/2)
|
|
||||||
train =
|
|
||||||
auto[train]
|
|
||||||
auto$mpg[train]
|
|
||||||
auto$mpg[train,]
|
|
||||||
auto$mpg[train]
|
|
||||||
auto$mpg[23]
|
|
||||||
auto$mpg[228]
|
|
||||||
auto$mpg[391]
|
|
||||||
auto.test=auto[!train]
|
|
||||||
auto.train=auto[train]
|
|
||||||
auto.test
|
|
||||||
summary(auto.test)
|
|
||||||
train=(mpg<15)
|
|
||||||
train
|
|
||||||
train = (sample(x,size=length(mpg01)/2))
|
|
||||||
train
|
|
||||||
head(auto)
|
|
||||||
auto[,train[
|
|
||||||
auto[,train]
|
|
||||||
train
|
|
||||||
help(contains)
|
|
||||||
auto[1,train]
|
|
||||||
train
|
|
||||||
auto[[,train]]
|
|
||||||
auto[[1,train]]
|
|
||||||
autoi
|
|
||||||
head(auto)
|
|
||||||
head(auto[sample(nrow(auto),397/2)])
|
|
||||||
head(auto[sample(nrow(auto),3)])
|
|
||||||
data = data.frame(auto)
|
|
||||||
data
|
|
||||||
head(data[sample(nrow(data),3)])
|
|
||||||
nrow(data)
|
|
||||||
head(data[sample(ncol(data),3)])
|
|
||||||
head(data[sample(ncol(data),397/2)])
|
|
||||||
head(data[sample(ncol(data),3)])
|
|
||||||
head(data[sample(ncol(data),3)])
|
|
||||||
head(data[sample(ncol(data),3)])
|
|
||||||
head(data[sample(ncol(data),3)])
|
|
||||||
head(data[,sample(ncol(data),3)])
|
|
||||||
head(data[,sample(ncol(data),3)])
|
|
||||||
head(data[,sample(ncol(data),3)])
|
|
||||||
head(data[,sample(ncol(data),3)])
|
|
||||||
head(data[,sample(ncol(data),3)])
|
|
||||||
head(data[sample(ncol(data),3),])
|
|
||||||
head(data[sample(ncol(data),3),])
|
|
||||||
head(data[sample(ncol(data),3),])
|
|
||||||
head(data[sample(nrow(data),3),])
|
|
||||||
head(data[sample(nrow(data),397/2),])
|
|
||||||
head(data[sample(nrow(data),397/2),])
|
|
||||||
head(data[sample(nrow(data),397/2),])
|
|
||||||
head(data[sample(nrow(data),397/2),])
|
|
||||||
head(data[sample(nrow(data),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
head(auto[sample(nrow(auto),397/2),])
|
|
||||||
train = auto[sample(nrow(auto),397/2),]
|
|
||||||
[sample(nrow(auto),397/2),]
|
|
||||||
sample(nrow(auto),397/2)
|
|
||||||
train sample(nrow(auto),397/2)
|
|
||||||
train = sample(nrow(auto),397/2)
|
|
||||||
autp[train,]
|
|
||||||
auto[train,]
|
|
||||||
train = sample(nrow(auto),397/2)
|
|
||||||
head(auto[train,])
|
|
||||||
head(auto[!train,])
|
|
||||||
traindata = auto[train,]
|
|
||||||
testdata = auto[!train,]
|
|
||||||
testdata
|
|
||||||
traindata
|
|
||||||
length(traindata)
|
|
||||||
length(traindata$mpg)
|
|
||||||
198*2
|
|
||||||
summary(testdata)
|
|
||||||
testdata = auto[!train]
|
|
||||||
testdata
|
|
||||||
testdata = auto[!train,]
|
|
||||||
train
|
|
||||||
summary(train)
|
|
||||||
names(train)
|
|
||||||
head(traindata)
|
|
||||||
|
|
||||||
testdata = auto[!train,]
|
|
||||||
testdata
|
|
||||||
!train
|
|
||||||
train
|
|
||||||
?sample
|
|
||||||
sort(train)
|
|
||||||
train_vals = train
|
|
||||||
train = rep(false,397)
|
|
||||||
train = rep(F,397)
|
|
||||||
train
|
|
||||||
help for
|
|
||||||
?for
|
|
||||||
?for
|
|
||||||
help)for)
|
|
||||||
help(for)
|
|
||||||
help(for)
|
|
||||||
help lapply()
|
|
||||||
?lapply
|
|
||||||
sapply(train,
|
|
||||||
?sapply
|
|
||||||
sapply(train,
|
|
||||||
?sapply
|
|
||||||
train[train_vals]=T
|
|
||||||
train
|
|
||||||
traindata = auto[train,]
|
|
||||||
traindata
|
|
||||||
length(auto)
|
|
||||||
length(traindata)
|
|
||||||
length(traindata$mpg)
|
|
||||||
testdata=auto[!train,]
|
|
||||||
length(testdate$mpg)
|
|
||||||
length(testdata$mpg)
|
|
||||||
training_indices = sample(nrow(auto),397/2)
|
|
||||||
train_bools = rep(F,length(auto$mpg))
|
|
||||||
train_bools[training_indices]=T
|
|
||||||
head(train_bools)
|
|
||||||
length(train_bools)
|
|
||||||
train_data = auto[train_bools,]
|
train_data = auto[train_bools,]
|
||||||
test_data = auto[!train_bools,]
|
test_data = auto[!train_bools,]
|
||||||
summary(train_data)
|
help(knn)
|
||||||
summary(test_data)
|
help(knn)
|
||||||
lda.fit
|
train <- rbind(iris3[1:25,,1], iris3[1:25,,2], iris3[1:25,,3])
|
||||||
library(MASS)
|
test <- rbind(iris3[26:50,,1], iris3[26:50,,2], iris3[26:50,,3])
|
||||||
lda.fit
|
train
|
||||||
lda()
|
test
|
||||||
detach(auto)
|
|
||||||
mpg01
|
|
||||||
mpg
|
|
||||||
attach(test_data)
|
|
||||||
mpg01
|
|
||||||
names()
|
|
||||||
names(test_data)
|
|
||||||
ldf.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data)
|
|
||||||
detach(test_data)
|
|
||||||
ldf.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data)
|
|
||||||
lda.fit
|
|
||||||
lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data)
|
|
||||||
lda.fit
|
|
||||||
summary(lda.fit)
|
|
||||||
coefficients(lda.fit)
|
|
||||||
plot(lda.fit)
|
|
||||||
lda.pred=predict(lda.fit,test_data)
|
|
||||||
lda.pred=predict(lda.fit, !training_bools)
|
|
||||||
lda.pred=predict(lda.fit, !training_indices)
|
|
||||||
test_data
|
|
||||||
lda.pred=predict(lda.fit, test_data)
|
|
||||||
lda.pred
|
|
||||||
plot(lda.pred)
|
|
||||||
names(lda.pred)
|
|
||||||
lda.class=lda.pres$class
|
|
||||||
lda.class=lda.pred$class
|
|
||||||
table(lda.class,testdata)
|
|
||||||
table(lda.class,test_data)
|
|
||||||
length(lda.class)
|
|
||||||
length(test_data)
|
|
||||||
table(lda.class,test_data$mpg01)
|
|
||||||
mean(lda.class==test_data$mpg01)
|
|
||||||
sum(lda.pred$posterior[,1]>=.5)
|
|
||||||
sum(lda.pred$posterior[,1]<.5)
|
|
||||||
lda.pred$posterior[,1]
|
|
||||||
sum(lda.pred$posterior<.5)
|
|
||||||
lda.pred$posterior
|
|
||||||
lda.pred$posterior<5
|
|
||||||
lda.pred$posterior<.5
|
|
||||||
sum(lda.pred$posterior<.5)
|
|
||||||
sum(lda.pred$posterior<.5[,1])
|
|
||||||
sum(lda.pred$posterior<.5[1])
|
|
||||||
sum(lda.pred$posterior<.5[2])
|
|
||||||
lda.pred$posterior<.5[2]
|
|
||||||
lda.pred$posterior<.5
|
|
||||||
lda.pred$posterior
|
|
||||||
lda.pred$posterior[,1]
|
|
||||||
lda.pred$posterior[1,]
|
|
||||||
lda.pred$posterior[,2]
|
|
||||||
lda.pred$posterior[,1]
|
|
||||||
lda.pred$posterior[,1]>.5
|
|
||||||
sum(lda.pred$posterior[,1]>.5)
|
|
||||||
sum.bool(lda.pred$posterior[,1]>.5)
|
|
||||||
?sum
|
|
||||||
sum.bool(lda.pred$posterior[,1]>.5,na.rm=T)
|
|
||||||
sum(lda.pred$posterior[,1]>.5,na.rm=T)
|
|
||||||
sum(lda.pred$posterior[,1]>.5)
|
|
||||||
sum(lda.pred$posterior[,1]>.5,na.rm=T)
|
|
||||||
sum(lda.pred$posterior[,1]>=.5,na.rm=T)
|
|
||||||
sum(lda.pred$posterior[,1]<.5,na.rm=T)
|
|
||||||
mean(lda.pred$[,1]==test_data,na.rm=T)
|
|
||||||
lda.pred
|
|
||||||
lda.pred$class
|
|
||||||
lda.pred$class==test_data$mpg01
|
|
||||||
mean(lda.pred$class==test_data$mpg01,na.rm=T)
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
||||||
lda.fit
|
|
||||||
mean(lda.pred$class==test_data$mpg01,na.rm=T)
|
|
||||||
lda.pred=predict(lda.fit, test_data)
|
|
||||||
mean(lda.pred$class==test_data$mpg01,na.rm=T)
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
train_data == test_data
|
|
||||||
train_data$mpg01 == test_data$mpg01
|
|
||||||
lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
||||||
lda.pred=predict(lda.fit, test_data)
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
lda.pred
|
|
||||||
lda.pred$posterior[,1]
|
|
||||||
summary(lda.fit)
|
|
||||||
lda.fit
|
|
||||||
lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=test_data)
|
|
||||||
lda.fit
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
lda.pred=predict(lda.fit, test_data)
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
head(lda.pred)
|
|
||||||
lda.fit=lda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
||||||
lda.pred=predict(lda.fit, test_data)
|
|
||||||
head(lda.pred)
|
|
||||||
mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
||||||
qda.fit
|
|
||||||
qda.class=predict(qda.fit,test_data)$class
|
|
||||||
qda.class=predict(qda.fit,test_data,na.rm=T)$class
|
|
||||||
qda.class=predict(qda.fit,test_data)$class
|
|
||||||
qda.class
|
|
||||||
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
qda.pred=predict(qda.fit,test_data)
|
|
||||||
qda.pred=predict(qda.fit,test_data,na.rm=T)
|
|
||||||
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
||||||
glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial)
|
|
||||||
glm.probs=predict(glm.fit,test_data,type="response")
|
|
||||||
glm.pred=rep(0,199)
|
|
||||||
glm.pred[glm.probs>.5]=1
|
|
||||||
table(glm.pred,test_data$mpg01)
|
|
||||||
mean(glm.pred!=test_data$mpg01)
|
|
||||||
library(class)
|
|
||||||
?cbind
|
|
||||||
?knn
|
?knn
|
||||||
knn.fit = knn(train_data,test_data,auto$mpg01[training_indices])
|
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
|
||||||
knn.fit = knn(train_data,test_data,auto$mpg01[training_indices],k=1)
|
cl
|
||||||
knn.fit = knn(train_data,test_data,auto$mpg01[training_indices],k=1)
|
length(cl)
|
||||||
?knn
|
length(train)
|
||||||
training_indices
|
nrows(train)
|
||||||
train_bools
|
nrow(train)
|
||||||
knn.fit = knn(train_data,test_data,auto$mpg01[train_bools],k=1)
|
|
||||||
sdf = (mpg01<1)
|
|
||||||
sdf = (auto$mpg01<1)
|
|
||||||
sdf
|
|
||||||
train_bools
|
|
||||||
cbind(horsepower,displacement)
|
|
||||||
cbind(train_data$horsepower,displacement)
|
|
||||||
cbind(train_data$horsepower,train_data$displacement)
|
|
||||||
cbind(auto$horsepower,auto$displacement)[train_bools]
|
|
||||||
cbind(auto$horsepower,auto$displacement)[train_bools,]
|
|
||||||
cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
train.mpg01 = auto[train_bools]
|
|
||||||
train.mpg01 = auto$mpg01[train_bools]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
train.mpg01 = auto$mpg01[train_bools]
|
|
||||||
set.seed(56)
|
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
||||||
?cbind
|
|
||||||
?Knn
|
|
||||||
?knn
|
|
||||||
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
train.mpg01 = auto$mpg01[train_bools]
|
|
||||||
train.X = train.X[!is.na(train.X)]
|
|
||||||
test.X = data.frame(test.X,
|
|
||||||
train.mpg01 = train.mpg01[!is.na(train.mpg01)]
|
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
||||||
length(train.mpg01)
|
|
||||||
length(test.X)
|
|
||||||
text.X
|
|
||||||
test.X
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
||||||
length(test.X)
|
|
||||||
test.X
|
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
||||||
train.X
|
train.X
|
||||||
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
||||||
train.X
|
train.X
|
||||||
|
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
||||||
test.X
|
test.X
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
||||||
?knn
|
|
||||||
length(train.X)
|
|
||||||
length(train.X[1,])
|
|
||||||
length(train.X[,1])
|
|
||||||
?knn
|
|
||||||
plot(auto)
|
|
||||||
train.X = cbind(auto$horsepower,auto$displacement)[train_bools,]
|
|
||||||
test.X = cbind(auto$horsepower,auto$displacement)[!train_bools,]
|
|
||||||
train.mpg01 = auto$mpg01[train_bools]
|
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
||||||
train.X
|
train.X
|
||||||
test.X
|
train.mpg01 = auto$mpg01[train_bools]
|
||||||
train.mpg01
|
train.mpg01
|
||||||
|
length(train.mpg01)
|
||||||
|
nrow(train.X)
|
||||||
|
knn(train.X,train.Y,train.mpg01,K=1)
|
||||||
|
knn(train.X,train.Y,train.mpg01,k=1)
|
||||||
|
knn(train.X,test.X,train.mpg01,k=1)
|
||||||
|
train.X
|
||||||
|
na.omit(train.X)
|
||||||
|
?na.omit
|
||||||
|
na.omit(train.X)
|
||||||
|
na.omit(train.X)
|
||||||
|
knn(na.omit(train.X),test.X,train.mpg01,k=1)
|
||||||
|
knn(na.omit(train.X),test.X,na.omit(train.mpg01),k=1)
|
||||||
|
knn(na.omit(train.X),na.omit(test.X),na.omit(train.mpg01),k=1)
|
||||||
|
train.mpg012 = na.omit(auto$mpg01)[train_bools]
|
||||||
|
train.mpg012
|
||||||
|
train.mpg01
|
||||||
|
nrow(train)
|
||||||
|
na.omit(auto)
|
||||||
|
auto
|
||||||
|
na.omit(auto)
|
||||||
|
summary(auto)
|
||||||
|
summary(na.omit(auto))
|
||||||
|
Auto = na.omit(auto)
|
||||||
|
auto = na.omit(auto)
|
||||||
|
ncol(auto)
|
||||||
|
nrow(auto)
|
||||||
|
auto <- na.omit(auto)
|
||||||
|
train_bools <- (auto$year %% 2 == 0)
|
||||||
|
train_data = auto[train_bools,]
|
||||||
|
test_data = auto[!train_bools,]
|
||||||
|
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
||||||
|
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
||||||
|
train.mpg01 = auto$mpg01[train_bools]
|
||||||
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
||||||
|
mean(knn.pred != auto$mpg01)
|
||||||
|
mean(knn.pred != test_data$mpg01)
|
||||||
|
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
|
||||||
|
mean(knn.pred != test_data$mpg01)
|
||||||
|
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
|
||||||
|
mean(knn.pred != test_data$mpg01)
|
||||||
|
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
|
||||||
|
mean(knn.pred != test_data$mpg0)
|
||||||
|
knn.pred
|
||||||
|
length(knn.pred)
|
||||||
|
dim(knn.pred)
|
||||||
|
length(test_data)
|
||||||
|
ncol(test_data)
|
||||||
|
nrow(test_data)
|
||||||
q()
|
q()
|
||||||
|
113
hw3/answers
113
hw3/answers
@ -85,6 +85,7 @@ Part B: Choose one of Questions 10 or 11
|
|||||||
given car gets high or low gas mileage based on the Auto data
|
given car gets high or low gas mileage based on the Auto data
|
||||||
set.
|
set.
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(a) Create a binary variable, mpg01 , that contains a 1 if mpg
|
(a) Create a binary variable, mpg01 , that contains a 1 if mpg
|
||||||
contains a value above its median, and a 0 if mpg contains a
|
contains a value above its median, and a 0 if mpg contains a
|
||||||
value below its median. You can compute the median using the
|
value below its median. You can compute the median using the
|
||||||
@ -92,6 +93,9 @@ Part B: Choose one of Questions 10 or 11
|
|||||||
data.frame() function to create a single data set containing
|
data.frame() function to create a single data set containing
|
||||||
both mpg01 and the other Auto variables.
|
both mpg01 and the other Auto variables.
|
||||||
|
|
||||||
|
> auto$mpg01=rep(0,397)
|
||||||
|
> auto$mpg01[auto$mpg>median(auto$mpg)]=1
|
||||||
|
|
||||||
> auto$mpg01
|
> auto$mpg01
|
||||||
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0
|
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0
|
||||||
[38] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
[38] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
@ -106,6 +110,7 @@ Part B: Choose one of Questions 10 or 11
|
|||||||
[371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
|
[371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
|
||||||
|
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(b) Explore the data graphically in order to investigate the
|
(b) Explore the data graphically in order to investigate the
|
||||||
association between mpg01 and the other features. Which of the
|
association between mpg01 and the other features. Which of the
|
||||||
other features seem most likely to be useful in predicting mpg01
|
other features seem most likely to be useful in predicting mpg01
|
||||||
@ -119,6 +124,16 @@ Part B: Choose one of Questions 10 or 11
|
|||||||
Displacement is on the cusp and the other variables don't
|
Displacement is on the cusp and the other variables don't
|
||||||
have a terribly useful relationship with this median.
|
have a terribly useful relationship with this median.
|
||||||
|
|
||||||
|
The boxplots indicate that acceleration really isn't a great
|
||||||
|
predictor of mpg01, but displacement is. They also confirm
|
||||||
|
horsepower and weight as good predictors, and cylinders also
|
||||||
|
seems to be very strong, even though I didn't take that from
|
||||||
|
the scatter plots.
|
||||||
|
|
||||||
|
I will use mpg01 ~ horsepower + weight + cylinders + displacement
|
||||||
|
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(c) Split the data into a training set and a test set.
|
(c) Split the data into a training set and a test set.
|
||||||
|
|
||||||
Seems like a 50/50 random sampling is appropriate enough.
|
Seems like a 50/50 random sampling is appropriate enough.
|
||||||
@ -127,87 +142,133 @@ Part B: Choose one of Questions 10 or 11
|
|||||||
> train_bools = rep(F,length(auto$mpg))
|
> train_bools = rep(F,length(auto$mpg))
|
||||||
> train_bools[training_indices]=T
|
> train_bools[training_indices]=T
|
||||||
> head(train_bools)
|
> head(train_bools)
|
||||||
[1] FALSE TRUE FALSE FALSE TRUE FALSE
|
[1] TRUE TRUE TRUE FALSE TRUE FALSE
|
||||||
> length(train_bools)
|
> length(train_bools)
|
||||||
[1] 397
|
[1] 397
|
||||||
> train_data = auto[train_bools,]
|
> train_data = auto[train_bools,]
|
||||||
> test_data = auto[!train_bools,]
|
> test_data = auto[!train_bools,]
|
||||||
|
|
||||||
|
Actually, I changed this now, because a solution I found
|
||||||
|
online suggested a different test split and I was having
|
||||||
|
trouble with the KNN model, so I followed their style. I used:
|
||||||
|
|
||||||
|
> train <- (auto$year %% 2 == 0)
|
||||||
|
|
||||||
|
and then the rest is the same, with this vector used as train_bools.
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(d) Perform LDA on the training data in order to predict mpg01
|
(d) Perform LDA on the training data in order to predict mpg01
|
||||||
using the variables that seemed most associated with mpg01 in
|
using the variables that seemed most associated with mpg01 in
|
||||||
(b). What is the test error of the model obtained?
|
(b). What is the test error of the model obtained?
|
||||||
|
|
||||||
> lda.fit
|
> lda.fit
|
||||||
Call:
|
Call:
|
||||||
lda(mpg01 ~ horsepower + weight + acceleration + displacement,
|
lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
|
||||||
data = train_data)
|
|
||||||
|
|
||||||
Prior probabilities of groups:
|
Prior probabilities of groups:
|
||||||
0 1
|
0 1
|
||||||
0.5431472 0.4568528
|
0.4666667 0.5333333
|
||||||
|
|
||||||
Group means:
|
Group means:
|
||||||
horsepower weight acceleration displacement
|
horsepower weight cylinders displacement
|
||||||
0 129.08411 3557.757 14.55981 269.729
|
0 131.96939 3579.827 6.755102 268.4082
|
||||||
1 79.64444 2345.233 16.39222 116.800
|
1 77.96429 2313.598 4.071429 111.7188
|
||||||
|
|
||||||
Coefficients of linear discriminants:
|
Coefficients of linear discriminants:
|
||||||
LD1
|
LD1
|
||||||
horsepower 0.005678626
|
horsepower 0.0060634365
|
||||||
weight -0.001137499
|
weight -0.0011442212
|
||||||
acceleration -0.014950459
|
cylinders -0.6390942259
|
||||||
displacement -0.007401647
|
displacement 0.0004517291
|
||||||
|
|
||||||
|
|
||||||
Error Rate against test data:
|
|
||||||
|
***Test Data Error Rate:
|
||||||
> mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
> mean(lda.pred$class!=test_data$mpg01,na.rm=T)
|
||||||
[1] 0.1179487
|
[1] 0.1428571
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(e) Perform QDA on the training data in order to predict mpg01
|
(e) Perform QDA on the training data in order to predict mpg01
|
||||||
using the variables that seemed most associated with mpg01 in
|
using the variables that seemed most associated with mpg01 in
|
||||||
(b). What is the test error of the model obtained?
|
(b). What is the test error of the model obtained?
|
||||||
|
|
||||||
> qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
||||||
> qda.fit
|
> qda.fit
|
||||||
Call:
|
Call:
|
||||||
qda(mpg01 ~ horsepower + weight + acceleration + displacement,
|
lda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
|
||||||
data = train_data)
|
|
||||||
|
|
||||||
Prior probabilities of groups:
|
Prior probabilities of groups:
|
||||||
0 1
|
0 1
|
||||||
0.5431472 0.4568528
|
0.4666667 0.5333333
|
||||||
|
|
||||||
Group means:
|
Group means:
|
||||||
horsepower weight acceleration displacement
|
horsepower weight cylinders displacement
|
||||||
0 129.08411 3557.757 14.55981 269.729
|
0 131.96939 3579.827 6.755102 268.4082
|
||||||
1 79.64444 2345.233 16.39222 116.800
|
1 77.96429 2313.598 4.071429 111.7188
|
||||||
|
|
||||||
|
Coefficients of linear discriminants:
|
||||||
|
LD1
|
||||||
|
horsepower 0.0060634365
|
||||||
|
weight -0.0011442212
|
||||||
|
cylinders -0.6390942259
|
||||||
|
displacement 0.0004517291
|
||||||
|
|
||||||
Error Rate:
|
***Test Data Error Rate:
|
||||||
> mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
> mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
||||||
[1] 0.1025641
|
[1] 0.1428571
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(f) Perform logistic regression on the training data in order to
|
(f) Perform logistic regression on the training data in order to
|
||||||
predict mpg01 using the variables that seemed most associated
|
predict mpg01 using the variables that seemed most associated
|
||||||
with mpg01 in (b). What is the test error of the model obtained?
|
with mpg01 in (b). What is the test error of the model obtained?
|
||||||
|
|
||||||
> glm.fit=glm(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data,family=binomial)
|
> glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
||||||
> glm.probs=predict(glm.fit,test_data,type="response")
|
> glm.probs=predict(glm.fit,test_data,type="response")
|
||||||
> glm.pred=rep(0,199)
|
> glm.pred=rep(0,199)
|
||||||
> glm.pred[glm.probs>.5]=1
|
> glm.pred[glm.probs>.5]=1
|
||||||
|
|
||||||
|
***Test Data Error Rate:
|
||||||
> mean(glm.pred!=test_data$mpg01)
|
> mean(glm.pred!=test_data$mpg01)
|
||||||
[1] 0.120603
|
[1] 0.1407035
|
||||||
|
|
||||||
|
|
||||||
|
──────────────────────────────────────────────────────────────────────────
|
||||||
(g) Perform KNN on the training data, with several values of K,
|
(g) Perform KNN on the training data, with several values of K,
|
||||||
in order to predict mpg01 . Use only the variables that seemed
|
in order to predict mpg01 . Use only the variables that seemed
|
||||||
most associated with mpg01 in (b). What test errors do you
|
most associated with mpg01 in (b). What test errors do you
|
||||||
obtain? Which value of K seems to perform the best on this data
|
obtain? Which value of K seems to perform the best on this data
|
||||||
set?
|
set?
|
||||||
|
|
||||||
|
The knn method can't handle the NA values, so I dropped the incomplete rows with na.omit() and rebuilt the split:
|
||||||
|
|
||||||
|
> set.seed(1)
|
||||||
|
> auto <- na.omit(auto)
|
||||||
|
> train_bools <- (auto$year %% 2 == 0)
|
||||||
|
> train_data = auto[train_bools,]
|
||||||
|
> test_data = auto[!train_bools,]
|
||||||
|
|
||||||
|
> train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
||||||
|
> test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
||||||
|
> train.mpg01 = auto$mpg01[train_bools]
|
||||||
|
|
||||||
|
***Test Data Error Rates:
|
||||||
|
k = 1
|
||||||
|
> mean(knn.pred != test_data$mpg01)
|
||||||
|
[1] 0.1483516
|
||||||
|
k = 2
|
||||||
|
> mean(knn.pred != test_data$mpg01)
|
||||||
|
[1] 0.1593407
|
||||||
|
k = 3
|
||||||
|
> mean(knn.pred != test_data$mpg01)
|
||||||
|
[1] 0.1648352
|
||||||
|
k = 4
|
||||||
|
> mean(knn.pred != test_data$mpg0)
|
||||||
|
[1] 0.1813187
|
||||||
|
|
||||||
|
k = 1 looks like the best, since the error rate increases with k.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user