2017-02-09 08:17:03 +00:00
|
|
|
auto = read.table("auto.data",header=T,na.strings="?")
|
|
|
|
auto$mpg01=rep(0,397)
|
|
|
|
auto$mpg01[auto$mpg>median(auto$mpg)]=1
|
2017-02-10 03:59:23 +00:00
|
|
|
library(ISLR)
|
2017-02-09 08:17:03 +00:00
|
|
|
library(MASS)
|
|
|
|
library(class)
|
2017-02-10 03:59:23 +00:00
|
|
|
train_bools <- (auto$year %% 2 == 0)
|
|
|
|
train_data = auto[train_bools,]
|
|
|
|
test_data = auto[!train_bools,]
|
|
|
|
help(knn)
|
|
|
|
help(knn)
|
|
|
|
train <- rbind(iris3[1:25,,1], iris3[1:25,,2], iris3[1:25,,3])
|
|
|
|
test <- rbind(iris3[26:50,,1], iris3[26:50,,2], iris3[26:50,,3])
|
|
|
|
train
|
|
|
|
test
|
2017-02-09 08:17:03 +00:00
|
|
|
?knn
|
2017-02-10 03:59:23 +00:00
|
|
|
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
|
|
|
|
cl
|
|
|
|
length(cl)
|
|
|
|
length(train)
|
|
|
|
nrows(train)
|
|
|
|
nrow(train)
|
2017-02-09 08:17:03 +00:00
|
|
|
train.X
|
|
|
|
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
|
|
train.X
|
2017-02-10 03:59:23 +00:00
|
|
|
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
2017-02-09 08:17:03 +00:00
|
|
|
test.X
|
2017-02-10 03:59:23 +00:00
|
|
|
train.X
|
2017-02-09 08:17:03 +00:00
|
|
|
train.mpg01 = auto$mpg01[train_bools]
|
2017-02-10 03:59:23 +00:00
|
|
|
train.mpg01
|
|
|
|
length(train.mpg01)
|
|
|
|
nrow(train.X)
|
|
|
|
knn(train.X,train.Y,train.mpg01,K=1)
|
|
|
|
knn(train.X,train.Y,train.mpg01,k=1)
|
|
|
|
knn(train.X,test.X,train.mpg01,k=1)
|
2017-02-09 08:17:03 +00:00
|
|
|
train.X
|
2017-02-10 03:59:23 +00:00
|
|
|
na.omit(train.X)
|
|
|
|
?na.omit
|
|
|
|
na.omit(train.X)
|
|
|
|
na.omit(train.X)
|
|
|
|
knn(na.omit(train.X),test.X,train.mpg01,k=1)
|
|
|
|
knn(na.omit(train.X),test.X,na.omit(train.mpg01),k=1)
|
|
|
|
knn(na.omit(train.X),na.omit(test.X),na.omit(train.mpg01),k=1)
|
|
|
|
train.mpg012 = na.omit(auto$mpg01)[train_bools]
|
|
|
|
train.mpg012
|
2017-02-09 08:17:03 +00:00
|
|
|
train.mpg01
|
2017-02-10 03:59:23 +00:00
|
|
|
nrow(train)
|
|
|
|
na.omit(auto)
|
|
|
|
auto
|
|
|
|
na.omit(auto)
|
|
|
|
summary(auto)
|
|
|
|
summary(na.omit(auto))
|
|
|
|
Auto = na.omit(auto)
|
|
|
|
auto = na.omit(auto)
|
|
|
|
ncol(auto)
|
|
|
|
nrow(auto)
|
|
|
|
auto <- na.omit(auto)
|
|
|
|
train_bools <- (auto$year %% 2 == 0)
|
|
|
|
train_data = auto[train_bools,]
|
|
|
|
test_data = auto[!train_bools,]
|
|
|
|
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[train_bools,]
|
|
|
|
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$acceleration)[!train_bools,]
|
|
|
|
train.mpg01 = auto$mpg01[train_bools]
|
2017-02-09 08:17:03 +00:00
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
2017-02-10 03:59:23 +00:00
|
|
|
mean(knn.pred != auto$mpg01)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
|
|
|
|
mean(knn.pred != test_data$mpg0)
|
|
|
|
knn.pred
|
|
|
|
length(knn.pred)
|
|
|
|
dim(knn.pred)
|
|
|
|
length(test_data)
|
|
|
|
ncol(test_data)
|
|
|
|
nrow(test_data)
|
2017-02-09 08:17:03 +00:00
|
|
|
q()
|
2017-02-10 04:12:09 +00:00
|
|
|
qda.fit
|
|
|
|
fit.qda
|
|
|
|
qda.fit
|
|
|
|
auto
|
|
|
|
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
|
|
|
|
import(MASS)
|
|
|
|
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
|
|
|
|
import(class)
|
|
|
|
library(MASS)
|
|
|
|
qda.fit = qda(mpg01 ~ horsepower + weight + cylinders + displacement, data = train_data)
|
|
|
|
qda.fit
|
|
|
|
> mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
|
|
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
|
|
qda.fit=qda(mpg01 ~ horsepower + weight + acceleration + displacement,data=train_data)
|
|
|
|
qda.pred=predict(qda.fit,test_data,na.rm=T)
|
|
|
|
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
|
|
qda.fit=qda(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data)
|
|
|
|
qda.pred=predict(qda.fit,test_data,na.rm=T)
|
|
|
|
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
|
|
qda.fit
|
|
|
|
qda.pred=predict(qda.fit,test_data,na.rm=T)
|
|
|
|
mean(qda.pred$class!=test_data$mpg01,na.rm=T)
|
|
|
|
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
|
|
|
glm.probs=predict(glm.fit,test_data,type="response")
|
|
|
|
glm.pred=rep(0,199)
|
|
|
|
glm.pred[glm.probs>.5]=1
|
|
|
|
mean(glm.pred!=test_data$mpg01)
|
|
|
|
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
|
|
|
glm.probs=predict(glm.fit,test_data,type="response")
|
|
|
|
glm.pred=rep(0,length(test_data)
|
|
|
|
glm.pred[glm.probs>.5]=1
|
|
|
|
mean(glm.pred!=test_data$mpg01)
|
|
|
|
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
|
|
|
glm.probs=predict(glm.fit,test_data,type="response")
|
|
|
|
glm.pred=rep(0,length(test_data))
|
|
|
|
glm.pred[glm.probs>.5]=1
|
|
|
|
mean(glm.pred!=test_data$mpg01)
|
|
|
|
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
|
|
|
glm.probs=predict(glm.fit,test_data,type="response")
|
|
|
|
glm.pred=rep(0,length(test_data))
|
|
|
|
glm.pred[glm.probs>.5]=1
|
|
|
|
mean(glm.pred!=test_data$mpg01)
|
|
|
|
glm.pred
|
|
|
|
glm.pred=rep(0,length(test_data))
|
|
|
|
glm.pred
|
|
|
|
test_data
|
|
|
|
glm.fit=glm(mpg01 ~ horsepower + weight + cylinders + displacement,data=train_data,family=binomial)
|
|
|
|
glm.probs=predict(glm.fit,test_data,type="response")
|
|
|
|
glm.pred=rep(0,nrow(test_data))
|
|
|
|
glm.pred[glm.probs>.5]=1
|
|
|
|
mean(glm.pred!=test_data$mpg01)
|
|
|
|
set.seed(1)
|
|
|
|
auto <- na.omit(auto)
|
|
|
|
train_bools <- (auto$year %% 2 == 0)
|
|
|
|
train_data = auto[train_bools,]
|
|
|
|
test_data = auto[!train_bools,]
|
|
|
|
train.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[train_bools,]
|
|
|
|
test.X = cbind(auto$horsepower,auto$displacement,auto$weight,auto$cylinders)[!train_bools,]
|
|
|
|
train.mpg01 = auto$mpg01[train_bools]
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
|
|
|
|
mean(knn.pred != test_data$mpg0)
|
|
|
|
import(class)
|
|
|
|
library(class)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=1)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=2)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=3)
|
|
|
|
mean(knn.pred != test_data$mpg01)
|
|
|
|
knn.pred = knn(train.X,test.X,train.mpg01,k=4)
|
|
|
|
mean(knn.pred != test_data$mpg0)
|
|
|
|
q()
|