diff --git a/hw1/.RData b/hw1/.RData new file mode 100644 index 0000000..353da23 Binary files /dev/null and b/hw1/.RData differ diff --git a/hw1/.Rhistory b/hw1/.Rhistory new file mode 100644 index 0000000..8056b14 --- /dev/null +++ b/hw1/.Rhistory @@ -0,0 +1,60 @@ +Auto = read.table("Auto.data") +dim(Auto) +range(Auto) +Auto = na.omit(Auto) +fix(Auto) +range(Auto) +dim(Auto) +Auto = read.csv("Auto.csv",header=T,na.strings="?") +Auto = read.table("Auto.data",header=T,na.strings="?") +dim(Auto) +range(Auto) +Auto=na.omit(Auto) +range(Auto) +Auto[1:4,] +Auto +range(Auto) +names(Auto) +fix(Auto) +plot(cylinders,mpg) +plot(cylinders,mpg) +attach(Auto) +plot(cylinders,mpg) +names() +names(Auto) +range(mpg) +range(names(Auto)) +sapply() +sapply(1:3) +sapply(1:3,range(x))) +sapply(1:3,range(x)) +sapply(names(),range(x)) +help(sapply) +sapply(names(Auto),range(x)) +sapply(names(Auto),range(X)) +help(sapply) +sapply(names(Auto),range) +help(sapply) +sapply(Auto,range) +cylinders=as.factor(cylinders) +lapply(Auto,class) +auto = Auto +lapply(auto,class) +summary(Auto) +plot(Auto) +pairs(Auto) +sapply(Auto,range) +origin <- as.factor(origin) +cols.qlt = names(auto) +cols.qlt +cols.qlt = names(auto) %in% c("name,"origin") +cols.qlt2 = names(auto) %in% c("name,"origin") +cols.qlt = names(auto) %in% c("name","origin") +cols.qlt +lapply(auto[, !cols.qlt], range) +lapply(auto[, !cols.qlt], function(x){ c('mean'=mean(x), 'sd'=sd(x)))}) +lapply(auto[, !cols.qlt], function(x){ c('mean'=mean(x), 'sd'=sd(x))}) +lapply(auto[, !cols.qlt], function(x){ c('mu'=mean(x), 'sigma'=sd(x))}) +lapply(auto[-(10:85), !cols.qlt], function(x){ c('mean'=mean(x), 'sd'=sd(x))}) +lapply(auto[-(10:85), !cols.qlt], function(x){ c('mu'=mean(x), 'sigma'=sd(x))}) +q() diff --git a/hw1/answers b/hw1/answers index 8abcf15..70f0d47 100644 --- a/hw1/answers +++ b/hw1/answers @@ -252,7 +252,11 @@ remains? scatterplots or other tools of your choice. 
Create some plots highlighting the relationships among the predictors. Comment on your findings. + I plotted every pairwise scatterplot; the result is included as auto_pairs.png. There are a number of uncorrelated predictors, it seems, but many relationships can also be discerned: mpg and cylinders; mpg and displacement; mpg and horsepower; mpg and weight; mpg and year, even; horsepower and displacement. Really, there are many relationships, but the interesting ones are probably those involving mpg. The strong linear relationships between horsepower, weight, and displacement make sense because they're pretty much correlated by design, as engineers make larger engines to handle more weight and so on. The overall trend is that as horsepower, weight, and displacement increase, mpg decreases. We also see that mpg increases as the year increases, i.e., as we develop more sophisticated technology. + (f) Suppose that we wish to predict gas mileage ( mpg ) on the basis of the other variables. Do your plots suggest that any of the other variables might be useful in predicting mpg ? Justify your answer. +Well, I pretty much just answered that. The year is a great predictor: it appears we will likely continue to improve mpg slowly and in a linear fashion with time. There is a non-linear relationship that gives a strong mpg response as weight/displacement/horsepower decrease, so it's quite clear that these are strong predictors of mpg. There's also a relationship with cylinders, but again, this is really just part of the trend of vehicles with more weight being designed with larger engines. Finally, it also seems that origin "3" makes cars with slightly better gas mileage than origin "2", and origin "2" in turn makes cars with better mpg than origin "1". I can't find it in the text, but I assume origin 3 is Japan, 2 is Europe, and 1 is the US, just based on my own personal bias about society. +