# College Scorecard exploration script.
#
# Reconstructed from an interactive R console transcript: the `>` prompts were
# removed so the file parses as R, and the outputs that were recorded in the
# session are kept as `# recorded:` comments next to the command that printed
# them.

# Directory holding the raw files, relative to the current working directory.
# Paths are built with file.path() instead of calling setwd(), so running the
# script does not change the working directory of the R session.
data_dir <- "Desktop/college-scorecard-raw-data-030216"

getwd()
list.files(data_dir)

# ---- Load the merged 2013 file ----
mydata <- read.csv(file.path(data_dir, "merged_2013_PP.csv"))
df <- data.frame(mydata)
head(df)
nrow(df)  # recorded: 7793
ncol(df)  # recorded: 1729

# ---- Split the wide table into two narrower files ----
# df1 keeps the first `split_at` columns; df2 keeps column 1 plus every
# column after `split_at`, so both halves share the first column.
# The upper bound comes from ncol(df) rather than a hard-coded 829, so the
# split still works if the column count differs from the recorded 1729.
split_at <- 900L
stopifnot(ncol(df) > split_at)
df1 <- df[, seq_len(split_at), drop = FALSE]
df2 <- df[, c(1L, seq(split_at + 1L, ncol(df))), drop = FALSE]
write.csv(df1, file = file.path(data_dir, "df1.csv"))
write.csv(df2, file = file.path(data_dir, "df2.csv"))
write.csv(names(df), file = file.path(data_dir, "col_names.csv"))

# ---- Inspect the locale columns ----
# NOTE(review): the recorded outputs show the literal factor level "NULL" in
# these cells. If that string stands for a missing value, consider passing
# na.strings = c("NA", "NULL") to read.csv() -- confirm before changing.
df[2692, 21]      # recorded: [1] NULL / Levels: NULL
df[2692, 20]      # recorded: [1] NULL / Levels: NULL
names(df)[20]     # recorded: "LOCALE"
names(df)[20:21]  # recorded: "LOCALE" "locale2"
df[2692, 20:21]   # recorded: row 2692 -> LOCALE = NULL, locale2 = NULL

# ---- Zip-code distance work ----
# NOTE(review): the recorded length below (1506704) does not match the
# nrow(df) of 7793 recorded above, and `mi_to_zipcode` is not referenced
# anywhere else in this file. `df` was presumably pointing at a different
# table when these commands were run -- confirm which file it was.
#
# which() drops NA comparisons, so rows whose second column is missing are
# not returned as all-NA rows.
reading <- df[which(df[, 2] == 19610), ]
reading[["normDist"]] <- exp(-reading[["mi_to_zipcode"]])

zips <- df[, 2]
length(zips)  # recorded: 1506704
zips <- unique(zips)
length(zips)  # recorded: 29246
write.csv(zips, file.path(data_dir, "all_zip_codes.csv"))

# ---- Keep only the columns listed in columns.csv ----
# columns.csv is read one entry per line; each line is used as a column name.
dat <- readLines(file.path(data_dir, "columns.csv"))
smalldf <- df[, dat]

# ---- Column descriptions ----
# 1 = id
# 4 = Name of college
# 5 = City
# 6 = State (the original notes said "5", which duplicates City -- confirm)
# 7 = zip (or zip+4)