# Intro -------------------------------------------------------------------
# "Data Manipulation" means many things. It could actually be the title of this
# course! In this lecture we look at some common functions and strategies for
# manipulating data.
# Let's work with data from lecture 2.
load("../data/datasets_L02.Rda")
# The apply family of functions -------------------------------------------
#
# R is famous (or perhaps infamous) for its apply functions. Put simply the
# apply functions allow you to "apply" functions to groups of data. The four
# we'll focus on are:
# - apply()
# - lapply()
# - sapply()
# - tapply()
# apply() applies a function over columns or rows of a matrix (or array);
# lapply() applies a function over a list or vector; it returns a list;
# sapply() is like lapply() except it attempts to "simplify" output;
# tapply() applies a function to groups within a vector and returns a table
# of results.
# apply
# The syntax for apply is apply(X, MARGIN, FUN, ...) where X is a matrix, MARGIN
# is the row or column indicator (1 or 2), FUN is the function to apply, and ...
# are additional arguments for FUN.
# Example: Make a matrix called "x"
x <- cbind(x1 = rep(1:3,3), x2 = c(4:1, 2:5, 10))
x
## x1 x2
## [1,] 1 4
## [2,] 2 3
## [3,] 3 2
## [4,] 1 1
## [5,] 2 2
## [6,] 3 3
## [7,] 1 4
## [8,] 2 5
## [9,] 3 10
class(x)
## [1] "matrix"
# apply a function across rows
apply(x, 1, sum)
## [1] 5 5 5 2 4 6 5 7 13
# apply a function down columns
apply(x, 2, mean)
## x1 x2
## 2.000000 3.777778
# apply the mean function to the columns with the trim argument set to 0.2. Trim
# is the fraction (0 to 0.5) of observations to be trimmed from each end of x
# before the mean is computed.
apply(x, 2, mean, trim=.2)
## x1 x2
## 2.000000 3.285714
# apply the range function; this returns a matrix
apply(x, 2, range)
## x1 x2
## [1,] 1 1
## [2,] 3 10
# apply will work on data frames as well, but internally R converts it to a
# matrix first.
x <- as.data.frame(x)
class(x)
## [1] "data.frame"
apply(x, 2, mean)
## x1 x2
## 2.000000 3.777778
# By the way, R has built-in functions for calculating row and column sums and
# means:
colSums(x)
## x1 x2
## 18 34
rowSums(x)
## [1] 5 5 5 2 4 6 5 7 13
colMeans(x)
## x1 x2
## 2.000000 3.777778
rowMeans(x)
## [1] 2.5 2.5 2.5 1.0 2.0 3.0 2.5 3.5 6.5
# From the help manual: "These functions are equivalent to use of apply
# with FUN = mean or FUN = sum with appropriate margins, but are a lot faster."
# lapply
# The syntax for lapply is lapply(X, FUN, ...) where X is a vector or list, FUN
# is the function to apply, and ... are additional arguments for FUN.
# Example: make a list called "y". runif() generates random numbers from a
# uniform distribution. rnorm() generates random numbers from a normal dist'n.
y <- list(a=runif(10), b=rnorm(12), c=rnorm(15,mean=10,sd=2))
y
## $a
## [1] 0.16288853 0.13024549 0.43165707 0.79290806 0.04327717 0.11457253
## [7] 0.71970024 0.42679461 0.56654121 0.68265006
##
## $b
## [1] -0.82546748 1.59137759 -0.92957692 0.88255394 -0.90977392
## [6] -0.76758337 -0.65939087 -0.53950564 -0.01493112 -0.19115840
## [11] 0.63843027 1.03275926
##
## $c
## [1] 11.430433 10.432822 6.740472 9.945333 11.111744 10.126952 10.795949
## [8] 15.756198 7.651695 9.180152 10.423016 12.549550 10.753232 10.390357
## [15] 6.616830
# since these are random numbers, your list object will differ from mine.
class(y)
## [1] "list"
# Now we use lapply to "apply" functions to the list components.
# number of elements in each list item.
lapply(y, length)
## $a
## [1] 10
##
## $b
## [1] 12
##
## $c
## [1] 15
# mean of each component
lapply(y, mean)
## $a
## [1] 0.4071235
##
## $b
## [1] -0.05768889
##
## $c
## [1] 10.26032
# standard deviation of each component
lapply(y, sd)
## $a
## [1] 0.2795103
##
## $b
## [1] 0.8775297
##
## $c
## [1] 2.266138
# get the 2nd element of each component
lapply(y, `[`, 2)
## $a
## [1] 0.1302455
##
## $b
## [1] 1.591378
##
## $c
## [1] 10.43282
# That last one may seem weird. It turns out subsetting brackets are actually
# functions.
# the following are identical
y$a[2]
## [1] 0.1302455
`[`(y$a,2)
## [1] 0.1302455
# Obviously the first is easier to read, but the second implies we can use it as
# an apply function.
# Actually just about everything is a function in R. Even the math operators:
`+`(3,4)
## [1] 7
# We can create our own functions when using apply. Below we apply a function
# that calculates the standard error:
lapply(y, function(x) sd(x)/sqrt(length(x)))
## $a
## [1] 0.08838892
##
## $b
## [1] 0.253321
##
## $c
## [1] 0.5851144
# What we just did was create an "anonymous" function on the fly. We wrote a
# temporary program to calculate the standard error of the 3 list elements in y.
# A digression on writing Functions in R ----------------------------------
# R comes with many functions, such as mean, median, sd, cor, and so on. But R
# allows you to easily write your own functions. To do so, you use the
# function() function, with arguments of your own creation.
# Here's a simple (and useless) function that takes a number and adds 1 to it:
add1 <- function(num) num + 1
add1(num = 4)
## [1] 5
add1(num = c(0,1,1,2))
## [1] 1 2 2 3
# In the function argument we define an argument called "num". After that we
# create an expression that takes the value of "num". Sort of like f(x) = x + 1.
# We can write functions with more than one argument. Here's a function that
# calculates body mass index (BMI) in pounds:
bmi <- function(weight, height) (weight/(height^2))*703
bmi(weight=215, height=69)
## [1] 31.74648
# We can add yet another argument (with a default) to specify metric or English
# units:
bmi <- function(weight, height, metric=TRUE){
if(metric==TRUE) weight/(height^2)
else (weight/(height^2))*703
}
bmi(215,69,metric=FALSE)
## [1] 31.74648
# The if() function evaluates a condition. If TRUE, it executes whatever
# follows. The else statement that follows the if() executes in the event the
# condition in the if() function is FALSE.
# We can also add a check to return a specific error message if someone tries
# to use bmi() with something other than numbers:
bmi <- function(weight, height, metric=TRUE){
if(!is.numeric(weight) || !is.numeric(height)){
stop("please enter numbers")
}
if(metric==TRUE) weight/(height^2)
else (weight/(height^2))*703
}
# bmi(weight = 215, "69", metric = FALSE) returns an error with the message
# "please enter numbers"
# is.numeric returns TRUE if a vector is numeric. !is.numeric returns TRUE if a
# vector is not numeric. The stop() function stops a function and returns an
# error message.
# And down the rabbit hole we go! R programming is quite powerful. However, you
# can be an effective data wrangler and analyst without being a prolific R
# developer/programmer. On the other hand, I encourage you to learn more. The
# Art of R Programming by Normal Matloff is a great book to get you started. But
# for this class, this is enough programming. Our goal is to quickly and
# efficiently get data into R and prepare it for analysis. We can almost always
# do that with existing functions or simple functions of our own creation.
# Once we create a function we can use it by itself or use it with an apply
# function. Below we create a function called "spread" that calculates the
# difference between the maximum and minimum values in a vector. The diff()
# function is a base R function that calculates lagged differences.
spread <- function(x) diff(range(x))
# use on a vector
spread(y$a)
## [1] 0.7496309
# use with lapply on a list
lapply(y, spread)
## $a
## [1] 0.7496309
##
## $b
## [1] 2.520955
##
## $c
## [1] 9.139368
# Recall that data frames are actually lists, so we can use lapply on data
# frames. Let's use spread on the weather data frame. Now, obviously spread
# won't work on non-numeric functions, so we need to select only numeric
# columns. Let's modify our spread function to only work for numeric vectors or
# otherwise give a message. To do this, we'll need to use {} braces so our
# function can have multiple lines of code. We also add na.rm=TRUE to the range
# function to make it drop NA values before calculating the range.
spread <- function(x) {
if(!is.numeric(x)) "Not a number"
else diff(range(x, na.rm = TRUE))
}
lapply(weather, spread)
## $EST
## [1] "Not a number"
##
## $Max.TemperatureF
## [1] 70
##
## $Mean.TemperatureF
## [1] 66
##
## $Min.TemperatureF
## [1] 87
##
## $Max.Dew.PointF
## [1] 72
##
## $MeanDew.PointF
## [1] 79
##
## $Min.DewpointF
## [1] 81
##
## $Max.Humidity
## [1] 65
##
## $Mean.Humidity
## [1] 71
##
## $Min.Humidity
## [1] 80
##
## $Max.Sea.Level.PressureIn
## [1] 1.05
##
## $Mean.Sea.Level.PressureIn
## [1] 1.02
##
## $Min.Sea.Level.PressureIn
## [1] 1.23
##
## $Max.VisibilityMiles
## [1] 3
##
## $Mean.VisibilityMiles
## [1] 7
##
## $Min.VisibilityMiles
## [1] 10
##
## $Max.Wind.SpeedMPH
## [1] 28
##
## $Mean.Wind.SpeedMPH
## [1] 14
##
## $Max.Gust.SpeedMPH
## [1] 53
##
## $PrecipitationIn
## [1] "Not a number"
##
## $CloudCover
## [1] 8
##
## $Events
## [1] "Not a number"
##
## $WindDirDegrees
## [1] 359
# Recall the allStocks list. It's a list of 7 data frames. We can apply
# functions to each data frame using lapply:
lapply(allStocks, dim)
## $bbby.csv
## [1] 251 6
##
## $flws.csv
## [1] 251 6
##
## $foxa.csv
## [1] 251 6
##
## $ftd.csv
## [1] 115 6
##
## $tfm.csv
## [1] 251 6
##
## $twx.csv
## [1] 251 6
##
## $viab.csv
## [1] 251 6
lapply(allStocks, head, n = 3)
## $bbby.csv
## ï..Date Open High Low Close Volume
## 1 26-Mar-14 67.76 68.05 67.18 67.25 1785164
## 2 25-Mar-14 67.61 67.93 67.34 67.73 1571625
## 3 24-Mar-14 67.73 68.00 66.99 67.26 1742341
##
## $flws.csv
## ï..Date Open High Low Close Volume
## 1 26-Mar-14 5.63 5.71 5.43 5.52 158853
## 2 25-Mar-14 5.43 5.67 5.34 5.57 255168
## 3 24-Mar-14 5.68 5.74 5.40 5.40 219552
##
## $foxa.csv
## ï..Date Open High Low Close Volume
## 1 4-Apr-14 34.01 34.09 32.82 32.87 18400889
## 2 3-Apr-14 33.75 34.15 33.46 33.74 17763197
## 3 2-Apr-14 32.94 33.74 32.83 33.59 18810055
##
## $ftd.csv
## ï..Date Open High Low Close Volume
## 1 26-Mar-14 32.12 32.75 31.69 31.79 168393
## 2 25-Mar-14 31.84 32.10 31.17 31.77 147837
## 3 24-Mar-14 31.73 32.45 31.21 31.60 100213
##
## $tfm.csv
## ï..Date Open High Low Close Volume
## 1 26-Mar-14 34.19 34.50 33.31 33.46 611916
## 2 25-Mar-14 34.19 34.54 33.66 34.04 498802
## 3 24-Mar-14 34.51 34.82 33.76 34.08 807884
##
## $twx.csv
## ï..Date Open High Low Close Volume
## 1 4-Apr-14 67.29 67.69 66.42 66.51 3889885
## 2 3-Apr-14 67.22 67.61 66.72 66.75 4714218
## 3 2-Apr-14 66.23 67.16 66.02 67.06 5394136
##
## $viab.csv
## ï..Date Open High Low Close Volume
## 1 4-Apr-14 87.41 87.75 84.48 84.74 2801522
## 2 3-Apr-14 87.73 88.03 86.78 87.16 2039187
## 3 2-Apr-14 87.43 87.86 86.55 87.37 3017323
# column means for all but 1st column, rounded to 2 decimal places
lapply(allStocks, function(x) round(colMeans(x[,-1]),2))
## $bbby.csv
## Open High Low Close Volume
## 72.08 72.71 71.50 72.08 2055076.43
##
## $flws.csv
## Open High Low Close Volume
## 5.58 5.69 5.46 5.58 200264.26
##
## $foxa.csv
## Open High Low Close Volume
## 32.42 32.79 32.14 32.45 12481889.39
##
## $ftd.csv
## Open High Low Close Volume
## 32.08 32.60 31.62 32.11 224690.97
##
## $tfm.csv
## Open High Low Close Volume
## 45.01 45.62 44.42 45.00 742273.83
##
## $twx.csv
## Open High Low Close Volume
## 63.73 64.26 63.23 63.73 4817040.01
##
## $viab.csv
## Open High Low Close Volume
## 78.32 79.05 77.65 78.32 2777436.55
# Proportion of times stock closed higher than it opened?
lapply(allStocks, function(x) mean(x$Close > x$Open))
## $bbby.csv
## [1] 0.5099602
##
## $flws.csv
## [1] 0.4501992
##
## $foxa.csv
## [1] 0.501992
##
## $ftd.csv
## [1] 0.4956522
##
## $tfm.csv
## [1] 0.501992
##
## $twx.csv
## [1] 0.4780876
##
## $viab.csv
## [1] 0.5219124
# Biggest change in Open and Closing price:
lapply(allStocks, function(x)max(x$Close - x$Open))
## $bbby.csv
## [1] 2.66
##
## $flws.csv
## [1] 0.54
##
## $foxa.csv
## [1] 3.65
##
## $ftd.csv
## [1] 1.65
##
## $tfm.csv
## [1] 2.06
##
## $twx.csv
## [1] 2.09
##
## $viab.csv
## [1] 3.93
# Largest volume on what day:
lapply(allStocks, function(x) x[which.max(x$Volume),])
## $bbby.csv
## ï..Date Open High Low Close Volume
## 53 9-Jan-14 72.22 72.26 68.83 69.75 17536166
##
## $flws.csv
## ï..Date Open High Low Close Volume
## 187 28-Jun-13 6.17 6.24 6.07 6.19 935794
##
## $foxa.csv
## ï..Date Open High Low Close Volume
## 202 18-Jun-13 31.25 32.07 30.96 31.56 35906383
##
## $ftd.csv
## ï..Date Open High Low Close Volume
## 100 31-Oct-13 34.41 36.39 33.58 35.1 2051481
##
## $tfm.csv
## ï..Date Open High Low Close Volume
## 84 22-Nov-13 42.49 42.58 40.03 40.87 10226520
##
## $twx.csv
## ï..Date Open High Low Close Volume
## 235 1-May-13 59.5 59.97 58.23 59.48 12382842
##
## $viab.csv
## ï..Date Open High Low Close Volume
## 170 2-Aug-13 79.05 80.93 78.35 79.17 9805627
# sapply
# The syntax for sapply is pretty much identical to lapply, except the output is
# simplified.
# Example:
sapply(y,mean)
## a b c
## 0.40712350 -0.05768889 10.26031560
sapply(y,sd)
## a b c
## 0.2795103 0.8775297 2.2661384
# apply a function of our own creation, one that calculates the standard error:
sapply(y, function(x) sd(x)/sqrt(length(x)))
## a b c
## 0.08838892 0.25332101 0.58511442
# Watch what happens when we use sapply() with our spread function:
sapply(weather, spread)
## EST Max.TemperatureF
## "Not a number" "70"
## Mean.TemperatureF Min.TemperatureF
## "66" "87"
## Max.Dew.PointF MeanDew.PointF
## "72" "79"
## Min.DewpointF Max.Humidity
## "81" "65"
## Mean.Humidity Min.Humidity
## "71" "80"
## Max.Sea.Level.PressureIn Mean.Sea.Level.PressureIn
## "1.05" "1.02"
## Min.Sea.Level.PressureIn Max.VisibilityMiles
## "1.23" "3"
## Mean.VisibilityMiles Min.VisibilityMiles
## "7" "10"
## Max.Wind.SpeedMPH Mean.Wind.SpeedMPH
## "28" "14"
## Max.Gust.SpeedMPH PrecipitationIn
## "53" "Not a number"
## CloudCover Events
## "8" "Not a number"
## WindDirDegrees
## "359"
# Notice "simplification" has resulted in everything being converted to
# character! Perhaps not what we wanted.
# tapply
# The syntax for tapply() is tapply(X, INDEX, FUN, ...), where X is usually a
# vector, INDEX is a list of one or more factors, each of same length as X, FUN
# is the function to apply, and ... are additional arguments for FUN.
# A good way to think of tapply is...
# 1. take the 1st vector
# 2. split into groups according to 2nd vector
# 3. apply the given function to each group.
# Example:
# 1. take weather$Mean.TemperatureF
# 2. split into groups according weather$Events
# 3. apply the given function to each group.
tapply(weather$Mean.TemperatureF, weather$Events, mean)
## Fog Fog-Rain
## 52.00515 62.77273 60.56250
## Fog-Rain-Snow Fog-Rain-Thunderstorm Fog-Thunderstorm
## 36.85714 76.00000 74.00000
## Rain Rain-Snow Rain-Thunderstorm
## 62.27174 35.00000 75.87500
## Snow Thunderstorm
## 26.28571 82.50000
# So we see that tapply is a sort of short cut for lapply or sapply when you
# have uneven or "ragged" groups, that is groups of unequal size, that are not
# in a list.
# tapply also allows anonymous functions. For example here's how we can find the
# lowest three minimum temperatures in each Event group
tapply(weather$Min.TemperatureF, weather$Events, function(x)sort(x)[1:3])
## [[1]]
## [1] 16 16 17
##
## $Fog
## [1] 31 32 35
##
## $`Fog-Rain`
## [1] 31 33 39
##
## $`Fog-Rain-Snow`
## [1] 30 30 31
##
## $`Fog-Rain-Thunderstorm`
## [1] 66 67 67
##
## $`Fog-Thunderstorm`
## [1] 61 NA NA
##
## $Rain
## [1] 20 26 26
##
## $`Rain-Snow`
## [1] 28 30 32
##
## $`Rain-Thunderstorm`
## [1] 54 60 61
##
## $Snow
## [1] 12 12 13
##
## $Thunderstorm
## [1] 73 73 NA
# Notice we got a list. Also notice the NAs for the "Fog-Thunderstorm" and
# "Thunderstorm" events. Why is that?
summary(weather$Events)
## Fog Fog-Rain
## 194 22 16
## Fog-Rain-Snow Fog-Rain-Thunderstorm Fog-Thunderstorm
## 7 4 1
## Rain Rain-Snow Rain-Thunderstorm
## 92 4 16
## Snow Thunderstorm
## 7 2
# We can use more than one variable for grouping. We just need to wrap the
# grouping variables in a list. Let's demonstrate with a data set that comes
# with R: mtcars, Motor Trend Car Road Tests from 1974.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# The tapply function allows us to calculate means of groups determined by
# multiple grouping levels and returns the result in the form of a table. Here
# we find mean mpg by cyl and am:
with(mtcars, tapply(mpg, list(cyl,am), mean))
## 0 1
## 4 22.900 28.07500
## 6 19.125 20.56667
## 8 15.050 15.40000
# In later lectures we'll see that there are other friendlier, easier-to-use
# functions for aggregation.
# Deleting columns from data frame ----------------------------------------
# To drop unwanted columns from a data frame, assign the NULL value to the
# column. Let's drop the WindDirDegrees column from weather
weather$WindDirDegrees <- NULL
any(names(weather)=="WindDirDegrees")
## [1] FALSE
# remember the election data? It's a mess:
str(electionData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 114 obs. of 84 variables:
## $ State : chr NA "Alabama" "Alaska" "Arizona" ...
## $ Total : chr "Elec Vote" "9" "3" "11" ...
## $ Total : chr "Popular Vote" "2074338" "300495" "2306559" ...
## $ Elec Vote : chr "D" NA NA NA ...
## $ NA : chr "R" "9" "3" "11" ...
## $ NA : chr "O" NA NA NA ...
## $ Pop Vote : chr "D" "2" "2" "2" ...
## $ NA : chr "R" "1" "1" "1" ...
## $ NA : chr "I" "-" "-" "-" ...
## $ Margin of Victory: chr "Votes" "460229" "42036" "208422" ...
## $ NA : chr "% Total Vote" "0.22186789231070347" "0.13988918284830029" "9.0360576078912361E-2" ...
## $ Obama : chr "Democratic" "795696" "122640" "1025232" ...
## $ NA : num NA 0.384 0.408 0.444 0.369 ...
## $ Romney : chr "Republican" "1255925" "164676" "1233654" ...
## $ NA : num NA 0.605 0.548 0.535 0.606 ...
## $ 0 : chr "Independent" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Johnson : chr "Libertarian" "12328" "7392" "32100" ...
## $ NA : num NA 0.00594 0.0246 0.01392 0.01522 ...
## $ Stein : chr "Green" "3397" "2917" "7816" ...
## $ NA : num NA 0.00164 0.00971 0.00339 0.0087 ...
## $ Goode : chr "Constitution" "2981" "0" "289" ...
## $ NA : num NA 0.001437 0 0.000125 0 ...
## $ Harris : chr "Socialist Workers" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Alexander : chr "Socialist" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Lindsay : chr "Socialism and Liberation" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0.00162 ...
## $ Write-ins : chr "-" "4011" "2870" "7312" ...
## $ NA : num NA 0.00193 0.00955 0.00317 0 ...
## $ Anderson : chr "Justice" "0" "0" "119" ...
## $ NA : num NA 0.00 0.00 5.16e-05 0.00 ...
## $ Hoefling : chr "American Ind." "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Barr : chr "Peace & Freedom" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ None : chr "-" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Carlson : chr "Grassroots" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Morstad : chr "Const. Government" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Miller : chr "American Third Position" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Fellure : chr "Prohibition" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Stevens : chr "Objectivist" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ White : chr "Socialist Equality" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Barnett : chr "Reform" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Terry : chr "Independent" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 ...
## $ Reed : chr "Independent" "0" "0" "17" ...
## $ NA : num NA 0.00 0.00 7.37e-06 0.00 ...
## $ Litzel : chr "Independent" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Tittle : chr "We the People" "0" "0" "6" ...
## $ NA : num NA 0.0 0.0 2.6e-06 0.0 ...
## $ Duncan : chr "Independent" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Boss : chr "NSA Did 911" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Washer : chr "Reform" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Baldwin : chr "Reform" "0" "0" "0" ...
## $ NA : num NA 0 0 0 0 0 0 0 0 0 ...
## $ Christensen : chr "Constitution" "0" "0" "14" ...
## $ NA : num NA 0.00 0.00 6.07e-06 0.00 ...
## $ NA : num NA NA NA NA NA NA NA NA NA NA ...
## $ NA : chr "State" "Alabama" "Alaska" "Arizona" ...
## $ : chr NA "AL" "AK" "AZ" ...
## $ : chr "EV" "9" "3" "11" ...
## $ J : num NA 3 3 3 3 3 3 3 3 4 ...
## $ S : num NA 5 4 4 4 4 4 5 4 3 ...
## $ H : num NA 13 11 15 9 10 11 8 13 11 ...
## $ G : num NA 6 11 6 9 9 5 17 5 11 ...
## $ : chr "State Code" "1" "2" "4" ...
## $ : chr "Blanks" "0" "0" "0" ...
## $ : chr "EV" "9" "3" "11" ...
## $ : chr "Meth" "0" "0" "0" ...
## $ : num NA NA NA NA NA NA NA NA NA NA ...
# It appears to have "junk" columns that consist of nothing but missing values.
# View the data in RStudio by clicking on it in the Environment panel.
# We would like to identify which columns consist of all missing data and then
# drop those columns from the data frame. We can use a combination of is.na(),
# all(), which() and sapply()
# Apply the function all(is.na(x)) to each column of electionData. Notice that
# electionData is a data frame, and that a data frame is a type of list. Thus we
# can use lapply and sapply on data frames.
sapply(electionData, function(x) all(is.na(x)))
## State Total Total Elec Vote
## FALSE FALSE FALSE FALSE
## <NA> <NA> Pop Vote <NA>
## FALSE FALSE FALSE FALSE
## <NA> Margin of Victory <NA> Obama
## FALSE FALSE FALSE FALSE
## <NA> Romney <NA> 0
## FALSE FALSE FALSE FALSE
## <NA> Johnson <NA> Stein
## FALSE FALSE FALSE FALSE
## <NA> Goode <NA> Harris
## FALSE FALSE FALSE FALSE
## <NA> Alexander <NA> Lindsay
## FALSE FALSE FALSE FALSE
## <NA> Write-ins <NA> Anderson
## FALSE FALSE FALSE FALSE
## <NA> Hoefling <NA> Barr
## FALSE FALSE FALSE FALSE
## <NA> None <NA> Carlson
## FALSE FALSE FALSE FALSE
## <NA> Morstad <NA> Miller
## FALSE FALSE FALSE FALSE
## <NA> Fellure <NA> Stevens
## FALSE FALSE FALSE FALSE
## <NA> White <NA> Barnett
## FALSE FALSE FALSE FALSE
## <NA> Terry <NA> Reed
## FALSE FALSE FALSE FALSE
## <NA> Litzel <NA> Tittle
## FALSE FALSE FALSE FALSE
## <NA> Duncan <NA> Boss
## FALSE FALSE FALSE FALSE
## <NA> Washer <NA> Baldwin
## FALSE FALSE FALSE FALSE
## <NA> Christensen <NA> <NA>
## FALSE FALSE FALSE TRUE
## <NA> J
## FALSE FALSE FALSE FALSE
## S H G
## FALSE FALSE FALSE FALSE
##
## FALSE FALSE FALSE TRUE
# But we only want the columns that evaluate to TRUE:
which(sapply(electionData, function(x)all(is.na(x))))
## <NA>
## 72 84
# so we want to drop columns 72 and 84. Save those values into "drop":
drop <- which(sapply(electionData, function(x)all(is.na(x))))
drop
## <NA>
## 72 84
electionData <- electionData[,-drop]
# Deleting rows from data frame -------------------------------------------
# Look again at electionData. Several empty rows read in with all NA.
tail(electionData)
## State Total Total.1 Elec Vote NA NA.1 Pop Vote NA.2 NA.3
## 109 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 110 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 111 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 112 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 113 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 114 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Margin of Victory NA.4 Obama NA.5 Romney NA.6 0 NA.7 Johnson NA.8
## 109 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## 110 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## 111 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## 112 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## 113 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## 114 <NA> <NA> <NA> NA <NA> NA <NA> NA <NA> NA
## Stein NA.9 Goode NA.10 Harris NA.11 Alexander NA.12 Lindsay NA.13
## 109 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 110 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 111 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 112 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 113 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 114 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## Write-ins NA.14 Anderson NA.15 Hoefling NA.16 Barr NA.17 None NA.18
## 109 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 110 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 111 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 112 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 113 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 114 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## Carlson NA.19 Morstad NA.20 Miller NA.21 Fellure NA.22 Stevens NA.23
## 109 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 110 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 111 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 112 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 113 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## 114 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA
## White NA.24 Barnett NA.25 Terry NA.26 Reed NA.27 Litzel NA.28 Tittle
## 109 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 110 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 111 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 112 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 113 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 114 <NA> NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## NA.29 Duncan NA.30 Boss NA.31 Washer NA.32 Baldwin NA.33 Christensen
## 109 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 110 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 111 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 112 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 113 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## 114 NA <NA> NA <NA> NA <NA> NA <NA> NA <NA>
## NA.34 NA.35 .1 J S H G .2 .3 .4 .5
## 109 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 110 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 111 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 112 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 113 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 114 NA <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
# which rows have all NAs?
which(apply(electionData,1,function(x)all(is.na(x))))
## [1] 54 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [18] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## [35] 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
## [52] 107 108 109 110 111 112 113 114
# Let's drop them the same we dropped the columns with all NAs.
drop <- which(apply(electionData,1,function(x)all(is.na(x))))
electionData <- electionData[-drop,]
# Let's look at the tail again.
tail(electionData)
## State Total Total.1 Elec Vote NA NA.1 Pop Vote NA.2 NA.3
## 50 West Virginia 5 670667 <NA> 5 <NA> 2 1 -
## 51 Wisconsin 10 3068434 10 <NA> <NA> 1 2 -
## 52 Wyoming 3 249061 <NA> 3 <NA> 2 1 -
## 53 Total 538 129081071 332 206 0 1 2 -
## 55 Election Date: <NA> 39757 <NA> <NA> <NA> <NA> <NA> <NA>
## 56 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Margin of Victory NA.4 Obama NA.5 Romney
## 50 179386 0.26747402213020771 238269 0.3552717 417655
## 51 213019 6.942270878239519E-2 1620985 0.5282776 1407966
## 52 101676 0.40823733944696278 69286 0.2781889 170962
## 53 4984698 3.8616800754620324E-2 65916787 0.5106619 60932089
## 55 <NA> <NA> <NA> NA <NA>
## 56 <NA> <NA> <NA> NA <NA>
## NA.6 0 NA.7 Johnson NA.8 Stein
## 50 0.6227457 0 0 6302 0.009396616 4406
## 51 0.4588549 0 0 20439 0.006661053 7665
## 52 0.6864262 0 0 5326 0.021384320 0
## 53 0.4720451 0 0 1275882 0.009884346 469644
## 55 NA 486669 NA <NA> NA <NA>
## 56 NA 3.7702584602819105E-3 NA <NA> NA <NA>
## NA.9 Goode NA.10 Harris NA.11 Alexander
## 50 0.006569579 119 0.0001774353 0 0.000000e+00 0
## 51 0.002498017 4930 0.0016066828 0 0.000000e+00 0
## 52 0.000000000 1452 0.0058298971 0 0.000000e+00 0
## 53 0.003638365 122130 0.0009461496 4117 3.189468e-05 4428
## 55 NA <NA> NA <NA> NA <NA>
## 56 NA <NA> NA <NA> NA <NA>
## NA.12 Lindsay NA.13 Write-ins NA.14 Anderson
## 50 0.000000e+00 0 0.000000e+00 50 7.455265e-05 12
## 51 0.000000e+00 526 1.714229e-04 5170 1.684899e-03 112
## 52 0.000000e+00 0 0.000000e+00 2035 8.170689e-03 0
## 53 3.430402e-05 9403 7.284569e-05 132331 1.025177e-03 43039
## 55 NA <NA> NA <NA> NA <NA>
## 56 NA <NA> NA <NA> NA <NA>
## NA.15 Hoefling NA.16 Barr NA.17 None NA.18
## 50 1.789264e-05 5 7.455265e-06 31 4.622264e-05 0 0.000000e+00
## 51 3.650070e-05 0 0.000000e+00 88 2.867912e-05 0 0.000000e+00
## 52 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0 0.000000e+00
## 53 3.334261e-04 40614 3.146395e-04 67396 5.221215e-04 5770 4.470059e-05
## 55 NA <NA> NA <NA> NA <NA> NA
## 56 NA <NA> NA <NA> NA <NA> NA
## Carlson NA.19 Morstad NA.20 Miller NA.21 Fellure
## 50 0 0.000000e+00 0 0.000000e+00 11 1.640158e-05 0
## 51 0 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 52 0 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 53 3149 2.439552e-05 1094 8.475294e-06 2710 2.099456e-05 518
## 55 <NA> NA <NA> NA <NA> NA <NA>
## 56 <NA> NA <NA> NA <NA> NA <NA>
## NA.22 Stevens NA.23 White NA.24 Barnett
## 50 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 51 0.000000e+00 0 0.000000e+00 553 1.802222e-04 0
## 52 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 53 4.012982e-06 4091 3.169326e-05 1279 9.908502e-06 956
## 55 NA <NA> NA <NA> NA <NA>
## 56 NA <NA> NA <NA> NA <NA>
## NA.25 Terry NA.26 Reed NA.27 Litzel NA.28
## 50 0.000000e+00 3806 0.0056749475 0 0.000000e+00 0 0.00000e+00
## 51 0.000000e+00 0 0.0000000000 0 0.000000e+00 0 0.00000e+00
## 52 0.000000e+00 0 0.0000000000 0 0.000000e+00 0 0.00000e+00
## 53 7.406198e-06 13107 0.0001015408 2910 2.254397e-05 1027 7.95624e-06
## 55 NA <NA> NA <NA> NA <NA> NA
## 56 NA <NA> NA <NA> NA <NA> NA
## Tittle NA.29 Duncan NA.30 Boss NA.31 Washer
## 50 0 0.000000e+00 1 1.491053e-06 0 0.000000e+00 0
## 51 0 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 52 0 0.000000e+00 0 0.000000e+00 0 0.000000e+00 0
## 53 2572 1.992546e-05 12558 9.728770e-05 1008 7.809046e-06 1016
## 55 <NA> NA <NA> NA <NA> NA <NA>
## 56 <NA> NA <NA> NA <NA> NA <NA>
## NA.32 Baldwin NA.33 Christensen NA.34
## 50 0.000000e+00 0 0.000000e+00 0 0.000000e+00
## 51 0.000000e+00 0 0.000000e+00 0 0.000000e+00
## 52 0.000000e+00 0 0.000000e+00 0 0.000000e+00
## 53 7.871022e-06 4990 3.865788e-05 4456 3.452094e-05
## 55 NA <NA> NA <NA> NA
## 56 NA <NA> NA <NA> NA
## NA.35 .1 J S H G .2 .3 .4 .5
## 50 West Virginia WV 5 3 4 21 5 54 0 5 0
## 51 Wisconsin WI 10 3 4 17 6 55 0 10 0
## 52 Wyoming WY 3 3 11 11 5 56 1367 3 0
## 53 Total <NA> 538 3 4 11 6 <NA> 75787 538 <NA>
## 55 <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
## 56 <NA> <NA> <NA> NA NA NA NA <NA> <NA> <NA> <NA>
# The last three rows contain column summaries which I don't want or stray
# figures not relevant to the main records of the data set. We should drop them.
keep <- 1:(nrow(electionData)-3)
electionData <- electionData[keep,]
# looks better.
tail(electionData)
## State Total Total.1 Elec Vote NA NA.1 Pop Vote NA.2 NA.3
## 47 Vermont 3 299290 3 <NA> <NA> 1 2 -
## 48 Virginia 13 3854489 13 <NA> <NA> 1 2 -
## 49 Washington 12 3125516 12 <NA> <NA> 1 2 -
## 50 West Virginia 5 670667 <NA> 5 <NA> 2 1 -
## 51 Wisconsin 10 3068434 10 <NA> <NA> 1 2 -
## 52 Wyoming 3 249061 <NA> 3 <NA> 2 1 -
## Margin of Victory NA.4 Obama NA.5 Romney
## 47 106541 0.35597915065655383 199239 0.6657055 92698
## 48 149298 3.8733538998295236E-2 1971820 0.5115646 1822522
## 49 464726 0.14868776867563627 1755396 0.5616340 1290670
## 50 179386 0.26747402213020771 238269 0.3552717 417655
## 51 213019 6.942270878239519E-2 1620985 0.5282776 1407966
## 52 101676 0.40823733944696278 69286 0.2781889 170962
## NA.6 0 NA.7 Johnson NA.8 Stein NA.9 Goode
## 47 0.3097264 0 0 3487 0.011650907 594 0.001984697 13
## 48 0.4728310 0 0 31216 0.008098609 8627 0.002238170 13058
## 49 0.4129462 0 0 42202 0.013502410 20928 0.006695854 8851
## 50 0.6227457 0 0 6302 0.009396616 4406 0.006569579 119
## 51 0.4588549 0 0 20439 0.006661053 7665 0.002498017 4930
## 52 0.6864262 0 0 5326 0.021384320 0 0.000000000 1452
## NA.10 Harris NA.11 Alexander NA.12 Lindsay NA.13
## 47 4.343613e-05 0 0.0000000000 0 0 695 0.0023221625
## 48 3.387738e-03 0 0.0000000000 0 0 0 0.0000000000
## 49 2.831852e-03 1205 0.0003855363 0 0 1318 0.0004216904
## 50 1.774353e-04 0 0.0000000000 0 0 0 0.0000000000
## 51 1.606683e-03 0 0.0000000000 0 0 526 0.0001714229
## 52 5.829897e-03 0 0.0000000000 0 0 0 0.0000000000
## Write-ins NA.14 Anderson NA.15 Hoefling NA.16 Barr
## 47 1427 4.767951e-03 1128 3.768920e-03 0 0.000000e+00 9
## 48 7158 1.857056e-03 73 1.893896e-05 0 0.000000e+00 0
## 49 0 0.000000e+00 4946 1.582459e-03 0 0.000000e+00 0
## 50 50 7.455265e-05 12 1.789264e-05 5 7.455265e-06 31
## 51 5170 1.684899e-03 112 3.650070e-05 0 0.000000e+00 88
## 52 2035 8.170689e-03 0 0.000000e+00 0 0.000000e+00 0
## NA.17 None NA.18 Carlson NA.19 Morstad NA.20 Miller NA.21
## 47 3.007117e-05 0 0 0 0 0 0 0 0.000000e+00
## 48 0.000000e+00 0 0 0 0 0 0 0 0.000000e+00
## 49 0.000000e+00 0 0 0 0 0 0 0 0.000000e+00
## 50 4.622264e-05 0 0 0 0 0 0 11 1.640158e-05
## 51 2.867912e-05 0 0 0 0 0 0 0 0.000000e+00
## 52 0.000000e+00 0 0 0 0 0 0 0 0.000000e+00
## Fellure NA.22 Stevens NA.23 White NA.24 Barnett NA.25 Terry
## 47 0 0 0 0 0 0.0000000000 0 0 0
## 48 0 0 0 0 0 0.0000000000 0 0 0
## 49 0 0 0 0 0 0.0000000000 0 0 0
## 50 0 0 0 0 0 0.0000000000 0 0 3806
## 51 0 0 0 0 553 0.0001802222 0 0 0
## 52 0 0 0 0 0 0.0000000000 0 0 0
## NA.26 Reed NA.27 Litzel NA.28 Tittle NA.29 Duncan
## 47 0.000000000 0 0.000000e+00 0 0 0 0.000000e+00 0
## 48 0.000000000 14 3.632129e-06 0 0 1 2.594378e-07 0
## 49 0.000000000 0 0.000000e+00 0 0 0 0.000000e+00 0
## 50 0.005674947 0 0.000000e+00 0 0 0 0.000000e+00 1
## 51 0.000000000 0 0.000000e+00 0 0 0 0.000000e+00 0
## 52 0.000000000 0 0.000000e+00 0 0 0 0.000000e+00 0
## NA.30 Boss NA.31 Washer NA.32 Baldwin NA.33 Christensen NA.34
## 47 0.000000e+00 0 0 0 0 0 0 0 0
## 48 0.000000e+00 0 0 0 0 0 0 0 0
## 49 0.000000e+00 0 0 0 0 0 0 0 0
## 50 1.491053e-06 0 0 0 0 0 0 0 0
## 51 0.000000e+00 0 0 0 0 0 0 0 0
## 52 0.000000e+00 0 0 0 0 0 0 0 0
## NA.35 .1 J S H G .2 .3 .4 .5
## 47 Vermont VT 3 3 6 17 7 50 0 3 0
## 48 Virginia VA 13 3 5 15 4 51 0 13 0
## 49 Washington WA 12 3 4 7 5 53 0 12 0
## 50 West Virginia WV 5 3 4 21 5 54 0 5 0
## 51 Wisconsin WI 10 3 4 17 6 55 0 10 0
## 52 Wyoming WY 3 3 11 11 5 56 1367 3 0
# Adding columns/variables to data frames ---------------------------------
# We often want to add columns to a data frame based on calculations using
# existing columns. This is very easy to do in R. Simply call the data frame
# with a $ operator followed by the name of a new variable. Then assign to it
# the expression that derives your new variables. Let's create a new variable
# called Temp.Range that is equal to the maximum temperature minus the minimum
# temperature for a given day.
weather$Temp.Range <- weather$Max.TemperatureF - weather$Min.TemperatureF
# Notice you have to include the name of the data frame each time you refer to a
# variable. In a moment, we'll present another way where we don't have to do
# this.
# Whenever you derive a new variable it's good to look it over and see if it
# makes sense!
summary(weather$Temp.Range) # summary stats; min = -33?
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -33.00 16.00 19.00 19.46 24.00 38.00
# which record is this?
weather[weather$Temp.Range == -33,]
## EST Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 340 12/6/2013 66 83 99
## Max.Dew.PointF MeanDew.PointF Min.DewpointF Max.Humidity Mean.Humidity
## 340 63 54 41 100 94
## Min.Humidity Max.Sea.Level.PressureIn Mean.Sea.Level.PressureIn
## 340 87 30.13 29.98
## Min.Sea.Level.PressureIn Max.VisibilityMiles Mean.VisibilityMiles
## 340 29.89 10 6
## Min.VisibilityMiles Max.Wind.SpeedMPH Mean.Wind.SpeedMPH
## 340 2 18 11
## Max.Gust.SpeedMPH PrecipitationIn CloudCover Events Temp.Range
## 340 24 0.7 8 Rain -33
# Any other records less than 0?
weather[weather$Temp.Range < 0,]
## EST Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 340 12/6/2013 66 83 99
## Max.Dew.PointF MeanDew.PointF Min.DewpointF Max.Humidity Mean.Humidity
## 340 63 54 41 100 94
## Min.Humidity Max.Sea.Level.PressureIn Mean.Sea.Level.PressureIn
## 340 87 30.13 29.98
## Min.Sea.Level.PressureIn Max.VisibilityMiles Mean.VisibilityMiles
## 340 29.89 10 6
## Min.VisibilityMiles Max.Wind.SpeedMPH Mean.Wind.SpeedMPH
## 340 2 18 11
## Max.Gust.SpeedMPH PrecipitationIn CloudCover Events Temp.Range
## 340 24 0.7 8 Rain -33
# a plot against index value is also useful:
plot(weather$Temp.Range)
![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABUAAAAPACAMAAADDuCPrAAAAY1BMVEUAAAAAADoAAGYAOmYAOpAAZmYAZpAAZrY6AAA6Ojo6kJA6kNtmAABmAGZmOgBmtv+QOgCQ29uQ2/+2ZgC2Zma2kDq225C2/7a2///bkDrb/7bb////tmb/25D//7b//9v///90HtyaAAAACXBIWXMAAB2HAAAdhwGP5fFlAAAgAElEQVR4nO3d64LrNhKu5/LE2V6JvWNnlMN0ppet+7/K6EgCBEACIIBiUe/zw9aSSKAIgl+LOlByBQBUEe0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKwS7QIAwCrRLgAArBLtAgDAKtEuAACsEu0CAMAq0S4AAKySzu1f5O23zj0BwGDSs/Ev8ZGhAM5E+jX984cs/es//boDgMGkW8v//OUn5t9/3P7963+79QcAg0m3lr+DuLxH6p/d+gOAwaRby5fwhP32JJSXQQGchvRq+PZ08/fgzi/O4QGch/Rq+PZsMzxd/+ZtJADnIb0aJkABnJ30arjJKXzwOSgAqNYq3uaIat7iW4M3kbRHG8C5tAu4V0a1bnAS/xhT+Kx0RYc/GAA+lqUAjX6QvuwlUAIUQDuWAvSZmL5f/l3UAgEKoB1TAepeiump9FP0BCiAdowF6HXn5ewIUADt2AvQAiPeMwPwuQhQAKhkN0D//qPsE0wPBCiAdghQAKhEgAJAJUMBGvkUqJR+kp4ABdAOAQoAlQwF6P278AQogOOwFKCP56DT5UR4DRSAMlMB+vga0vvb7wTo+fBhXRhjLEAfPw3/zE0C9HT4ugOssRagj2vaPU7jCdCzmYKTBIUV5gL0/isejx+DJ0BPxolNdhOMMBigj9P43wjQs3H3DfsJNlgM0Mdp/L/+XwL0XAhQ2GMyQJ+n8YU/h/TAgXlcBCjsMRqgj9N4AvRUCFDYYzVAHx8JJUCjjH4WiACFPXYDtMonHJhWP01JgMIeAvRszH6ako8xwR4C9GQMx5DZ6MfnIkBPxvKJsNUXH/C5CNCTsRygVt/+wuciQE/GdIACxhCgJ0OAAuMQoCdjN0A5f4e9WUCAnozZAOUdJBicBQToyVj9GBOfYYLFWUCAno29OXhnNffRksFZQICejrmzoDuzrzygIYOzgAA9H4P5afHQQXMGZwEBiiMweOigOYOzgADFERg8dNCcwVlAgEKN81pD/qFj8QUK5CFArwQocrnvdmUfOibfIkMeAvRKgCKT93mr3A+w2PyQFvLwMaYrAYo8i4MlLxkNHmEoYO/vIwEKHcvTtaxzc4PneChh7hUaAhQ6gizMOXII0LMzlp8EKJRUZSEBimMhQKGDAMUJEKBoLPMkjACFirYvEhCgaCv3bQACFBoav01FgKKp7A+iVH0iiY8xYZ/WH5QiQNFSQcJVTeXW8x+fpflfYAIULZWcY1edTDU+A8Nnaf4aEAGKloomaFUUkp+oR4Duw3HXGe/y4MgI0H04pjsjQHFkBOg+ezZ31Lnj0c5Ri+ohQDFU4dFCgO6zY3NHvXtxtHdJyuohQDFS6dFCgO5Tv7nTLuqcbKP6yVVYj7PUMerHmRUfLc3nJwGau6LMN1tUotxPruJ6jvYHACdWcbS0np8EaPmKPZPhaKfA5fUc7SUInFfN0dJ4fhKg5SsSoBurkJ8YoupoaTs/CdDyFQlQ4AgOMDsJ0PIVCVDgCA4wOwnQ8hXPGqCxc5sDTFG3AF4cqDRk6OITqGPHB5idBGj5iicN0Oir6weYok7/JGilIUMX66RvxweYnQRo7ooy32xRiXI/Kz0vD4D55shyQvECkWHI0MU66dzxAWYnAZq9pixu9KEVE6m5eJjYOsDBYtWQoYt10r1j/dlJgOavOuYEUulENXk2dJQT5wOcrlk1ZOhinfTvWH12EqAF647ZUzozIj3Xj5GfBGi9Eweo+uy0F6CX6W/Ob+Urc+SlHD6fDl/gcZ05QLUZC9Av8ZVm6En3YgOHn+uHL/C4CNB+TAXozx+y9K//FLWgsheHnmXUdrY617XPk541xG9jk60A1Zhs9X1aCtB//vIT8+8/bv/+9b8lTWgceXPaH7mztbk+dAtSCNBqpgJUY7Lt6NNSgH4HcXmP1D9LmlA48mSeTf07r+/MWSGSn5WNtrRSINYNGbpYJxUda0y2PX1aCtBLeMJ+exJa9DLo+ENv6HG/p7PkLDpKch0jx00aMnSxToo71phsu/o0FKC3p5u/B3d+lZ3DawTowN53dZY6jznMufMhXkmwacjQxTop7Vhjsu08atoV8mqxdYNvt2eb4en6d9nbSATo6trRqX6YAE0ViG1Dhi7WSWHHBCgBmu7x4AE6slEghgDlFD7dIwEKrCJAeRMp3eORAjTn1MpfpuhkjLPtY8rdLwP3n4jTmWaAVm2zpQCNf4wpfFa6ggCdHt2eLv4yOWsUNY/xcvfLwP0n4namGKB122wpQKMfpC/7KhIfY1osuDJdxJlYr39NN3LrIEEPJXe/DNx//h/osYfLXMK1cIK7Kzcvp3WDs0di+n75d1ELCsfz0DDJ7Cxnms7LTBP7PdOyq+BF00PJ3S8D95/zjM/5Q70oorfiCe6t27ya1g265ksxPVm4mMhc7WE6yzlRcu5fNLq5IbztdEy5+2Xg/nNn6pSg4w4Xp+eSCe6t2ryY1g0u2Luc3dAJkdVZYYAuGiVAjTIRoGMPF6frggnurdm8ltYN1pMI7ZoOoSZAt9coah7j2QhQHbXbTIB+IgL0IxGga4XEb5es10bzBqMe78iXfYTprn5zFQK4X5fbs2V5TtMrQPO3kb+AWzZHqDRA9w151tr5Adp5/39IgH7L87Og36/RLLyecv3mKjyF7djl5mxZ9N0tQPO3UWEHGLM9QoUBum/I89bODtDe+/8jAvT5tFN++fd8afpBV6Sf9tu4A7hnl96sTTwur7dEn8tsrVHUfLjg5jYq7ABjMkYod7+897o8l6sa8swd5kTiVn7mNFevaIJ76zWvpHWDs/cb8L/+36/kvAy6In3t6O7Qt8v1+Rg7gIpmcP7RM9/Ma5AXVROyRih3J4Z/QLuU81rwuYATpTuaq1cZ0ZYC9Pa88x6bjx+We+Xm95gr0tc+v9+hc5cySfftL7O6RlHzy36C23uW/FR5I5S7ExfLlQ95zas4q09Ac5urVzTBO1bTvMHJ1+uE/TJ/Ael2Uj/iYiLnC9Dr2mRxT6rmZYqmV9bCBGg7mSOUuxOXu75TOW5Xa4sN2f81+WkpQKfL2d2eiU4n7mMuZ3fCAD1C3wRoO81HaF+Djcs57v43FKDTBZXdp51jLqhMgCr3c9wD6CgIUB0WA/R2Dm8vQEtPh5tNGbcrWSjo21ml6lQnt5+9S36q5vOFAM1iK0BfH5y/uKfwNgLUn5abEdZuyrhdLfMz1W6kb2eVjOKza4vf3rPkp2o/X3Y1SIDWt9i6wbfbmXv4eudl0JtIMt/c1cA7yIJ2m3cZdixOGK52H/btNxMstbe4nDcaMpf8VD3my44GG++w4+5/QwF6//zS8jNL34OuSL8zNfz9nzcbmgSV15X7ZHK9+2XffjPzzeq6Ev20WPJTdZsvdQ023mGH3f+WAvR+PeXflncMuiK9TOrW9lvKKmhfl7GO5/9nJKjbt7OYeLfrK4v202TJT9V+vuyc80132FH3v6UAfX4Dfn4SevH/maN+c3ftvqoA3ddlrOP5/1sBuui7W4AWbOMxj58jaT5f9jXYeIcddP+bCtB7gjq/4fFVnJ8dNre42/wAbd7x/P/NAE020zhAcSgDp+ZJ2ArQ27NO55T9q/ya9ATouAANnjIc9DmEEZmjt/dZY/z2SXSYgdYCdCcCdFiABmeAe08JP1vm6O0c5HMHaI8ZSICO7vZDAlTmLiR+Bwpkjt7eQT51gHaZgQTomG5lvrn417iO5yQrzM9FM/PNzDUK1kNU7ah36sekPttGgA7qV5wbA5+MuV25CVrYvd/M4r74Covbp35u013m6O0f5BOfJ/SZgQToqI7fwn8N61gCNc1krU2ANjUsQEdOzcEI0Ab0poU/LQdO0nhwF3fvrJKzNgHa1LgAHTk1xyJAGzjhvDgmArSpgQF6WgRoA91m1aH/bo8qznuW6t4du2OA5RNudwg6j0nj5osCdHffa+PWSb9+ppYJ0AZ6zYRxM63CqOLcfo4QoJEcmHruPCatmy8J0N19r41bJ/36iW8DAVqp3/GyuHEgo4rz+nH6mp4W+Xd09+5xOnico6jzmDRvPnP0nJGvTyPxYvj5z5FTp1vLXWYgAdqkVZlv9uhgj1HFLfoJjonRf2SC/HzUNSTNOzSfOXrTtsrinLW0Jyc2gxOI5vrtDq/lLjOQAG3c6uESdFRxy35kMt25uKOv+Whx0yeMgw7l9Gg+c/S8xar6fgfwdQqa7gnab3f4LfeYgQRo41YJ0Ot8+PmTdWh+ni1Ac0fPXYwAXbTcYQYSoI1bJUA795PrbAE6qu8zB2gHBGjjVo+QHR4ClAAtb4AAreihUYutG2yp4yDKLHMld+HpX+VnGYk1YuX0PJ8ckxqysLbk9P+jBeha8Rsbtrnd1Zvmj6kToOUzMtFy7F9Bnfn95NS10bK/xVWbSYA2ajXzsHbWie29sjYiDUXuFvfO3FbL+xwSoAXDdNwAXSt+Y8O2t7t20xZDOgdo+YxMthz+a0fNWXWtt+w2UbuZBGiTVpf7IWcV54b/L+9GWUPRu2W+M6/Nqj479JPsWJwbOctON6/TUTwv1rrIrebX9vHG/s+YHpWbtpwv07Sc/105Ulsz/VpXc96Rstqyt83FB16y2b2aN9hSp0Pbz8+MXvwdG0u7zEoTM8S7u3Zu1PbZqp9kx958z1tart5x0rnWrIyMFL+RJFlBU7Vp4XwRxztKs5tLtdxu6uRm7krL0WOkOCII0EbtupNtu5vF7JH57sIETZyjLNufimsh0ee1dT/Jjl+jHBQQLB4xptaV5lOjt/XY9sPbfScl5os/blVjlZjpyT6rml1dMNVyupbMGiqWzmqxdYMtdQ/Q65EDNKfB+j7nnhr1k+w4M0BjURA81qXQtea7B2jNpm0G6EaX+S3HHymuOT/xki0ToBX6BaiXfAcL0JyGCnRtPKfj3AA9pP4BWmEl2HZ2mRmg+5pt2gQBuoIAbYAA3YMA3dNcvNmmTRCgK3Zu7trpwK4AfTWrGaDJbVtdJntAc5rfbsNvS2obbVFMbW+bAZreKveBjCmW0cpyLIOWS/d1vHHnwY3ichGgOvYewKkd3yZARTFA09u2ukzugOY0n9HIoq3KRpsUU9vbVoCuTTLnka3ak834D0QGck+AphqfH10vLhsBqmPn8SuLG+5DzgGy3YvTwnK+lVXqNbR5d0Y7K5Panf5Fjec1n9uKd7zXHIhtiqntbW3XeNMoTFenhYz8XNyIPuCOaLTlsokUafy1k14NzN1c5+5rFE/wjSaqmyNAC9aV+WbkwcVkzGzsHQNTI0Fn+Q1t373ZysoQzcsUV9lguk/tiBObr8Era7RVMbW9rY3eNAHCWRSfJ9m9Rh+Y8yzZcsm+jjX+av396NTXu+utNjf7yj5SVpuobo4ArVo3laDi7ouN1tylJZRfVnSNsobWty24v7TKnOYzG4oMU2GCNiumtre10XMfW0bfxqobvcYemJ4ZrrRcsK9jjS8bWDS3K0GL5uBGE7XNEaBV60bakcJdEJ9X16I2vIby7k61Eb+dXqaoynaZJQuvFk0F6NrouRsWCdDcgS8P0HTL+fs6HqCLBpb/2m422VvpkbLaRGVzBGjVug2HbezxvKOIHYW23EY3NI0GaNXiRQ1VBWhmibm9Ztawo78jIECr1iVAmzdf1hYBWtcMAdoYAVq1LgH6/n/Wec+BA3TagsJTuNzFRwWoX09i4eVCvmCBzVpjrcX+tRagRaN+PARo1boE6Ot/eQeAgQDN3ZRgxaLeCovLb2hRT3zhYKHllhcNgr/w2r/SAVo46gdEgBasK/PN3ZX0brZ9EeEyzvGRWim/+VziPlV6tVjUqPiJsrgzOzwyFy/c9PTiqw0t64ku/IpIfwj9/y13bE6lc5NuBzL/K/xTINE2TCJAS1aWxY0mDjGLcorIOkrrm8/lHf7OjcIGrov8lOnRghYyFm8WzSsNhfVEFp6G67nQHJtehnqt5NT5+gsm7zvnhuT9p21ZsMxFzsVtdXhQBGjR2m9tqunbbPsiFsvEzxPrm88loeoGpnuuJcdyyZaXbnp68bVHgtvhwsEDcg3HMtZioky/sFiJyYIjD1hNUAK0bPWa41Wr2fZF+MtUxUh9gWFbyUDJb2C64+re3l4/fnu9t8LiCh6J1ZOMqFTIleVZPECDGZIsePkAATq12LrBlqzupkOyP//HBGh/WfUsF3oH6PTfFgEa+VduGxsdHhQBilr25z8BSoDuRIB27zFxFtO1rWa9rjWUnv+bp27BY7sLrmogdhab00bBke+0md+Jv8zav7brmZZXD9Cg8uVaO6dAMyWVEKC9O2w3L/LbatbrakPJwza5lriaFlzXQCTXsprID1Cn0fxe/GXW/rVdT7xHjQAN64gH6FbP3RVVMixA//5D5Jd/X79+/W/rDkuM3jvTTmgwL/LbatbrekOJQyG91jQrw+N5rZ/dha6uJs8w8RIqu7uM/Jxu+Fu+0ou/MWv/2q4nKGBeyKlj2vJEK2tFPhZe+1ekklgbDY+UfcoqGRSg9/h8BqjIn617LDB45xRNyWZtNet1q6HCo9mJzcdRlt3P7kLXVnSf2Ln35HW4vrCfEO//uk/9tlbKiaaVehYR5S/kbLBEF1jlL7z2r8hWRdqIPKyisJIxAfrMz3uAXkQ1QYcHaLuu89tq1utmQzLJWctNTS9Bdxdc34CEspqIbvlaXe+Qui7+u7rS1e9g+a/tepZr+wsFW563VdEu1/4VVhKpKbFhwxVWMiRA//lL5Nf//vxxC9B7gv7rP637zEaAtu0ydryZCtAgRCSziaykGRegiXoWy4epFmx5xlbFu1z7V1hJWEFqw0Y7YoB+i/x+vT4D9H4S//ue9i/TkP9WUVykuo5OH6BFax00QOf/u7ebGBigOW1tL9/PRiVmCl1bvFEB4V0Xub919ArQ29PR+jeSvvy/mcUZSoD26TJrLQKUAA1u5z080AED9BaZ9+ecrwC9hWDtOfzPH7JU2FTx5r77yX8g1V39SIdd1QfodtnOEjoBuigxZ6CzCo03tCNAc8bSXXi6o3mAJirxxzKyfMYUzlG7i/Kn2kYPpVuytvEHDNC//3i8b/QO0O/aAL2/lOqu+3hrquzZbOnmJgc6b/o1CdBIV9W9bpftLqEboJJZ8VqX3jLRhuoDNKOyQQGa3DTvgdjymyObo3YX5U+1jR5Kt2R1408coN9BXN4jteg9/cLNlfmQkrwHEuvvy8/3/2W6K2uNcMntsr0l6spPruVMWC8//TX8EvMGOqPQVEPu8E5LpHvabjBZl7z/u56fi41Z+9d6JVNAPP8XKWUxxJVqd1H+VNvoIfNgdJdfDMF6oRuN5XRZImxwcQp/qX0N9BIm7y2bi14GLdvctSjIa7F0367VkN/WVlhck2UvlqgrP7lWcua6a8QL2BjozULTDb0fEefGSk/bDabqyo4tf2PW/rVWybOj90qyXF4yojxL7S7Kn2obPeQW4Cw/7+rIKmWTfkSAvpLvFaClmTd55bDvqyyNSwM0sWbygUgTbyUdx7vKbyux5HbZyyXqyk+uJa7EA14B2QO9VehKQxJY6SenwURd+b34y6z9a6WS1yKp5cP/VqrdRflTbaOH/INxWsb5YxJZpWgiDAnQ78fb5c8ALT7rnrxeCVg2XfR6wPAAvRbsi4wactuKL1keoJXlJ9ea52ZQWnh/SYBuFbrWkCysdZPVYKKu/F78Zdb+la7kvUxi+fEBGmzHYr2VSbPaQ/MA3ZpLyd7biDX4+PT8I0Dvn0OqPIO3GaB7teyqIkBHqw7Qsnb3Ux+paO+RAF3e1gvQ2vWGB2iJMQH6+irnU+2HmEyewu9GgMYe2tvufuojFe2dAM3q2FyAPj+BtC8/Vd5Eev43eELf9OiRhVgNtV35bZYGaLyexb3JyquqFO927tZvFxBvaEfhzjG4Z+OXO6isrcjO8B6L3b7WBWiytJxdNK3s793N9WI9BGUkj9J0c/YCdIrQPd+Dj3+Mqeh7oRUBOh3SsnigqslYJ0thDbVdLdosCtB4PcG96cqrqvRvZ259RgHRhvYUvsy8uo331y5tK1y+W4CmS8vYRRKqDNBYGcmjNN2cxQBtIPpB+rJELg3Qx9/M101/P9a1GO/j3cGy7b1dTevOG7HRljhTS5Z3xe6dJm3JgZ+uUrx2M7c+XmpiGT8/N9dLcob0vf3FbSwqKK0nnJrBc7X55nI1mUMsLz9TpRVMKu9G/sTemJROY1lz0Jtl5QdV0Nq+9SMttm5w5r2U+vT8aGm2ws2dhloWQ73r2Fv04PcU1FjfVThFt9ty6/HXjbbYYDJ6bbrN+bmU3vq8IzF+FG+vt96gd/DWNTFVUFqPuyGybO/qT97lA1dn12XMq9XStnZRWOezlfyJvT4pvQ3NGTp3y4sPqrCxfetHWmzdoGu+FNNT74uJzD1dIzut4fjPzwdiCVrXlV9vZluRJeJHz3tI3DldF0T+7UUBORXHbweLLRrKXC+7wfI2lhOqrJ7gD89yjJzjJP1A1rxaL22joegEiezp1f5XJuXKUbrZXM7Gb7W1s4GwxdYNLoy9nJ07zOHf93bjvxKg1V3Fpv12W+ESYwN0nt6ZFecGz7KhvQG6bFAvQFNjlEwJL0HKuoqVtt5QMkALJvbapFw8XNJcVt9bTe1tIWgxuCdy5j3ml5GCbssHbPdRltn+WoDubbumTcUAjdyuaqDXeu3aaBagXaZmq67SAdqsjIFDsaQUoFVPH8tLIUBjt+vXJUCbtkGANirjEwN0SIIGKvd+kL5rcew8FlvMvy8/QNf+GMTLOU6Arg/Xyu21FeMrxYYnZ73tQoNO/DbcVcsrWKyRU09sNqQmQlY5WYUWz6T4BMneS6tlxFo5YYA+roT8erv8Hqa/u3eUeVxS2Tn//+evnl/lvKb209rudx6L71/vvuwAXR4uGeUcJkBXj5a1AM06zJIBurZSvOPM/uaF3KFx7y2tYLlGRj3LuZDuOLecnELbB2hBc+npsae+fYYE6C325o+7fz+is+6aTO83kKZVBwToIwdfN+d5ffVv+Ctc/cXdxZb3iXuorNQo/hOy9+GyWo7XbWzjVsTXDe51Zu9qfkbri7Tpt7++YqIBd6TSKy3Xyyh0uZozNZzS3Y6zK5j26TQrNuuZ5sH8v0TH2eVkFFoTGOEYuTdLilqZHjvq22dIgF68sHz+q/BL7K81397rdg/Q6zQtZUqItd3lH83BYumD1j0Co1W8//u+OTWRLmfPoZN5ME6HrXv8Jlpa/duwaELcdNrYb4sG3oVt7uzlxmT1Fz+I/crzyr4u155mWEY94sXmHKXh8gXlbBZaF8LhyiVjlGzJa2P/H4lKIwJ0cRml5wWUKq5Lfz9/vzfkXNGpf4A60/S9vp8oyQ7Eux0u7kWQdzCk6vaProxyVtvcEF83uHer8vXhCtr0b2+sGDbgpGZegrqFZ/W3XCjc+OyygwoK6nH+aL2W98e/rpzcQqtXjjVTmKCJ6bGnvj0sBejl/brp83fmX7d6B+jV3TMdAjSYXvEanN4XlayXUz+r4usG927O3e2DxR/h6Xb+UeaUUBKgi42pCtDYgOSWHakgt55gsK7+DmgdoLtm0rxybK4UtZmcHrvq22FEgN5Szj+Fv4dfeYC6zVxeCTomQBe3Gwdofg3lAXoEOw6W0hVLA7Siv7KUyi47djsvQP3b+aWpalHUMTZsRIDe4859E+mRg+W/jOQ9kX0lKAF6lImURoBWdkyAdm9jvyEBen/x8pVz948x3U/EvwqvQ3ddvhJweeSwUoDOZwvFAbpY1b1js4bSAM1svJa/MT5vsVR9m+1nreh06Qfo1sYvl4j15zW+bHC50eFyDQI0vh1jA7R8LDcbjN72W1lrc/HY5kKF9eUbEqCPt31mfz4StfhzoH6APi8FqhGgi6MlubAEx0K4qnfHZg2FAZrbeCW3eQnEa+oQoH4Z04LbGx8sERRHmH4AACAASURBVOnPWSbSYLjVy+X2B2hiO4YGaMVYbrYYu+23stbm8rFov+s7r5ExAfr8APzTPThvp/HFT0CXL6U+fp1u0JtI7uruDg7b8xb21rtGVnUmwHph78NR5ptTs2E/izY7TJtF8+8Nc2+EC5YOf9aK3lZ6R981qCS9YqI/fxunW3MLcp22P3L8525vYkNfW+T3763idxdvpnr4wxYKxjK3yUV+OjfW2pwWmDZ+o6h+h8KgAJ2+z/mMu++q7yF9+b/nef820/85IECDYzQ5q5cLL3ZbsGrB3PYOGPGnWHx67D9wMgp634wcz5FFS6dvxoqLrXwXkKgkvWK0P38bIwvJ1YnNR0Nex9nbm9hQt6nldridO1VEmqke/uX6RWOZ22hkYN8DmWxzOSrRDYvuvOz6sg0L0AbuT2Pd563fjynTP0DnY0SuV2+6Jnbc+7HFYsGq/jTPrUEWzSbKKWm8wuIQlfd97m338dRwbXSyteJyKyPDE185OjyxHTY9EhYlr4W8XS0125tYw7s7+dhWx9XD/149frtkidg6y6JiMyrepvccJbVh8Z2XXV8uSwH6fCnVOY1/JOiAAL06+8l5dpGYkd7C7mLBqkUTLzxgJHjMX76g8XIlAbo6XBu9bKwYbGU4PCUHfWyHvR8IikoGaMX2JtZYdBR9zF8m1kz18F+X/bYL0LCo8gBd3TACNOKeoO5zUOft/Tz7N3dHLkUO9uq2Knrr2fxGgPaT2Mo2B/3GMegH6Guptsdq553YqIY2VVYE6Fp/5wpQ500k2fPTnMFvw38RoLm99WyeAH3dJkCjS5T3Q4C6/I8x7fpt433KNvddb6qF9daCtesCNFZEjqJZ7XeS02W7AE32Vl5GcNOZdHkrRpYJ2tgI0GlJ7191u7FlgPabSJGJHXS13Xt8RsXWyg/QcO+HbaVmSJ4hAfotPiMBGh3a7Ckdrl0VoNX7t+TY8zvJ6rJZgCZ7qyhjedObdVkrhsuEbeQE6GLN2t3YMEA7TqTlEpGuMnqPzKjEWtkBGi1ksVRyhuQZEqAXzcz0lGyuzAeKRO7dzs/lCsGqGW3Fi8iRW2jQSV6XXvP+fM/pMtF3zgPpMpZ3T/VED8LoisEyr1WdY2yxP72Ndm5d5y7Fu5nemHgFW1UWttRhIi2WWJv6K737rbxG8PWAv1bWqDj7XNxC4gtFZkieEQFa+mn3jgo2N7WXdhzYwaqbbe05gLIPmXDm5nTpx8NiGmZPxmRvNWUs7vYKC1rJPKafLUz/W/7luPobPR2G/gFZvxuzd2JuO10mkrdEpKu83v1+3KFcrpU1KtMOcHbFYnlnL0VmSJYRAfr3H+VfO+qkKEATq8mkeO1g1a22kkXkyCo06CS7S7d5CWTXmOitqoz03fEE3azVWSa2eLjZ7nLiLpe3MdmbV6rzRPLnQtBVZu9+P96/1vfuZnOJ5d1ZUZmgBGjOorG/W+tNJdaOHVprbe2a91mFBp3kdxmf7VldJvrOeWC1jOTdscMjbz9Oy8QWX262ePe5i2VuzGoFO/SeSN72B13l9r7cZ+78Kq3I2RHJ5Y0EaPDZIz1NArT/2q2bKelkSJeJvnMeKGp77+FR1kZyIPcEaBsD9+qOAN1saG8h4RIWArTq94/66BOgkT9vq2vnP6mITkZv5fy2Mjs5RYBOTz5kuuO68kwkerffhr9EvJnl8uEDDfZWneZ7Nb0l/ijs6D028Bt9bz0VDnbjNXeGrBfZUqTB45zDdwlQ/0jbXju2fHYRi5UL2srs5AwBKsGwvA6PxHDF96B7r79EvBnvXvdBCRVu0G6t92p6SxajsKN3L88if47W0vu5z4JeF6tGArR0Fw0J0PsXkY7xHLRHgE5j7e/k5NrR5XOLWKxc0lZmJycIUHFibm7i+QTQW+K6+Jdz93QQegfkdGekGeewFG8gpzqmw3rP3qrTeK+m591yFHb07o6UP5yJvhePRZ99eMuFAVp8QI0I0NeV7GYmPkjvHUzFy6XXzm03uvDi30VtZXbSps26vnMeyG/zHXzX4OgI/mgEd79Xnebs3EZ6j8d7E7et6Ugu26Td2u7V1dntj8KO3svb2pjJsTuyZshqkVmLFQgbtBmg82DK6lqJv67Jtcv+Gi+aWazc6HmF30nmdjeyMk61ZfgpKPON5K4K73YOrFcj7v9jzYTPl8Q9SuV9TMtivUGa7tX0vAtHYUfv7k6YOlqb88Hh4fcarpo5Q1ZrzFmqRNig0QC9zgXnNrl4XhNdu3Af+c30CdCgk5ztbiXZW3UZbqiJ20pNgC7bcB5Z9hXU7PxruTHDA7TpXl0P0FhXdb0vBn6j7/CxRa+RVfNmyGqJOUuVaN5gS3X7L7tJb9HE2qX7aG0KtArQRa3tjrTyvnMe2GwwaMI7BMOFwrudfv02rn5FiwBND+RiY8YHaMu9uhGgsa7qevcHfqPvyGPL3REumjVDVivMWapE8wZb6jBvywOxbPm1lZsF6Kms/EmL3t4I0CAy1wJ0rVd3h9neW1sBGnukVUdFAVr0cNYSa2u00bzBllQDNPwjrBKgy7/KEtZ1OEUl2gjQfaOuu89Sz85bBejUpvc0UjZ7+JAA/ft/2ngNtLzJ1eYjx4xGgPpliKuwmHHKSrQVoO6i2ZT3WeSvb0a8FTTvtlrQw3kD1L8iqJE3kXKblPlmxnLOrC+vZtFZbt+xFpzJeXVuHJFfc/7i4fll9IHY3c5w+EPzSMFYM9u9enu+bJMW9q29n7h/fd0bNTMy1friaXZODxu9bxdXXv6YAL14+XmuAM2czd7Bk7F8XmfFbQVRMM/PLoPTQvGsTo5K4oHY3W4kOAkq3t8tr5mCXkVKNyneYJ8JnVuBMzDz7Nk1u5crLvJzu4eN3reLqzigshYrEGnQ/0Uk/9fdx+oy3+YNy+s5a/nMzkrbWhzxTmoeN0FrzqsSo5J4IHa3O2GXS8Sbyeg13lHOJvnt7Vm7ieWGeAlaNCMjTce6ee6EzR42et8urrT8IQF6efwY8dfjF4kvmvnZabrlDHlsWrTorLCtzwjQ9KgkHojd7R9K/hLxZrZ7jR2eJgM0nIbX9bEpaTnsRV7nW9s9bPS+XVxh+SMC9J+/Ht+E/36euhf+jmZbehFxhEl/DWfnNXYIHMxBhq4l+wF69WdMw9kTTNHoI0cxIkBfF2P6+eOXf18fcXqyU/jingnQIgcZupYI0NVmndsH2dakQQH65/y/16m8ju67QDzLe0dVkTY8QJNnRPmnSu6So4au8ERuT/PlJ6YHCZXq2bM+uNUB2nmfpTpt3mJwzys53xem//lD79J2vUdXFpb3jqlixegAXW769gPrTQwauvzq9jf/6mOty8VjxgN0Y3BrA7TzPkv22rzF4J73U8/L86nnzx/n+hiT17xc33ty+ufzf8+wkgFFrHKm17Mg56Dsk59Bv+sPxJqYh3Fcfi5utG4+2KS1LpeP+TtRTeXs2RrcYIo6/9rRbCcjAvT91PP1yx7fJ/scqNu6vP/rROXyYBn8J3LBn2du1neoKzn78yPA+8MzaOg6B1Rsk9a6DB9TCoulqtmzPbjBFA1WrGq2jxEB+n7j/Vse7yJp/kJS7wCN/Td1Iq/EL0NcHfqK304/EG9i8NB1PkWObdJal5HHDjeX8ivJGNzUFN3ZbBdDAvTnj0d0Pn/Y41tO+ybSeoBec+bBAH4ZPfOzWYAOHroRARrshnSXsccONpcKKskZ3PgU3d1sD0MC9PFVzt/nb3Se9WNMGwHavf/DaRagOUu2MyRA/dulAWpXp605d4D+89cjNV+Xptd7AkqAjkWA5jZPgB6z2aJ+G7UYvff78fLn+6momoMHaHCykn9+1OacrvGZ4UqAzv3sC9DyijfWWDycLLu+HwI0djtr1fTwbzebs/NqZlP+spkttm6wpWMHqMhiBwZ35K9apU0rbnup204/uwK0vOKNNZYPJ8uu74cAjd3OWXNl+Debzdl5FfOfAG3aurz/66Soszs289O/EdyRv2qVNq3EGowE0fvx7SflsSaWj2ZXvLHGa+fJVZw9WNPZysKxTVrbzOwJZEL11qwP/1azOTuvZv6rBOj/ddLPgc4J6kRp9n4JpkD+VGtziHU4UKOb7uXTrr8N5RVvrLEM9mXZ2Z2tLhzbpKzNLDiwj6tya/YlZM7Oq5r/CgGqeTmm3hNQFpb35tb2OnrjD2asWqXHqWJs052/Mtv5ubpkecUba6ztsaLO1heON58ekPzBsqBuazaHf7XZnJ1XNf/7B+jXfZPmj87fL6583gB19qK7J7MmzCkDNLbpzpPPrINoLVjit9faWl3DPaWOl53Z2WY/4SatDciZ8rNya7aHf30At9Y+aIBOF6N/vvd+/zzTqQN0h3MGaNd++gVo5OGWAYpi+0bUaoC+Pvl5d//0/OPZqOI3OQ89lwnQAS0RoFZ9ZoB+PZPz/sTz1/9qP/28lm3u6NMmp6ewa8sButwaAnR6wJd4eKOrD/GRAfr6BtL1/gn6X/59eT8R1VOwucl53YubMEHXhgM02BoC9H3/aoIOn4DH9qkB+jpf//lD/jdR/RbnQ/7mTvN22AR2exT3/9fNup0ad1TbppVkq7ER3dlPeUsba6w9XNRZxsLvRcS5EWuBBH3YN2ty1q7qoW+A/v3HOzIfL4Y+v86pqeAPi8w3e1SS7lPCI2rzGGpztPU4ZmMD2a6f8pY21lh7uKizzYXd/JwWjaw/cAIe275Zk7N2TQ8jA1TxzaO3qmfmIxP0JXVH/qptCtgvOpDt+ilvaWONtYeLOtta2E3NSIKqTMBj2zdrctau6GFggOo//zx6gF6D3Ze/P9skUvP8TAxku37KW9pYY+3hos42+5n/T4Bm2TdrctaumU111ay06Nz2ArTRE9D3RUWrXlA9eIBqd93BubamIQL0FIwF6Jf4SjOUAB3tXFvTEAF6CqYCdPpa06zwU6V5m7t4Il/zrH/H2Ub1oeMMS0W3nRAECc4bFtM+axagx5sHZ2UpQB8fxHcS8/HOflmrVS//r64UD609UVZ76Hh/WMq77YQATQgCVNoF6AHnwVlZCtDvIC7nD+rnFiebi1xfU1mur/m3lZ+LG+l7MznrlOene+MQKrfm/KbZ5e624OHnzbqm66YfinQP0JjKb3NewhXnhM4sTjIWkff/3hN7c+FF0ztDo2r+O7H5rvwYOJoT3n+o3/tt8Wevftz4mzWQoQC9Pd0Mf06p8EfmswL0/f+3zPbi07YyQbe7DitxX7Q90JFTszUfITwwEg8Xtxu/jQ4MBeitsfB0/bussYIAveZM3z4BmtV1WMlBA7Rmaz7DeoBWjxsBOlDfAG1qdIDmLN4pQCscOECR1mOqEKADGQrQwafwWYsToNiFADXOUIAOexPp+d+s0ye1AA3qI0AzHO/FhCMF6MborL3aYFKTTbEUoPGPMYXPSlfkBmjuRNEK0LA+AnTbAQ//AwXoxujIwp4KD6HNplgK0OgH6cvekMoKULmK8zR0c+Gw6fi9DU0diBOaU7HkZ1xk1NT1mCp1bW6MjjhPLJILWdJoNlgK0Nib+oWXeMrZ3CmJJCOJ4ruh86EaPULcuW1/enfQ/c9alR5TpabNjdFxYvN9cFQWdxCtZsP4AP35Y8fPIs2XYnrqcjGRufmMNbyFN+5tJfXCgfTt1riDvrfSY59VtLkxOm5qniJBW80GYwF6HXI5O3f2ZbyPFJuqXYMssfPJz1UHDdAuU6W8TQJ0fzttbDW4N0ALSETeevHbR3H0+o6JUVtDgO5vp43mDdYjQDFj1NYQoPvbaaN5gy0dK0DXQn39sfjtwq52nTvuWTln3V3FJVqM3/5My/E9WoDu3v8bDRCgVQ4VoGtPi1efMpfXF2tutYuaBluuu6u4VJPx2x8pGN+DBeju/b/VAAFaJTtuylaoMvUS2clrj1XUF2tuvYuKBluuu6u4rTbJz8j4bozO8+HHfwflZ1BW2wZazQYCNL6YLG70KEXmmwWPeY/n1RdrbtcE2rNyzrp9sm7EXrVhdULER2dKUCdKu9m9/zMaaDQbhgXo6/eMlH8bPndzZTKklGU3m+cXZfXFmtt1CrNn5Zx1O51tD9irNkTHd2N0ZGF4fa0baLMpgwL0e662+jNMLS4umr25/afJrgAtq48AndoiP+/i47sxOuPyc0yAtpkNYwLU+zXiwm9fToYGaH/7AnR3Vx8ZoHg6+viOCdAmhgTo/fT9lXPOzWKRXzUmQGu7IkA/2NHHlwD1XdzfzvwqvAKd4/4ctHbdp9abu+Ms4AgBWntCdvIAbXJud+QaThWgsZFy77MfoLfcc2PvUv9O0q2l2hcAnhpv7vw8eF8pSgFaXf65A3TPXrVRw5kCNDZS3n1nCFAv9Qp/xshzO4vf9T5+282VOY3K23XWCdZee6xCrDlnzpUfpnvqy1m38faX2bVXbdSgOr4ZCuqLjdQ0te//67yBxgL0/gJA+Mty+Zpu7s5puHaQND6AUrNsulHayZ76ctZVDLEjhEv3Go7wR2JNdn2J5wbv+2vmdplBr4H+lvxXodtJ/J6noI0DdF/DMil7rEKsOfe+mgStri9n3cbbX+AIp7f9a9Ab3zy59aVenRq2gUMC9HbiPb8I+r3vOeS+p6CHCtDr2i5uvPsjzbkzrOp5ZHV9OeuqHd+fEaB645sps75kgI7awCEBek/Q11n8/jfSdzlWgOoyXn4vRxiWI9RgQzpAw9vdC2jUYnBPi0/AtyourK5NYwanufHyeznCsByhBhsI0LGsBmiP85EzHqUNxmljWIacGZ5x1/RBgI5lNEDnQWvaaPy2YS3GaX1YuuyJwhow+4gAPZDGCSTzzYbtpjtqetwOK3+YJuO0Oix99kRZDXDERmro6BGgu1qTxY0uek2IQeUP02icVobF+J/MM4qN1MjRI0D3NWf6hG5M+cO0Gqf0sBh/0eaUYiM1cPQI0J3tDdhP/Q7bcx2kzcYpOSzG3zY8p9hIjRu9QQHqXFBZdN4+ejI5IXlLIU//cWJPYGFIgP7zlxCg9Ths8xCgGG5IgH7JeQJU4dQqcdgaOMkbW6Lb26kDVG3PG5hyMT3LHhGg9yegO64f0tLuzZ3/CjSoJrvP2G2NSgqNLdHr7cwBqrbnDUy5mK5ljwjQnVdQamnv5k47YeQkEjc1VSspM7ZE58mn9Eq36J4YTW3PG5hyMX3LHhSgitcP8ezcXKUDKDIFDnEorxtb4jM25XWzV4cHyBC1PW9gysV0LnvQKfxpArRZU4X9Lk9CjnEyuWpsia8T997nmPpnsWp73sCUi+lc9qA3kQ7yEqjVAL0GR62B2awQoOE4dehHNz8J0EJnCNDbOfyeayg3ZDVAA8epJEkjQEf1pogALXOGAPUvSa+JAB2HAO2CAC1zigC9J6jD7OdA/Z2heTY36ox1BwK0C/0A1Z112737S5whQE/zTaR4gO5rc0cluhm+hQDtQj1AtZ85bPW+WOIMAXqabyI5u839E7ev0epKpPundnbxButkvSlS29B5uk1/vkd27xWx0vtyic7jNSJAT/RNJHfvyHznvkZrK1lM6MMZ+ydG9w/aQGob+ujP+8s9tPuphPfNzCX6jteIAP37j6Pk5/7NdZ5HN2x1XyVaFWyKjdVZelOktqHLjseP9PZRF1mi63gNCtBjvAffYnMPE6BXt46DhsbYA/1D8lNxQxcdGwnQruM16BT+PAEaa0nzjXjNCvB5dGddXYD2NOhNpLNcTCTeEgHa2Mc8l7SHAE3316jF8K7bU9BzfBMp0RIB2lbXF62wCwGa7q9Ri5H7DnNBOwLUgCk4SdDjIUDT/TVqMbjnFp8+s58D9VqS+WarRo1V0MdZt+scdPfOdu+j6yNA65uSxY3R9Cvo4rTPrM9Bd9Zt9z64PgJ0R1var9TpV9ADAXpsurNuu/ex9Y15DfQwmm6ufnrpV9ABAXpwurNuu/eh9RGgOBgCFHYQoDgYAvT4TnnqU2VYgN5fCP3l39cv3Y8zscuPjwA9vHO++F5lUIA+30e6B6iofqaePX58zmHJ7jqkaQ+RoIMC9PU+/C1AL6KaoOxwAzg+j42/cI5RX+WUX//788ctQO8JqvcpJna4CZwhHhqvsTiGBOi33C/H9AzQ+0m83rWZPn5/20B+HhkB6hgSoJfHN+FfAXp7Oqr3RtLH729gLwLUMSJAX9cDfQXo7SnoOb6JBHwkAtQx6Kuc9/eN3gH6TYACdhGgDgIUQAkC1KFwCn/hNVDALj7G5Bj0JtL9OecrQFV/o5MdDuzFB3Vnoz7G9Ns7QO+fCdX7JD07HNiND+pOxnwT6fHp+UeAfolo/roHexzYj/x8G/pVzifFLyIRoAAaGnQxkfuJ+wHykwAF0NCwy9m9IlQ1Pj87QDntAlrjgsqfghf+geYI0A/BR0+A9voGqOpnPmM+Njz48DPQAQH6Gfj6HdABAfoZCFCgAwL0MxCgQAf2AvQyvZtc0fTHZgcBCnRgLEC/xFfa+sdmBwEKdGAqQH/+kKXCD+Z/bHYQoEAHlgL08WUmJzEf37AvuzLJx2YHH2MCOugeoDGV3+f8DuKy+Np4nxsefJAeaM9SgF7CFUuf4n5weMzDr10JcBqGAvT1yyC+r7Jz+E9OD/ITaM3Qa6Cv36bzrf5CXSy8m5UD4OMRoABQyVCAcgoP4FgMBShvIgE4FksBGv8YU/isdAUBCqAdSwEa/SB92Tv6BCiAdiwFaOxTUfefmi8pTjYXAYBMpgLUvRTTExcTAaDH3m8icTk7AAcxMkC/K1OvIQIUQDv9A/Tnj+db568fhi+8+kdjBCiAdnoH6D02nwE6nXoXvu/TFAEKoJ3OATp/8uh+MeT76ful9BKeTRGgANrpHKDf0yn7l/NMVO8kngAF0E7nAL28T9hvT0Vft/7+o+zLQ00RoADa6f450Nf5+nzrFqp678QToADa6RqgkR+Be1N6HZQABdAOAQoAlbqfwj9f7/znr/mqH5Gr0g1DgAJop2+ATrn5PX8Fqfn340sQoADa6fwu/NfzU6C3c/n3x+eLL+HZFAEKoJ3OATpfgO71rPNbqn/VuAUCFEA7vb/K+U7QZ2g+31VS/DI8AQqgnf4XE/m6Z+brpP0Rp1xMBMA5jL0eqOq3kO4IUADt2Lug8i4EKIB2hgToRfFtIx8BCqCdEQH6z1+aV7DzEKAA2hkRoKofnfcRoADa4RkoAFQa8hrot+5nlxwEKIB2xrwL//PHQZ6DEqAA2hn0GqiPqzEBOAMCFAAqEaAAUIlvIgFAJQIUACoRoABQaViA3l8I/eXf1y/djzMRoADaGRSgz/eR7gHK9UABnMWYAH29D38L0AtXpAdwFkMC9P5Dcr/+9+eP+w/LXfhNJAAnMeq78L/fv8/5+GXOL36VE8A5jLmg8uOb8K8AVb02EwEKoJ1Bl7O7P+d8BejtKSjfRAJwBoO+ynl/3+gdoN8EKIBTIEABoJLCKfyF10ABnMKgN5HuzzlfAar6C0kEKIB2Rn2M6bd3gN4/E6r3SXoCFEA7Y76J9Pj0/CNAv0Q0f92DAAXQztCvcmpfTpkABdDSoIuJ3E/cD5CfBCiAhoZdzu4VoarxSYACaIkLKgNAJQIUACoRoABQaegV6fXfRyJAAbQzJkC/Gv4u/GVqpuILTQQogHaGBOjPH60CdJHExRlKgAJoZ9B34ductS+DuDyLCVAA7Qy6GlOT/Hx8lNRp6fHCatn3QglQAO0Muh5ok19B+g7isvjKJAQogHYsBeglfCJbem08AhRAOwMvqLxXtJmvsnN4AhRAO0PeRCpMuYTXL4P4Cn8fhAAF0M6QAG1zDk+AAjiWMR+k//mjwXNQTuEBHEvfAPW/wbn3g/S8iQTgUCwFaPxjTEUvDhCgANqxFKDRD9KXtUWAAmjH1OXsInn8/Kn5bAQogHZMBah7KaYnLiYCQI+xAL1yOTsAhzHmq5zeifZF8YfhCVAA7agEKFekB3AG4wP07z8IUACn0DlAl1eQf+IUHsAZdA7Q+AdBm1zcrgoBCqCd3qfw35r5GQvvUX0DOD+FN5HGIUAB9GQoQFt8L5QABdCOoQ/SE6AAjkUjQP/+n3UfY4r8qjEBCkDPoAD9rs88x/056L53oAhQAO2MCdDLjieNrluC7ns5lQAF0M6QAF2eexf9lPuypV2fwidAAbQzJEAvjysnfT0un3TZk5+PrzbtWZ0ABdDOoN+Fvz9tfP2A5teur8LfTuL3PAUlQAG0M+ZzoI+3fn7+eLx+eYtTvaegBCiAdgYF6J/z/16n8joIUADtDAzQ9++6//zB1ZgAnMHAAL1enk89f/7geqAAzmDQm0iPp55fz/d/vrmgMoBTGPIxptcb79/PT8F/Nbmg8lfVR+oJUADtjPog/T3tnp+C/676Oc0AAQpA27ivcv4+f6Nzz8eY3ghQANrGBOg/fz1S83VBuiafYiJAAWgbdTm772fcvZ6KNkCAAtBm6ILKPgIUgDYCFAAqDQvQ++uft8j7avUtJAIUgLZBAfp8++geoG3egydAAegbE6Cvt99vkXdp9CmmSgQogHaGBOj9U0y//vd5ObvLjl/02I8ABdDOkAD9fnx26XU90K9WH2SqQYACaGfQT3rc3zqaL6jM5ewAnMHAqzG9AnTnb3rsQ4ACaGfg9UDfAcrl7ACcAwEKAJUUTuEvvAYK4BQGvYl0f875CtDb81F+VA7AGYz6GNNv7wB9XdlOCQEKoJ1hF1T+138eAfolongGT4ACaGjoVzmfFL+IRIACaGjQxUTuJ+4HyE8CFEBDwy5n94pQ1fgkQAG0ZPaCynUIUADtDPogvd4Hl3wEKIB2BgWo7lVAZwQogHaGnML//HGE1z/vCFAA7Yx6DfSVodqn8gQogHYGvon0/f5ZD0UEKIB2xr4L/8xQvokE4BSGf4zp/nlQLmcH4AzGBuhF+8tIBCiAdsYF6Bdf5QRwLmMC9PsQj/ngAgAADuNJREFUX4S/EqAAWhr3QXrVN4/eCFAA7YwKUO2nni8EKIB2Bj4D1f0E6BMBCqCdQW8ivd5+V/9CPAEKoJ1x78K/Xwn9vXWHJQhQAO1ofBOJX+UEcAp8EwkAKo0N0Nc1mfTO4glQAO0MvxqT7qdBCVAA7YwK0K9m30R6v6Ff9VIqAQqgnSEB2u5DTF/iK81QAhRAO+M+SN/ghc/XS6iuwqe0BCiAdgYFaJMXPh8/Le8k5iOYy1omQAG0Y+l34b+DuLxHatHrAgQogHYsBeglPGEv/cl5AhRAO4YC9PZ0M3wd9avsHJ4ABdCOoQC9PdsMT9e/y95GIkABtEOAAkAlQwHKKTyAYzEUoLyJBOBYLAVo/GNMRR/QJ0ABtGMpQKMfpC/7KhIBCqAdSwE6XdTeUfhDSwQogHZMBah7KSYuJgJAm7EAvXI5OwCHYS9AdyFAAbRDgAJAJQIUACoZC9DHJZWdD4P+8xdf5QSgxVaAXpZvHxGgAPSYCtD5Dfj3k9D1AA0+NSoEKIB2LAXo/fz9fj2mrzlBCVAAeiwF6OX9vaP7dzqfCcopPAA9hgL0FpbTa5+XV4ISoAD0GApQ74LKrwQlQAHosRqg9wT9jQAFoMlsgD4vBUqAAtBjKEDd10Cvr9+EJ0AB6DEUoPePL7m/Kvfzh/zyfxKgANRYCtD750DdH/D4fnywkwAFoMRSgD4+Qe+exn8ToAAUmQrQR4K6z0Hvz0kJUABKbAVo+NvwXwQoAC3GAnQvAhRAOwQoAFQyG6Bfpb9o/ECAAmiHAAWASgQoAFQiQAGgEgEKAJUIUACoRIACQCUCFAAqEaAAUMlsgNYhQAG0Q4ACQCUCFAAqEaAAUIkABYBKBCgAVCJAAaASAQoAlQhQAKhEgAJAJQIUACoRoABQiQAFgEoEKABUIkABoBIBCgCVCFAAqESAAkAlAhQAKhGgAFCJAAWASgQoAFQiQAGgEgEKAJUIUACoRIACQCUCFAAqEaAAUIkABYBKBCgAVCJAAaASAQoAlQhQAKhEgAJAJXsBepG338pXJkABtGMsQL/EV5qhBCiAdkwF6M8fsvSv/xS1QIACaMdSgP7zl5+Yf/9x+/ev/y1pggAF0I6lAP0O4vIeqX+WNEGAAmjHUoBewhP225PQopdBCVAA7RgK0NvTzd+DO7/KzuEJUADtGArQ27PN8HT9u+xtJAIUQDsEKABUMhSgnMIDOBZDAcqbSACOxVKAxj/GFD4rXUGAAmjHUoBGP0hf9lUkAhRAO5YC9JmYvl/+XdQCAQqgHVMB6l6KiYuJANBmLECvXM4OwGHYC9BdCFAA7RCgAFDJaoA+3pEv+wjTHQEKoB1jAfr9ugLod931lAlQAA2ZCtDn00755d/zpem5Ij0ANaYC9P0G/K//9ys5L+tXpA8+NSoEKIB2LAXo7XnnPTYfPyz3ys3v1SvSE6AAerIUoF+vE/bL/AWk20k9FxMBoMRQgE6Xs7s9E51O3LmcHQA1hgJ0uqCy+7STCyoDUGMxQG/n8AQoAH22AvT1wfmLewpPgAJQYihAb2fu4eudF95EAqDFUIDeP7+0/MzSN1ekB6DGUoDer6f82/IOrkgPQIulAH1+A35+Enrx/5mDAAXQjqkAvSeo8xseX8X5SYACaMhWgN6edTqn7F/l16QnQAG0Yy1AdyJAAbRDgAJAJbMB+lX6i8YPBCiAdghQAKhEgAJAJQIUwEmMv2w6AQrgHBR+eIIABXAKU3AOTFACFMAZOLFJgG4iQAE43IN72IFOgAI4AwK0PwIUOCkCtD8CFDgpArQ/AhQ4KQK0PwIUOCkCtD8CFDgpPsbUHwEKnBUfpO+OAAVOi69y9kaAAufFxUQ6I0ABtEOAAkAlAhQAKhGgAFCJAAWASgQoAFQiQAGgEgEKAJUIUACoRIACQCUCFAAqEaAAUIkABYBKBCgAVCJAAaDSxwUoALTTPKNaN9iS9mADOJfmGdW6wfE40U9gYFIYmQQGppRoF7AfOz2BgUlhZBIYmFKiXcB+7PQEBiaFkUlgYEqJdgH7sdMTGJgURiaBgSkl2gXsx05PYGBSGJkEBqaUaBewHzs9gYFJYWQSGJhSol3Afuz0BAYmhZFJYGBKiXYB+7HTExiYFEYmgYEpJdoF7MdOT2BgUhiZBAamlGgXsB87PYGBSWFkEhiYUqJdwH7s9AQGJoWRSWBgSol2Afux0xMYmBRGJoGBKSXaBezHTk9gYFIYmQQGppRoF7AfOz2BgUlhZBIYmFKiXcB+7PQEBiaFkUlgYEqJdgH7sdMTGJgURiaBgSkl2gXsx05PYGBSGJkEBqaUaBewHzs9gYFJYWQSGJhSol3Afuz0BAYmhZFJYGBKiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoF7DT33/Ize/aZRzB33/86z/uv5YD85lD9fPHfat/+fd8DyPz8ByY9WH4yIEpI9oF7HORJ/cA+VD//CVOgIYD85FD9UyAu2lsGJm722x5+fN9FwNTQ7QL2OXrPQvc7PhQF3cQwoH5yKGa83PabEbmbs7PKUEZmCqiXcAe97OQX/97vX7f9vFv2sXoehwR0zwPB+Yzh+rrfQJ6eW82I/PwHpj7X5j7xjMwtUS7gD0uzr7+7NOM5wta7mnqYmA+cqju8TA/v3qMDiNzd/tz+9rU2xClhuETB6acaBeww23nO2cbH/xS9+NM9X/Mr4GGA/OZQ/U9P3W6P0P/k5F5uUXie2C+nn9jGJhKol3ADs40uN18nol8pPsJ2Z/Om0jhwHzmULkH/uURFIzM0nNcGJhaol3ADs7x4b8F/Wm+7lPdGYJwYBiqy+M0lJFZeD/PZGAqiXYBO3zNn8GYX9T5WH6ALgbm44fqFhT3J1GMjO/r/S48A1NJtAvY4eJ9iO3Td7EToOHAfPxQvZ5PMTKOi/MpJgamkmgXsMPF+8zvp+9iL0CXA/PpQ/V+FY+RcfgBysBUEe0CdvB38fz38jMlA/Q+MB8+VLf8fI4NIzN7f5g+8pflswemiGgXsAN/I108A036nj4iy8gs3D8Bd3+znYGpJNoF7MAudhGgKXN+MjKB1yfpGZhKol3ADrxP6OJd+ISLzJ9iZGQCz7fXGJhKol3ADt98Us3hDEE4MJ87VPdX+uZvcjMygecAMDCVRLuAHfiuhItvIsXcLxLwu/dPRsb3DEoGppJoF7AD39Z1/cN34UPL62AwMg/hd1wZmEqiXcAeXC/G4Z5ncWmdp+laQxNG5m76WNd8k4GpI9oF7MEVCx1ugHJxx4fXJZhcjMzdfWCek2W6YioDU0e0C9iFa2bPvFf6ubz43bd43l/6ZmRe1499eoUjA1NFtAvYh19tmfCbSEvu71Z43/r++JFxf+xkenLJwNQQ7QJ24ncD3xafNeEnFr1fRJoDlJF5ej4Jdd9dZ2AqiHYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBAGCVaBcAAFaJdgEAYJVoFwAAVol2AQBglWgXAABWiXYBwLq//5Bf/9tgGaA90S4AWEeA4rhEuwBgHQGK4xLtAoB1BCiOS7QLANYRoDgu0S4AWEeA4rhEuwBg3Tsc//lL/vWf65fczGF5e1DkNzdAv+8LyJ+P2z9/3Fd5rvvLv8eXjtMT7QKAdW6A/j8/RJyAfMbpLVD/v3eA/nwv8AzO2+O/u/8H2hLtAoB1ToDOns8nv97//F9fy0z5+UrQ1zNPzvDRiWgXAKzzA/T+1POem78/H3k89P0+rb8v8ojW78eJ/fPG7f8XTuDRh2gXAKxzA/R5Yn5P0N+e/5tP3F9J+nqmeVvpmZn37Px+pSnQmmgXAKxzA/T1QuYtMG93Oe8MvaLUeab5/Vr4tvb/+OsdvEBjol0AsM4J0Hc8PgP09sA7GKc7ppc6b/c8n3V+Oe85AY2JdgHAurUAfefl8+bjM02z54P3J66cwKMT0S4AWJcM0Od/nWWc9+CdT4teCFB0I9oFAOvyn4E6iTr75hQe/Yh2AcC6otdAg3eLbmv9L/8HnwJFJ6JdALAuGaD3VzfnbyT5d0xuj/z5zfeQ0IloFwCsSwbo/DnQ9yfqv6ZPir4/xvRYkm/CoxfRLgBYlw7Qe27eA/Px5tH7judz0MvrK0nPT4ZOn2kC2hLtAoB16QB9XXnp5n//Y3HH68ue7+8gXXgfCV2IdgHAupUAfQdm5GpMj/z85/0dJK4mgj5EuwBg3VqAPq8w4l0P9HmFpulbSL9fF7eAhkS7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACwSrQLAACrRLsAALBKtAsAAKtEuwAAsEq0CwAAq0S7AACw6v8HthPTPYi3seoAAAAASUVORK5CYII=)
# Looks like Min.TemperatureF = 99 may be a missing value code.
# Let's make an indicator called "freezing" for days that never got above 32:
weather$freezing <- ifelse(weather$Max.TemperatureF <= 32, 1, 0)
head(weather$freezing)
## [1] 0 0 0 0 0 0
sum(weather$freezing) # number of days that never got above freezing
## [1] 6
which(weather$freezing==1) # which rows?
## [1] 22 23 24 25 48 342
weather[weather$freezing==1,"EST", drop=FALSE] # which days?
## EST
## 22 1/22/2013
## 23 1/23/2013
## 24 1/24/2013
## 25 1/25/2013
## 48 2/17/2013
## 342 12/8/2013
# More on ifelse()
# ifelse() is a vectorized version of an if-then-else construction. The first
# argument is a TRUE/FALSE comparison. The second argument is the value to
# output in the case of TRUE. The third argument is the value to output in the
# case of FALSE. It's a nice function for creating a vector of binary values and
# it's not limited to outputting numbers. The second and third arguments can be
# text. The output of ifelse() will always be the same length as the conditional
# argument.
# In this case, we could have just used weather$Max.TemperatureF <= 32 by itself
# to create a logical vector. Recall TRUE and FALSE are treated as 1 and 0. If
# we wanted actual 0/1 values, we could have wrapped weather$Max.TemperatureF <=
# 32 in as.numeric(). In fact, that's actually much faster. Let's demonstrate:
# vector with 10,000,000 random normal values
bv <- rnorm(1e7)
print(object.size(bv), units = "Mb")
## 76.3 Mb
# create a vector of 0/1 based on bv > 0:
system.time(out1 <- ifelse(bv > 0, 1, 0))
## user system elapsed
## 1.62 0.20 1.82
system.time(out2 <- as.numeric(bv > 0))
## user system elapsed
## 0.07 0.00 0.08
# The first one is easier to "read" but the second is faster.
rm(bv, out1, out2)
# We can also use the within() function to derive new variables and add to a
# data frame. The syntax is within(data, expr), where data is a data frame and
# expr is some expression. This allows us to not have to reference the data
# frame each time we refer to a value.
# Let's add a variable for humidity range:
weather <- within(weather, humidity.range <- Max.Humidity - Min.Humidity)
# within() allows you to change multiple variables and even use variables you
# just created. Just make sure you use curly brackets: {}. Here we create a mean
# temperature variable in celsius units and then standardize the new variable.
# Standardizing a variable means converting to number of standard deviations
# from mean.
weather <- within(weather, {
Mean.TemperatureC <- (Mean.TemperatureF - 32)/1.8
Mean.TemperatureCZ <- (Mean.TemperatureC - mean(Mean.TemperatureC))/sd(Mean.TemperatureC)
})
# NOTE: While using within() results in cleaner code, it means we can't take
# advantage of RStudio's Tab completion since we're not explicitly referencing
# the data frame.
# Rename columns in a data frame ------------------------------------------
# First we can see existing names with the names() function
names(weather)
## [1] "EST" "Max.TemperatureF"
## [3] "Mean.TemperatureF" "Min.TemperatureF"
## [5] "Max.Dew.PointF" "MeanDew.PointF"
## [7] "Min.DewpointF" "Max.Humidity"
## [9] "Mean.Humidity" "Min.Humidity"
## [11] "Max.Sea.Level.PressureIn" "Mean.Sea.Level.PressureIn"
## [13] "Min.Sea.Level.PressureIn" "Max.VisibilityMiles"
## [15] "Mean.VisibilityMiles" "Min.VisibilityMiles"
## [17] "Max.Wind.SpeedMPH" "Mean.Wind.SpeedMPH"
## [19] "Max.Gust.SpeedMPH" "PrecipitationIn"
## [21] "CloudCover" "Events"
## [23] "Temp.Range" "freezing"
## [25] "humidity.range" "Mean.TemperatureCZ"
## [27] "Mean.TemperatureC"
# But not only can we see names with the names() function, we can change names
# with the names() function.
names(weather)[21] <- "Cloud.Cover.Index"
# This is just a nightmare....
names(electionData)
## [1] "State" "Total" "Total.1"
## [4] "Elec Vote" NA "NA.1"
## [7] "Pop Vote" "NA.2" "NA.3"
## [10] "Margin of Victory" "NA.4" "Obama"
## [13] "NA.5" "Romney" "NA.6"
## [16] "0" "NA.7" "Johnson"
## [19] "NA.8" "Stein" "NA.9"
## [22] "Goode" "NA.10" "Harris"
## [25] "NA.11" "Alexander" "NA.12"
## [28] "Lindsay" "NA.13" "Write-ins"
## [31] "NA.14" "Anderson" "NA.15"
## [34] "Hoefling" "NA.16" "Barr"
## [37] "NA.17" "None" "NA.18"
## [40] "Carlson" "NA.19" "Morstad"
## [43] "NA.20" "Miller" "NA.21"
## [46] "Fellure" "NA.22" "Stevens"
## [49] "NA.23" "White" "NA.24"
## [52] "Barnett" "NA.25" "Terry"
## [55] "NA.26" "Reed" "NA.27"
## [58] "Litzel" "NA.28" "Tittle"
## [61] "NA.29" "Duncan" "NA.30"
## [64] "Boss" "NA.31" "Washer"
## [67] "NA.32" "Baldwin" "NA.33"
## [70] "Christensen" "NA.34" "NA.35"
## [73] "" ".1" "J"
## [76] "S" "H" "G"
## [79] ".2" ".3" ".4"
## [82] ".5"
# Let's clean up the electionData names. First we notice that the column names
# were split across two rows in the original Excel file. Look at electionData in
# the viewer. Or enter View(electionData) in the console.
# extract names into vector
top <- names(electionData)
top
## [1] "State" "Total" "Total.1"
## [4] "Elec Vote" NA "NA.1"
## [7] "Pop Vote" "NA.2" "NA.3"
## [10] "Margin of Victory" "NA.4" "Obama"
## [13] "NA.5" "Romney" "NA.6"
## [16] "0" "NA.7" "Johnson"
## [19] "NA.8" "Stein" "NA.9"
## [22] "Goode" "NA.10" "Harris"
## [25] "NA.11" "Alexander" "NA.12"
## [28] "Lindsay" "NA.13" "Write-ins"
## [31] "NA.14" "Anderson" "NA.15"
## [34] "Hoefling" "NA.16" "Barr"
## [37] "NA.17" "None" "NA.18"
## [40] "Carlson" "NA.19" "Morstad"
## [43] "NA.20" "Miller" "NA.21"
## [46] "Fellure" "NA.22" "Stevens"
## [49] "NA.23" "White" "NA.24"
## [52] "Barnett" "NA.25" "Terry"
## [55] "NA.26" "Reed" "NA.27"
## [58] "Litzel" "NA.28" "Tittle"
## [61] "NA.29" "Duncan" "NA.30"
## [64] "Boss" "NA.31" "Washer"
## [67] "NA.32" "Baldwin" "NA.33"
## [70] "Christensen" "NA.34" "NA.35"
## [73] "" ".1" "J"
## [76] "S" "H" "G"
## [79] ".2" ".3" ".4"
## [82] ".5"
bot <- electionData[1,] # extract first row
# look at bot; it's a data frame with "names"
bot # ugly!
## State Total Total.1 Elec Vote NA NA.1 Pop Vote NA.2 NA.3
## 1 <NA> Elec Vote Popular Vote D R O D R I
## Margin of Victory NA.4 Obama NA.5 Romney NA.6
## 1 Votes % Total Vote Democratic NA Republican NA
## 0 NA.7 Johnson NA.8 Stein NA.9 Goode NA.10
## 1 Independent NA Libertarian NA Green NA Constitution NA
## Harris NA.11 Alexander NA.12 Lindsay NA.13
## 1 Socialist Workers NA Socialist NA Socialism and Liberation NA
## Write-ins NA.14 Anderson NA.15 Hoefling NA.16 Barr NA.17
## 1 - NA Justice NA American Ind. NA Peace & Freedom NA
## None NA.18 Carlson NA.19 Morstad NA.20
## 1 - NA Grassroots NA Const. Government NA
## Miller NA.21 Fellure NA.22 Stevens NA.23
## 1 American Third Position NA Prohibition NA Objectivist NA
## White NA.24 Barnett NA.25 Terry NA.26 Reed
## 1 Socialist Equality NA Reform NA Independent NA Independent
## NA.27 Litzel NA.28 Tittle NA.29 Duncan NA.30
## 1 NA Independent NA We the People NA Independent NA
## Boss NA.31 Washer NA.32 Baldwin NA.33 Christensen NA.34 NA.35
## 1 NSA Did 911 NA Reform NA Reform NA Constitution NA State
## .1 J S H G .2 .3 .4 .5
## 1 <NA> EV NA NA NA NA State Code Blanks EV Meth
class(bot)
## [1] "tbl_df" "tbl" "data.frame"
# "tbl_df" "tbl"? That's because we used the readxl package to import the data,
# which made the data frames of class "tbl_df", a class provided for the dplyr
# package. We'll cover that package in much greater detail soon.
names(bot) # has the same column names
## [1] "State" "Total" "Total.1"
## [4] "Elec Vote" NA "NA.1"
## [7] "Pop Vote" "NA.2" "NA.3"
## [10] "Margin of Victory" "NA.4" "Obama"
## [13] "NA.5" "Romney" "NA.6"
## [16] "0" "NA.7" "Johnson"
## [19] "NA.8" "Stein" "NA.9"
## [22] "Goode" "NA.10" "Harris"
## [25] "NA.11" "Alexander" "NA.12"
## [28] "Lindsay" "NA.13" "Write-ins"
## [31] "NA.14" "Anderson" "NA.15"
## [34] "Hoefling" "NA.16" "Barr"
## [37] "NA.17" "None" "NA.18"
## [40] "Carlson" "NA.19" "Morstad"
## [43] "NA.20" "Miller" "NA.21"
## [46] "Fellure" "NA.22" "Stevens"
## [49] "NA.23" "White" "NA.24"
## [52] "Barnett" "NA.25" "Terry"
## [55] "NA.26" "Reed" "NA.27"
## [58] "Litzel" "NA.28" "Tittle"
## [61] "NA.29" "Duncan" "NA.30"
## [64] "Boss" "NA.31" "Washer"
## [67] "NA.32" "Baldwin" "NA.33"
## [70] "Christensen" "NA.34" "NA.35"
## [73] "" ".1" "J"
## [76] "S" "H" "G"
## [79] ".2" ".3" ".4"
## [82] ".5"
# remove names from bot
names(bot) <- NULL
names(bot)
## NULL
# Now convert bot to a character vector; first we convert to matrix which
# converts all data to character, and then we convert to a vector.
bot <- as.vector(as.matrix(bot))
bot
## [1] NA "Elec Vote"
## [3] "Popular Vote" "D"
## [5] "R" "O"
## [7] "D" "R"
## [9] "I" "Votes"
## [11] "% Total Vote" "Democratic"
## [13] NA "Republican"
## [15] NA "Independent"
## [17] NA "Libertarian"
## [19] NA "Green"
## [21] NA "Constitution"
## [23] NA "Socialist Workers"
## [25] NA "Socialist"
## [27] NA "Socialism and Liberation"
## [29] NA "-"
## [31] NA "Justice"
## [33] NA "American Ind."
## [35] NA "Peace & Freedom"
## [37] NA "-"
## [39] NA "Grassroots"
## [41] NA "Const. Government"
## [43] NA "American Third Position"
## [45] NA "Prohibition"
## [47] NA "Objectivist"
## [49] NA "Socialist Equality"
## [51] NA "Reform"
## [53] NA "Independent"
## [55] NA "Independent"
## [57] NA "Independent"
## [59] NA "We the People"
## [61] NA "Independent"
## [63] NA "NSA Did 911"
## [65] NA "Reform"
## [67] NA "Reform"
## [69] NA "Constitution"
## [71] NA "State"
## [73] NA "EV"
## [75] NA NA
## [77] NA NA
## [79] "State Code" "Blanks"
## [81] "EV" "Meth"
# now we can paste the two vectors together using the paste() function.
paste(top,bot)
## [1] "State NA" "Total Elec Vote"
## [3] "Total.1 Popular Vote" "Elec Vote D"
## [5] "NA R" "NA.1 O"
## [7] "Pop Vote D" "NA.2 R"
## [9] "NA.3 I" "Margin of Victory Votes"
## [11] "NA.4 % Total Vote" "Obama Democratic"
## [13] "NA.5 NA" "Romney Republican"
## [15] "NA.6 NA" "0 Independent"
## [17] "NA.7 NA" "Johnson Libertarian"
## [19] "NA.8 NA" "Stein Green"
## [21] "NA.9 NA" "Goode Constitution"
## [23] "NA.10 NA" "Harris Socialist Workers"
## [25] "NA.11 NA" "Alexander Socialist"
## [27] "NA.12 NA" "Lindsay Socialism and Liberation"
## [29] "NA.13 NA" "Write-ins -"
## [31] "NA.14 NA" "Anderson Justice"
## [33] "NA.15 NA" "Hoefling American Ind."
## [35] "NA.16 NA" "Barr Peace & Freedom"
## [37] "NA.17 NA" "None -"
## [39] "NA.18 NA" "Carlson Grassroots"
## [41] "NA.19 NA" "Morstad Const. Government"
## [43] "NA.20 NA" "Miller American Third Position"
## [45] "NA.21 NA" "Fellure Prohibition"
## [47] "NA.22 NA" "Stevens Objectivist"
## [49] "NA.23 NA" "White Socialist Equality"
## [51] "NA.24 NA" "Barnett Reform"
## [53] "NA.25 NA" "Terry Independent"
## [55] "NA.26 NA" "Reed Independent"
## [57] "NA.27 NA" "Litzel Independent"
## [59] "NA.28 NA" "Tittle We the People"
## [61] "NA.29 NA" "Duncan Independent"
## [63] "NA.30 NA" "Boss NSA Did 911"
## [65] "NA.31 NA" "Washer Reform"
## [67] "NA.32 NA" "Baldwin Reform"
## [69] "NA.33 NA" "Christensen Constitution"
## [71] "NA.34 NA" "NA.35 State"
## [73] " NA" ".1 EV"
## [75] "J NA" "S NA"
## [77] "H NA" "G NA"
## [79] ".2 State Code" ".3 Blanks"
## [81] ".4 EV" ".5 Meth"
# use the pasted together vector to replace the names of the electionData
names(electionData) <- paste(top, bot)
electionData <- electionData[-1,] # drop the first row
# a little better now, but still needs work. We'll get to it....
# Removing duplicates -----------------------------------------------------
# Base R has two main functions for dealing with duplicate data: unique() and
# duplicated().
# duplicated(x) gives the indices of duplicated elements. unique(x) returns a
# vector, data frame or array like x but with duplicate elements/rows removed.
# Another function, anyDuplicated(), is a shortcut for any(duplicated(x)), that
# returns the index i of the first duplicated entry if there is one, and
# 0 otherwise
# any duplicate dates in weather?
any(duplicated(weather$EST))
## [1] FALSE
anyDuplicated(weather$EST) # more efficient
## [1] 0
# Any duplicate max Temps? Of course, but let's check
anyDuplicated(weather$Max.TemperatureF)
## [1] 10
# The temp at position 10 is the first duplicate
weather$Max.TemperatureF[1:10]
## [1] 48 41 40 45 49 50 47 52 60 60
# To see the unique max Temps:
unique(weather$Max.TemperatureF)
## [1] 48 41 40 45 49 50 47 52 60 43 57 64 62 44 32 31 27 23 36 38 53 74 69
## [24] 59 33 34 37 39 46 51 30 42 55 67 58 54 63 65 72 81 86 91 80 73 83 79
## [47] 75 77 71 66 88 85 68 84 87 78 89 76 82 90 92 93 70 61 56
# uniqe and duplicated work on data frames as well.
dim(arrests)
## [1] 11616 17
dim(unique(arrests)) # same size, no duplicates.
## [1] 11616 17
any(duplicated(weather))
## [1] FALSE
dupeDat <- data.frame(x=c(1,2,1,3),y=c(1,2,1,2))
dupeDat # duplicate rows 1 and 3
## x y
## 1 1 1
## 2 2 2
## 3 1 1
## 4 3 2
dim(dupeDat)
## [1] 4 2
dim(unique(dupeDat))
## [1] 3 2
(dedupeData <- unique(dupeDat))
## x y
## 1 1 1
## 2 2 2
## 4 3 2
# Reorder columns in a data frame -----------------------------------------
# We rarely need to reorder the columns in a data frame. R doesn't care about
# the order, nor do any of the statistical functions you'll use on the data.
# However, it is possible. One way is to simply specify the column numbers in
# the order you want. For example, move temp.range next to the temperature
# columns:
names(weather)
## [1] "EST" "Max.TemperatureF"
## [3] "Mean.TemperatureF" "Min.TemperatureF"
## [5] "Max.Dew.PointF" "MeanDew.PointF"
## [7] "Min.DewpointF" "Max.Humidity"
## [9] "Mean.Humidity" "Min.Humidity"
## [11] "Max.Sea.Level.PressureIn" "Mean.Sea.Level.PressureIn"
## [13] "Min.Sea.Level.PressureIn" "Max.VisibilityMiles"
## [15] "Mean.VisibilityMiles" "Min.VisibilityMiles"
## [17] "Max.Wind.SpeedMPH" "Mean.Wind.SpeedMPH"
## [19] "Max.Gust.SpeedMPH" "PrecipitationIn"
## [21] "Cloud.Cover.Index" "Events"
## [23] "Temp.Range" "freezing"
## [25] "humidity.range" "Mean.TemperatureCZ"
## [27] "Mean.TemperatureC"
weather <- weather[,c(1:4,24,5:23,25:27)]
# Document a data frame ---------------------------------------------------
# The comment() function allows you to add comments to a data frame. This is
# good for documentation purposes.
comment(arrests) <- "Analysis of Arrests in Paris, June 1848"
# To see comments on a data frame:
comment(arrests)
## [1] "Analysis of Arrests in Paris, June 1848"
# We can set our own object attributes using the attr() function. The syntax is
# attr(x, which), where x is the object and which is the name of your attribute.
# For example, I could create an attribute called URL that contains the link to
# where I downloaded these data:
attr(arrests, "URL") <- "http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/00049"
# Notice the comment and URL attributes are printed at the bottom of str()
# output as an "attr", or Object Attribute.
str(arrests)
## 'data.frame': 11616 obs. of 17 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Source : int 9 9 9 9 9 9 9 9 9 9 ...
## $ Constant : int 55 55 55 55 55 55 55 55 55 55 ...
## $ Occup : int 172 92 43 70 24 155 20 23 24 999 ...
## $ DeptBorn : int 31 15 2 72 75 76 75 62 62 0 ...
## $ CommuneBorn : int 99 99 99 99 63 99 63 99 9 99 ...
## $ Sex : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Age : int 28 30 27 36 26 40 48 27 34 99 ...
## $ MaritalStatus : int 2 1 9 1 9 9 9 1 9 9 ...
## $ Children : int 99 1 99 1 99 99 99 99 99 99 ...
## $ Arrondissement: int 21 6 4 8 8 27 7 9 23 32 ...
## $ QuarterResided: int 99 21 15 32 30 99 25 33 99 99 ...
## $ FirstJudicial : int 3 3 1 3 1 1 1 0 3 3 ...
## $ FinalJudicial : int 1 1 0 0 0 0 0 0 0 0 ...
## $ FirstDecision : int 3 3 1 3 1 1 1 0 3 3 ...
## $ FinalOutcome : int 2 2 1 3 1 1 1 0 3 3 ...
## $ CommuneName : Factor w/ 5285 levels " ",..: 265 3466 2666 2537 3570 3485 3570 869 180 1 ...
## - attr(*, "comment")= chr "Analysis of Arrests in Paris, June 1848"
## - attr(*, "URL")= chr "http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/00049"
# To view all object attributes, use attributes(). Probably not wise to use on a
# large data frame since one of the attributes is "row.names", which usually
# contains row numbers. Instead you can wrap a call to attributes in str().
str(attributes(arrests))
## List of 5
## $ names : chr [1:17] "ID" "Source" "Constant" "Occup" ...
## $ class : chr "data.frame"
## $ row.names: int [1:11616] 1 2 3 4 5 6 7 8 9 10 ...
## $ comment : chr "Analysis of Arrests in Paris, June 1848"
## $ URL : chr "http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/00049"
# To view specific attributes, use the attr() function:
attr(arrests, "URL")
## [1] "http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/00049"
# save data for next set of lecture notes
save(list=c("electionData", "weather", "arrests", "allStocks"), file="../data/datasets_L03.Rda")