load("../data/datasets_L07.Rda")
# The data.table package is very similar to dplyr in its mission: provide fast
# aggregation of data using short, flexible syntax. I suppose you could say
# data.table and dplyr compete with one another, though I think any competition
# between them is ultimately friendly. The developers of each always seem very
# complimentary of the other. Which is "better"? That's up to you!
# I will admit I'm partial to dplyr only because I started using it before
# data.table. I've been teaching myself data.table and it's not coming to me as
# easily as dplyr did. I believe that says less about data.table and more about
# me! Perhaps data.table will come faster to you. I'll try my best to give it a
# fair and thorough treatment below.
# install.packages("data.table")
library(data.table)
# Like dplyr, data.table is intended to work on data frames. You may recall that
# dplyr gave us the option to wrap a data frame with a "data frame tbl" using
# the tbl_df() function, but it wasn't required. You can still use dplyr
# functions on a data frame without it being wrapped by tbl_df(). data.table on
# the other hand requires you to convert the data frame to a "data table" in
# order to use data.table functions.
# To create a data table, use the data.table() function:
# (allStocks is not defined in this script; presumably it is one of the objects
# restored by the load() call at the top - confirm against the .Rda file.)
class(allStocks)
## [1] "data.frame"
allStocksDT <- data.table(allStocks)
class(allStocksDT)
## [1] "data.table" "data.frame"
# so we see that allStocksDT is both a data table and a data frame. (The
# result was assigned to a new name, so allStocks itself is left unchanged.)
is.data.frame(allStocksDT)
## [1] TRUE
is.data.table(allStocksDT)
## [1] TRUE
# A data table is still a data frame, so packages and functions written for
# data frames accept it. aggregate(), for instance, is a data frame function:
aggregate(Volume ~ Stock, data = allStocksDT, FUN = mean)
## Stock Volume
## 1 bbby 2055076.4
## 2 flws 200264.3
## 3 foxa 12481889.4
## 4 ftd 224691.0
## 5 tfm 742273.8
## 6 twx 4817040.0
## 7 viab 2777436.5
# And since it's a data frame we can use the same base R functions. For example:
# Column names:
names(allStocksDT)
## [1] "Date" "Open" "High" "Low" "Close" "Volume" "Stock"
# Number of columns:
ncol(allStocksDT)
## [1] 7
# Number of rows, then number of columns:
dim(allStocksDT)
## [1] 1621 7
# Compact summary of the structure, one line per column:
str(allStocksDT)
## Classes 'data.table' and 'data.frame': 1621 obs. of 7 variables:
## $ Date : Date, format: "2014-03-26" "2014-03-25" ...
## $ Open : num 67.8 67.6 67.7 68.4 67.6 ...
## $ High : num 68 67.9 68 68.4 68.1 ...
## $ Low : num 67.2 67.3 67 67.3 67.5 ...
## $ Close : num 67.2 67.7 67.3 67.5 67.8 ...
## $ Volume: int 1785164 1571625 1742341 3639114 1328860 2116779 1841733 3135071 2519323 2172587 ...
## $ Stock : Factor w/ 7 levels "bbby","flws",..: 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Stock is a factor (see the str() output), so levels() lists its distinct
# values:
levels(allStocksDT$Stock)
## [1] "bbby" "flws" "foxa" "ftd" "tfm" "twx" "viab"
# Just as with data.frame(), data.table() can also be used to build a data
# table "manually" from individual columns:
DT <- data.table(x = 1:5, y = pi / 1:5, z = letters[1:5])
DT
## x y z
## 1: 1 3.1415927 a
## 2: 2 1.5707963 b
## 3: 3 1.0471976 c
## 4: 4 0.7853982 d
## 5: 5 0.6283185 e
# Like dplyr's tbl_df() function, data.table has the effect of suppressing the
# printing of entire data frames to the console.
allStocksDT
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
# It prints the first 5 and last 5 records and places a colon after the row
# number.
# How about indexing? Does that work the same? Not quite.
# We can still extract, say, row 1:
allStocksDT[1, ]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
# But trying to extract 3rd column of row 2 produces this:
allStocksDT[2, 3]
## [1] 3
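# What's going on here? It turns out that data table uses indexing brackets much
# differently than data frames. The basic arguments within brackets are not row
# and column numbers but rather "i", "j" and "by". If you're familiar with SQL
# it may be useful to think of i as WHERE, j as SELECT and by as GROUP BY. Say
# you have a data table called DT. In words, DT[i, j, by] translates to "Take
# DT, subset rows using i, then calculate j grouped by by".
# (Note: the "[1] 3" result above comes from an older data.table release, where
# a bare number in j was evaluated as an expression. From version 1.9.8 onward,
# a j made up only of numbers or quoted names is treated as a column selection,
# so the same call returns a one-row data table holding the High value of
# row 2.)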
# Selecting rows by position:
allStocksDT[1:5, ]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
# Unlike a data frame, a data table doesn't actually need the comma when only
# rows are being selected:
allStocksDT[1:5]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
# Can use conditional selection:
# (a logical expression in i keeps only the rows for which it is TRUE)
allStocksDT[Close < 50]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 5.63 5.71 5.43 5.52 158853 flws
## 2: 2014-03-25 5.43 5.67 5.34 5.57 255168 flws
## 3: 2014-03-24 5.68 5.74 5.40 5.40 219552 flws
## 4: 2014-03-21 5.69 5.80 5.67 5.69 160431 flws
## 5: 2014-03-20 5.84 5.86 5.66 5.68 213300 flws
## ---
## 775: 2013-04-04 40.31 40.90 39.78 40.62 574740 tfm
## 776: 2013-04-03 40.98 41.25 39.90 40.03 595457 tfm
## 777: 2013-04-02 41.54 41.81 40.96 41.00 406618 tfm
## 778: 2013-04-01 42.68 43.48 41.23 41.38 391473 tfm
## 779: 2013-03-28 42.65 42.96 42.57 42.77 835846 tfm
# Conditions can be combined with &:
allStocksDT[Close < 50 & Stock == "tfm"]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 34.19 34.50 33.31 33.46 611916 tfm
## 2: 2014-03-25 34.19 34.54 33.66 34.04 498802 tfm
## 3: 2014-03-24 34.51 34.82 33.76 34.08 807884 tfm
## 4: 2014-03-21 34.18 34.64 34.09 34.33 801948 tfm
## 5: 2014-03-20 34.04 34.37 33.68 34.18 620820 tfm
## ---
## 158: 2013-04-04 40.31 40.90 39.78 40.62 574740 tfm
## 159: 2013-04-03 40.98 41.25 39.90 40.03 595457 tfm
## 160: 2013-04-02 41.54 41.81 40.96 41.00 406618 tfm
## 161: 2013-04-01 42.68 43.48 41.23 41.38 391473 tfm
## 162: 2013-03-28 42.65 42.96 42.57 42.77 835846 tfm
# Notice we didn't have to preface Close or Stock with allStocksDT$. Nice
# benefit of data.table.
# Selecting columns:
allStocksDT[1:5, .(Open, High)]
## Open High
## 1: 67.76 68.05
## 2: 67.61 67.93
## 3: 67.73 68.00
## 4: 68.41 68.41
## 5: 67.58 68.12
# To select only columns we would still need the comma. (The 1:5 in the i
# argument is there just to limit console output.)
# And the .() wrapped around the column names? It's an alias to list().
allStocksDT[1:5, list(Open, High)] # same as previous
## Open High
## 1: 67.76 68.05
## 2: 67.61 67.93
## 3: 67.73 68.00
## 4: 68.41 68.41
## 5: 67.58 68.12
# Without .() or list() the result is a vector rather than a data table:
allStocksDT[1:5, c(Open, High)]
## [1] 67.76 67.61 67.73 68.41 67.58 68.05 67.93 68.00 68.41 68.12
# When you use .() in j, the result is always a data.table.
# Coming back to allStocksDT[2,3]: can it be made to behave the way it does for
# a data frame? Yes, by setting the with argument to FALSE:
allStocksDT[2, 3, with = FALSE]
## High
## 1: 67.93
# data.table brackets go beyond selection, though: j can also compute on
# columns. For example, the mean and standard deviation of the Open price:
allStocksDT[, .(meanOpen = mean(Open), sdOpen = sd(Open))]
## meanOpen sdOpen
## 1: 48.28676 25.07568
# Column selection and computation can be combined in the same j:
allStocksDT[, .(Open, meanOpen = mean(Open))]
## Open meanOpen
## 1: 67.76 48.28676
## 2: 67.61 48.28676
## 3: 67.73 48.28676
## 4: 68.41 48.28676
## 5: 67.58 48.28676
## ---
## 1617: 66.04 48.28676
## 1618: 66.30 48.28676
## 1619: 65.69 48.28676
## 1620: 63.76 48.28676
## 1621: 65.78 48.28676
# Notice the mean was "recycled" to fill the data table
# Pretty much any expression can go into j. This one graphs bbby volume over
# time:
allStocksDT[
  Stock == "bbby",
  plot(Date, Volume, type = "l", main = "bbby Volume")
]
![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABUAAAAPACAMAAADDuCPrAAAAe1BMVEUAAAAAADoAAGYAOjoAOpAAZmYAZpAAZrY6AAA6OgA6ZmY6kLY6kNtmAABmZgBmZjpmZmZmtrZmtv+QOgCQZgCQkGaQkNuQtpCQ29uQ2/+2ZgC2tma225C2/7a2///bkDrbtmbb25Db/9vb////tmb/25D//7b//9v///9GHMTHAAAACXBIWXMAAB2HAAAdhwGP5fFlAAAgAElEQVR4nO3di5riSJdeYVVP2ZW2q+1xtk+N7cZ2kd3c/xUanUOKkFBsfUIRxHqfZ+avJmEjUrBSgBDVHQBgUp29AACQKwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKAEYEFACMCCgAGBFQADAioABgREABwIiAAoARAQUAIwIKrWv18GPnyWv++aO+zG9/OSf9/Xt90vdfSxdpfv7tz6irATYgoNA6PKDtZapP55Rbc8rPxUsQUByEgELr+IB+fcwvdKnWA0lAcRACCq3jA+o9h2+fwa9MIaA4CAGF1vEB9Z7Dt5ukn8sXIKA4CAGF1gsCOn8Of/XeVZohoDgIAYXWCwI6ew7f/ufaEAKKgxBQaPVJvE2fZ0ec3L6k2b+n3v7Xp38dw2nzZ/BtUL0XSduAXp0dntozNhe8dGdwJ7dzJ+/ttwuz8n4/CkNAodUmsSvNuN0Xc/LFiVybsdnz82na3Cb2P5tcyeaAXvtLPs7gLep9PCl+oxlvioBCq4nQvxlT0/Un5uTJJmXoSX5bvkkGJzUddKduDOi/Gy/4/f+OHe7r7aR5ba99lISAQmtasKE/MSe7L2o6kXPcnAY3Xes3E2+zcW1BtwV0kTNkxDYo7gQUatcxhO4rhlEnO2+rh57BT18mdZPozGhT2lZzc0B/jIvUXGk7pD17e5bPyb9QOgIKravTHOft8qiTnTeObuGtvfFl0skzePcjSc7u9VsD+nMc0p1nTLn7Tr5p3wG8IwIKravTMOepdtzJl6FQl/DG3nhW9xn89P378TwbA9pt6LrvxN+G092SN/PWdjxFKQgotCbviY+bknEnD91qTgq8YTOm0t0avFWVt3/oz/vmgHYnu/vlj3m+OG1uLsh+pSCgUJs8vR0bFnfykLYmYKHdLvviTd5kmu7QNG7HbgxoIMNDQLudS128CAoCCrHp64PDdlvcycPp16VNvb5yk2fTs9cmh/80BLQ7y1pA2Z0eBBRimoB2b7432Qructk/3Z+8yURA8WIEFFqTp9Gz5+qbT+7b1pQvHKoueZM3mSKfwjtv9m8MKM3EFAGF1uTQSLN3izaf3L/pfasW36xpn8P/78mbTKE3kT7vkoBO3kQCOgQUWpMDfYz9iTu531PoPy49g+/r96+TrG3YjcltrHMEk+cBnW3dAjUCCq02iW2inM+sx518dz5cubTVN5zB2UR9viO9E83uZc2NAXUPYHJjWxQtAgqt61i1wGc2N558Hw/dsbi75XBsD2ezsDtt5aOc7Tmc69sa0C7YP+7TD3iibAQUWt7hQZzPRG4++T6+7b3YqeF9cXdjcD7POcJom0H/3fStAZ0cjKliN1DUCCi0moT969gZ9/ORm08efrT2XPnqXeQ+L6h/ODv3eE3f/lNMQGdHeuI5PO4EFGpNwT6H2Ez2cd98cm18sr2g2yT8ETx1ctnJV3
rcxh9fowI6OaAd25+oEVBo9VGaHCYk9uTG5MD0Ae6bQIHTw1/pMQwevsBje0CHhPIxeHQIKFLFnpdIHgFFovgqTaSPgCJR7LmO9BFQpOnKe91IHwFFeoZ9kTjqO9JGQJGeYU8jNkCRNgKK9NzoJ/JAQJGedmd43kBC8ggoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGaQe0AgAZfaLkE4XO/m0DeC/yRqkHKh3wBwNAsQgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKwK/whRUAB2B3wxb45IaAA7AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHqhU9soG5AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHqhU9soG5AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHqhU9soG5AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHqhU9soG5AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHqhU9soG5AioeqJ6oFLZKxuQI6DqieqBSmWvbECOgKonqgcqlb2yATkCqp6oHtj7+/fq+6+dM8pe2YAcAVVPVA/sPQJafftz34yyVzYgR0DVE9UDe3VAq+rnrhllr2xAjoCqJ6oH9uqn8P+lqn77a8eMslc2IEdA1RPVA3vNa6DXx/r6YZ9R9soG5AioeqJ6YK99E+nrY8/z+LJXNiBHQNUT1QN7/bvwl8q+FVr2ygbkCKh6onpgb9iNqdkIrapPw4yyVzYgR0DVE9UDe85+oLeqFb1XU9krG5AjoOqJ6oG9yY703VZoFfmufNkrG5AjoOqJ6oG9+SeRLgQUOBsBVU9UD+wFPsr5zx8EFDgTAVVPVA/s8Vl4IDkEVD1RPbBHQIHkEFD1RPXAHgEFkkNA1RPVA5XKXtmAHAFVT1QPVCp7ZQNyBFQ9UT1QqeyVDcgRUPVE9cCZS78HveXz8GWvbECOgKonqge6rtVUbEPLXtmAHAFVT1QPHI2f3xxEHl657JUNyBFQ9UT1wEH9uSO3mM13fMTt2VT2ygbkCKh6onrg4Oblsk5q1FHtyl7ZgBwBVU9UDxxc/Cfsj43QqJdBy17ZgBwBVU9UD+w9Njf9r/K4xj2HL3tlA3IEVD1RPbD32Nr0n67fOBoTcCICqp6oHtgjoEByCKh6onpgj6fwQHIIqHqieuCAN5GA1BBQ9UT1wEF4N6ao74gve2UDcgRUPVE9cBDckT7uo0hlr2xAjoCqJ6oHjppiTkV+sXHZKxuQI6DqieqBrsusnxxMBDgVAVVPVA+c4XB2QDoIqHqieqBS2SsbkCOg6onqgUplr2xAjoCqJ6oHKpW9sgE5AqqeqB4YcI19971X9soG5AioeqJ6YAABBdJAQNUT1QMDCCiQBgKqnqgeGEBAgTQQUPVE9cAAAgqkgYCqJ6oHBhBQIA0EVD1RPdDVHE/EEXUopjsBBcQIqHqieqCLgAJJIaDqieqBLgIKJIWAqieqBwbwGiiQBgKqnqgeGEBAgTQQUPVE9cAAAgqkgYCqJ6oHBhBQIA0EVD1RPTCAgAJpIKDqieqBAQ
QUSAMBVU9UD1Qqe2UDcgRUPVE9UKnslQ3IEVD1RPVApbJXNiBHQNUT1QMD6i+I//bn/fr9V+QFy17ZgBwBVU9UD/TU+WwDWlWfcRcte2UDcgRUPVE9cK7tZx3Q+ivi1wpaBRy9dEBRCn9M5RfQ+ngi3399fdQ7Mj0K+ttfK4tCQIFjFf6Yyi+gt+YYTG1A6yfxUQdkKntlA3IEVD1RPXDmUtVvHXUBfWyORr2RVPbKBuQIqHqieuDUI5n1NmcX0Mcm6NpzeE/ZKxuQI6DqieqBU3//3rxv1Af0RkCBExFQ9UT1wCkCCiSEgKonqgdOzZ7CX3gNFDgRAVVPVA+cuTTbnF1AH9ujP2IuXPbKBuQIqHqieuDMraqb2Qa03ic06rNIZa9sQI6AqieqB841e883Ab1WVdwzeAIKaBFQ9UT1wLnuo5ytqLeQCCggRkDVE9UDPc6Xw0f2k4ACWgRUPVE9MKBLaGw+CSggRkDVE9UDlcpe2YAcAVVPVA9UKntlA3IEVD1RPTCkfSMp+oD0BBTQIqDqieqBvUc0+2Je+jeRovaivxNQQIyAqieqB/bGgA79jN4ILXtlA3IEVD1RPbA3BPRWdYdRrp/Ix22Dlr2yATkCqp6oHtjrA/r432H/pWvVHlNkq7JXNiBHQNUT1QN7fUBvztd4/PMHBxMBTkRA1RPVA3t9QCdbnVcOZweciICqJ6oH9vqAXtyAckBl4EwEVD1RPbA3BtRpJgEFzkRA1RPVA3vBp/AEFDgTAVVPVA/sOW8ijQdR5is9gDMRUPVE9cBeH1D3nXf3Hfktyl7ZgBwBVU9UD+x1B1L+OVazPqpd3CHtyl7ZgBwBVU9UD+y5R6Jvstl8ojPqK5EIKKBFQNUT1QMdQ0ObgF6j+0lAAS0Cqp6oHuj5+ujfjo89GBMBBbQIqHqieqBS2SsbkCOg6onqgUplr2xAjoCqJ6oHKpW9sgE5AqqeqB6oVPbKBuQIqHqieuCarw8+ygmciICqJ6oHriGgwKkIqHqieuAaAgqcioCqJ6oHKpW9sgE5AqqeqB6oVPbKBuQIqHqieqBS2SsbkCOg6onqgUplr2xAjoCqJ6oHKpW9sgE5AqqeqB6oVPbKBuQIqHqieqBS2SsbkCOg6onqgUplr2xAjoCqJ6oHKpW9sgE5AqqeqB7Yc7/Sw8EnkYDzEFD1RPXAHgEFkkNA1RPVAwdfHwQUSAsBVU9UDxzV26BRXwPvKXtlA3IEVD1RPdDxKOi3P/cMKHtlA3IEVD1RPdDVfyGnVdkrG5AjoOqJ6oET1+ivgp8oe2UDcgRUPVE9cOLxJH7PJmjZKxuQI6DqieqBU/s2Qcte2YAcAVVPVA9UKntlA3IEVD1RPVCp7JUNyBFQ9UT1QKWyVzYgR0DVE9UDlcpe2YAcAVVPVA9UKntlA3IEVD1RPVCp7JUNyBFQ9UT1QKWyVzYgR0DVE9UDlcpe2YAcAVVPVA9UKntlA2oVAVVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QKWyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqBFQ+UT1QK
WyVzagRkDlE9UDlcpe2YAaAZVPVA9UKntlA2oEVD5RPVCp7JUNqD0eUUU/qAgoADMCKp+oHqhU9LoG5AiofKJ6oFLR6xqQI6DyieqBSkWva0COgMonqgf2/v69+v5r54yi1zUgR0DlE9UDe4+AVt/+3Dej6HUNyBFQ+UT1wF4d0Kr6uWtG0esakCOg8onqgb36Kfx/qarf/toxo+h1DcgRUPlE9cBe8xro9bER+sM+o+h1DcgRUPlE9cBe+ybS18ee5/FFr2tAjoDKJ6oH9vp34S+VfSu06HUNyBFQ+UT1wN6wG1OzEVpVn4YZRa9rQI6AyieqB/ac/UBvVSt6r6ai1zUgR0DlE9UDe5Md6but0CryXfmi1zUgR0DlE9UDe/NPIl0IKHAyAiqfqB7YC3yU858/CChwIgIqn6ge2OOz8EBqCKh8onpgj4ACqSGg8onqgT0CCqSGgMonqgcqFb2uATkCKp+oHqhU9LoG5AiofKJ6oFLR6xqQI6DyieqBM5d+D3rL5+GLXteAHAGVT1QPdF2rqdiGFr2uATkCKp+oHjgaP785iDy8ctHrGpAjoPKJ6oGD+nNHbjGb7/iI27Op6HUNyBFQ+UT1wMHNy2Wd1Kij2hW9rgE5AiqfqB44uPhP2B8boVEvgxa9rgE5AiqfqB7Ye2xu+l/lcY17Dl/0ugbkCKh8onpg77G16T9dv3E0JuA8BFQ+UT2wR0CB1BBQ+UT1wB5P4YHUEFD5RPXAAW8iAYkhoPKJ6oGD8G5MUd8RX/S6BuQIqHyieuAguCN93EeRil7XgBwBlU9UDxw1xZyK/GLjotc1IEdA5RPVA12XWT85mAhwJgIqn6geOMPh7IBkEFD5RPVApaLXNSBHQOUT1QOVil7XgBwBlU9UD1Qqel0DcgRUPjF8cv0O+rc/79fd30xcu8a++94rel0DcgRUPjF0YrsDUh3QyAN4hhFQIAkEVD4xcFq3A+ejevV76PsLSkCBJBBQ+UT/pPojRN9/fX3U1btEf49RAAEFkkBA5RP9k27NJ9bbgNZP4qM+vh5CQIEkEFD5RP+kS3MQkC6gj83R3W8kEVAgCQRUPtE7pTuQZxfQR/3sz+Gb44k4Yrdli17XgBwBlU/0TukOJd8HNPIg8hMEFEgJAZVP9E4hoMCbIqDyid4ps6fwF14DBd4EAZVP9E9qv4ujC2jst3CEEFAgCQRUPtE/6dYceq4NaP0kfPee9AQUSAIBlU8MnNbsPd8E9Fp5X2xkQECBJBBQ+cTAaZPv4tj/QSQCCqSBgMonhk503j0X9JOAAmkgoPKJ4ZO7hCryuUPR6xqQI6DyieqBSkWva0COgMonqgcqFb2uATkCKp+oHhhgPrx90esakCOg8omhE2+TD2DufCF0x+Hti17XgBwBlU/0T5p/gn1fQLcf3r4K2HPNAKYIqHyif9J1FrFdAY04vD0BBY5FQOUTvVPq5O3++Ptg1+Hti17XgBwBlU/0Tnk855Z8mXFr1+Hti17XgBwBlU/0TnkEdPe3IA32Hd6+6HUNyBFQ+UTvlK55GvuOzlz0ugbkCKh8on/SVfgSKAEF0kFA5RP9k7roSew7vH3R6xqQI6DyiYHTvj50T+J3Hd6+6HUNyBFQ+cTQiY+CqvYD3XV4e/XNLfq+AxDQ/D6JtOfw9vqAlnznAQiofKJ/kvSTSLsOb09AASUCKp/onaL9JNKuw9sTUECJgMoneqcovsh4ynx4ewIKKBFQ+UTvFOknkfYhoIASAZVP9E6RfhJpHwIKKBFQ+UT/pKv2YCI9w+sCBBRQIqDyif5Jj01Q0SeR5u/nxzaUgAJKBFQ+MXCa6IB2093x2Y0JOBsBlU/0TpnsuLljP9DmvXfnss1cdqQHzkNA5RO9U1QBvXm5TOCjnCXfeQACKp/onaIK6MW/4NkHEyGgKBsBlU9UD+wF94aKfIOfgAJKBFQ+UT2wFzyq6MkHVCagKBsBlU9UD+wRUCA1BFQ+UT2wx1N4IDUEVD
7RO4U3kYA3RUDlE71Tjt2NKepj9gQUUCKg8oneKYfuSB83i4ACSgRUPnH1p48I2j/U6ZW4qtrv5ty+cAQUECKg8olPfr7r0EyXWT85mAhwJgIqn/jk53sPzcTh7IBkEFD5xGdnkB4dNBIBBZQIqHziszNcd34r5x4EFFAioPKJz85AQIF3QUDlE5/8XHRwZRsCCigRUPnE9R/XuyLpvuT464PPwgPnIaDyid4p8903I/fdXENAgTMRUPlE75R5QIXfcUxAgTMRUPlE75RpQM97B+lOQAEtAiqfqB6oREABJQIqn6geqERAASUCKp+oHqhEQAElAiqfqB6oREABJQIqn+j8O3D8uXPfRyKggBIBlU90/k1AgbdGQOUTnX8TUOCtEVD5RPVAJQIKCNX3/6IfAxkFVLE5S0ABIQJKQPcgoCgaAX1VQLv87Xn58+uDgAIpIaCvCaiz9bgjofWUfUciIaCAEAF9SUAnz753HM3uMWffsfAIKCBEQF8R0H/+eISm/SrO2+NfOw5I/3gWv+tw9gQUECKgrwjozYlmHdMdX2t83XVpAgooEdBXBPTivvD5eBq+4ys9dn6jEgEFhAjoCwL62Oh03/rZ973w+zZBCSggREBfc0R6t3k3PsoJvAcCSkD3zSv5zoPiEdDcnsLvQ0ABIQKa2ZtIOxFQQIiAZrYb004EFBAioHntSL8XAQWECOjrP8p55hfDE1BAiIDmdDCR/QgoIERAMzqcnQABBYQIaD4HVFYgoIAQAT34S+XO22MpiIACQgT04IDuOvynHgEFhAjo4QF9SGczlIACQgT04NdAL7uPQi91QEBLvvegdAT08DeR+j2YktgMJaCAEAF90Uc5E9kMJaCAEAF90W5MlzQ2QwkoIERAX7YfaPet7uduhhJQQIiAvnJH+uvpm6EEFBAioK/9JFJzXKb3OiJ9yfcelI6Avv6jnBcCCrwHAvrigF7YAgXeBgF9ZUBvvAYKvBMC+rKA9jvUn/d9HncCCkgR0NfuB3rit3k0CCggREBf+Emkczc+GwQUECKgL/ss/Nkbnw0CCggR0Nccjan6qb4aGwIKCBHQVxwP9OQvQnIQUECIgB4f0EQ2PhsEFBAioAcHNJ2NzwYBBYQI6Os/ynkmAgoIEVACum9e0fcelI6AEtB984q+96B0BJSA7ptX9L0HpSOgBHTfvKLvPSgdASWg++YVfe9B6QgoAd03r+h7D0pHQAnovnlF33tQOgJKQPfNK/reg9IRUAK6b17R9x6UjoAS0H3zir73oHQElIDum1f0vQelI6AEdN+8ou89KB0BJaD75hV970HpCCgB3Tev6HsPSkdACei+eUXfe1A6AkpA980r+t6D0hFQArpvXNH3HpSOgBLQfeOKvvegdAQ0r4AOX7J0rZ88f/sz9vIEFBAioFkF9NJ9RfKt+7b56C9MJqCAEAHNKaCXbqtz6Gf0RigBBYQIaEYB/fqoqh/39tvmm03Pf/6I3QYloIAQAc0ooI8N0M/6f69tR2uPbdGfMSMIKCBEQPMJ6GN780f3v+Nm56X6/itiBgEFhAhoPgF9PHP/7P53jOY17jk8AQWECGiWAf0xnHgjoMBpCGg+AX08df/Z/S8BBVJAQPMJ6P3ShfMyeQrPa6DAWQhoRgG9dXt9fn20z+Xv41bpVgQUECKgGQW03u2z2QYd3zm6RO5JT0ABIQKaUUCbPembp+xdN+tPJEVtgBJQQImA5hTQtqATP55fyEVAASECmlVA24/DOz6fX2KCgAJCBDSzgLafhG9Fvf/eIqCAEAHNLqC7EFBAiIAS0H3jir73oHTN3b/kx0B+Ab1Y30CqEVBAiIDmFdDrvjfhCSigREBzCqi/F1Psd3oQUECIgGYU0PqTSG4xm/fj496KJ6CAUHv3L/hBkFNAb14u66RG7QpKQAEhAppRQC/+E/bJsUE30Ae05DsPikdA8wlo8MhLpx/OruQ7D4pHQPMJaH9E+onTD6hc8p0HxSOgBH
QHAoqydff+ch8EGQWUp/BAYghoPgHlTSQgMQQ0o4CGd2M6+Ss9Cr7vAAQ0o4AGd6SP+ygSAQWECGhGAXUOBTqI+0okAgoo9ff+Yh8FWQXUOyB9AgcTKfeuAyQS0BOvPbOA3pM7nB0BRcFSCehpV59fQHchoIAQASWgO8cRUJSLgBLQneMIKMo13PtPfRgQ0EjX2HffewQUECKgBHQHAoqyEVACugMBRdnmAT3n0UBAIxFQIAV+QM94OBDQSAQUSMF47++PDEpA905UDwwgoEAKCGheAW2OJ+KIOhTTnYACUgSUgO5AQFE2AkpAdyCgKBsBzSugHV4DBVLg3PvbBwMB3T1RPTCAgAIpIKAEdAcCirIRUAK6AwFF2QgoAd2BgKJsBJSA7kBAUTb33v/4NwEVTFQPVCKggBABJaA7xxFQlGsa0CahpywFAX0NAgoIEdAsA1p/Qfy3P+/X778iL0hAASECmmFA63y2Aa2qz7iLElBAaHLvPzOgZz0KMwxo2886oPVXxK8VtAqQLgoBRdkIaHYBrY8n8v3X10e9I9OjoL/9tbIoBBQ4EgHNLqC35hhMbUDrJ/FRB2QioIAQAc0uoJeqfuuoC+hjczTqjSQCCgjNAnrSw4GAbvdIZr3N2QX0sQm69hzeQ0ABIe/eT0B3T1QPnPr79+Z9oz6gNwIKnIaAEtAdCCjKRkBzC+jsKfyF10CB0xDQ3AL6SGa9zdkF9LE9+iPmwgQUECKg2QX0VtXNbANa7xMa9VmkkgKa7ILhfRDQ7ALa7j3fBPRaVXHP4AsLaMSSJXsrkDQCml9Au49ytqLeQiKgmjMDHQKaX0DdL4eP7CcBlZwZ6BDQDAM6JDQ2nwRUc2agQ0CzDKgZAVWcGegQUAK6c1yy5SGgOBwBzS6gl+EF0Kg9QFsEVHFmoJNEQM86CFR33eqJ6oGu6+zonrENJaCKMwMd/25zwh2JgG709eEdH5ndmBYRUByOgGYU0Oa9d6eYzQ6h7Ei/hIDicAQ0o4DevFzyUc4VsQFN9XYgYQQ0o4Be/CfsHExkGQHF4QhoPgHtDmQ3deVwdksIKA5HQPMJaHco5SkOqLyIgO7Br2OTbQE9+Je57WF4zEIQ0D0I6Nvi97HJ1oAe+svcGtAjFiKjgPIUPg4B3YPfxyYENJ+A8iZSHAK6B7+PTQhoRgEN78bkb5WuOCCgyRaUgO7B72OTfAJ60PVZpg8AACAASURBVD08p4AGd6SP+ygSAV0+c6I34yT8PjYhoBkFdHos+lb73ZybEdDFMyd7O85BQDchoDkF1D0UUzIHE0k2PAR0DwK6ReCX1J40/QEBjZmoHjiT2uHskg3PvoCmeqtehYBusRLQanLa6QE9agshv4DuUlBA4/7iBgKa5s16ldJv/zYElIDuH5fmQ42A7lL67d9mLaCVcxIBjZioHqhEQBfPTUAnSr/922QT0G27ihqvWzxRPVCJgC6em4BOlH77t1l7E2n82dHvUBJQs6+PMz8L301L86FGQHcp/fZvk1RA166DgIYR0EUEdJfSb/82q7sxDT98WUCXr4SAhiUX0HQedtEBnS576QEp/fZvsxTQ6e4p5wf0uN1lMg9orEMC6k5N53FHQHcp/fZvsxpQ9/FBQLdPVA9UIqCL5yagE6Xf/m1SCujaHZ6AihDQxXMT0InSb/826wF1/ufcgE57Lr9u8UT1QCUCunTmOwF1cXSqTbIK6CELkWlA2+MyxX0nfI2ALp35TkBdBHSTJwEd/+MVAV2+EgJae0SzL+ZwQJFzj8b0xgEtPSCl3/6NCGiWAXUOahe5EXpMQJ2x6TzuCOgexd1+2819FtAhnqcGNLChI71u8UT1wN4Q0FvVfZFH/UQ+bhv0FQFN5IFHQPco7vbbbu9CQN3Hw5Fvgc+uZFdAjQuYYUAf/zvsPH+NPCQ9AV068/j/+xNSuSGnSGc9vshBAR3uSEcHdGFpNv28+6FqG3ynwwN6c75I7p8/zvxWTgL6ttJZjy
/yxgENPEyD5yoloJOtzlO/F56Avq101uOLHBXQ/jdJQDdPVA/s9QG9uAG9JfBZeAL6ftJZjy8iDGhFQPd4QUCdZqYRUHcdpfLAi0ogAZ0p7vbLAnqf79FOQCMnqgf2gk/hCegSArpHcbdfF9D5Y+B1AV1aHP9fwbOVEtBbVX0OJ16SeA10sgoTeeAR0D2Ku/3CgM5O9e5bcgR0kz6g7jvv7jvyWxDQxTPfCaijuNuvDOi+c+5Yip0BPfY3sHmiemCv/fz7I5hDNR8pjXsGX1BAqyr+7k1AB8XdflM/ot+mPMZqQKd36rUhhQS00WSz+UTn59PLuaQ3N7RqknngWQI6/YuQxu04SXG3n4C+fUDvTkObgF6j+3lYQN03AtN44O0LaNyl31Ay6/FVMg7o+nN0Aur5+ujfjo89GBMBXTn3nYCOklmPr3J0QA/8da4GNPQgXZhSTEB3IKDL574T0FEy6/FVCCgB3SCtgB79uhABNSstoLbbG3WRw36fmwO6uggEdItXBHT7lRy50z0B3aO42/+uAZ2eQkD3IqDL574T0EFxt/8FAT3sF6oKqGkBCahomJOgNwloWQWZIqBbL2U+t+7XuyOgzgkEdIujAxoXHgKaKgK69VLmc+vu/CsBnZ0Q+Ll7WQL6FAFdPrf3v0UVZKrEgBpucOQlZg+Y0IX3NYyAHo2ALp/b+9+iCjJFQDdfynj24CGbbA8Je0ArAhopuYAe9tsioHsQ0M2XMp7dD2h3n9v1QoJfyLX/JKDRCOjyuf3/LSohEwR086WM508hoJNmEtAtjguo004Cmj0CuvlStvPPc9Vfv+UhQUBfh4Aun9v53+n/FIiAbr6U7fyz6xseN68M6HQZjKucgKqGEdA3QkA3X8p0/vnjhIA6E9UDlQjo8rmd/yWgBHTrpUwXWAmoH71n12IM6HQZ4h65i9cgkPTdLrWAxi1QzJmjHhHTuxEBJaDbL2W6wPaAVvMv+VxdCgJ6sNwDuv3ctoB2/5ptkJaHgG6+lOkCs8fJeK8LBPTp1RDQ11He3NDK2rAapqubgCaptIBOV3vkxQyXWA9oNX2MHBPQ2U0moJu8JKDr17Jj392kAvrWfSGgERczXCJUr+GXXrlP26M3SdYWkIDulVVAvZ9FBDT6tYTJv7YFNOk1vQ8BjbiY4RLBgDqPh7GhRwV0siDbrufZlYskfbfLLKDeNRDQ17A+nHJ1bkCd/3XvifsDGtgECS6It0TbEVDznN0BfXbfmCcqZq8nArqH9dGUrZcFdHLH9AM6HTg8ovQB9e7hBHQTQ1IWfuxvPL5PQL2/01uW7p0Q0IhLGS4TG9Bnj6ilJSKgapEFWvmpNaAVAU0fAY24lOEypwfUCykB3ebIgN6H04wB9Yvs3Z/iklh2QHcsHwGNuJThMtOOOQ+IpTs8AU1ExM19WsFtAV2poh/Q9S1OAhplxwIS0IhLGa5qY0DHE0LrcrqnU3CRlgIauIMT0E3OCOhiFVMM6OT+TUBLkXpAQyuTgJ4g5uauxsNf2caATlbmakCnaduy8ATUfNHx/xdhyzPlxUvFX9VCQNcfLP7DLbTUy//hnDC7v4cWZSsCajnvqwLq/XF9dUBXRrxXQP2/VQR066Wir8v0HGz50SEMaPxNIqCG87aRMQU04u/vPJgENE6oiVFnTv0GCp0UUP/eVnnnXfrheFH/9MUFDD0+J/9LQNdF3dzFM6cf0MgCFBHQ1cWdrk4CGnOp6OuyBTSwjgjoqx0c0MBa0Ad0S7nsAR3/DHg/mV8m6TXtP96eBnR+q1O/gUJZBNS7ZPiJ1tq9l4DuFHdzF87drwVxQEO7EYe2DQ8P6Orf8IXzJSgyoJO/ZO7/FGBWkdiLRV+b+5teXUmBF0Un110FnpaNNyb4JPE+v7tP/peArjMGNPQkQRXQyQr3ribwJ3dLud4moDuuY7ZNuTpp+rsmoHEXi762iIAuXNZd4nBAQ/0MbLES0CjGe0j4LYbFgK48KzkyoOEnOF
ssB3R5xqsCar6S/AJ6WrFPC+jmP9fzC7vrKlzd4CwCutPLA+r9ib0vrrONAQ38zR2vf7bggfMt36+8W7F8/tDiHuOVAd39YNrrtBdFXh1Q99+xQ2YPAAL6UvaAVvMTQ88PTg/o822o4B12dpd6y4Curvvp38RiAxpxk5fvhpHXHD2jqmZvcwa2ZJYfIvOfzB+y8T2PO/+GieqBSsZfTyigoTO/KqBLf14J6OSi0zCsTAqvEQK6epHI76NZHmO4zPo27Mra9paagEax/Xrmf/IWz3xgQMeULy6Dc13vFFDrtRgCep/8ds8I6DmPnrwC6t1DvRHLyxYKaOhBGbEsceffMFE9UCn65g6/3PmmYPC8poA6j1fvrOkGVPP4eerlAXVWUDEB3XDfDlxGFNAjRiz/IsMBnd1Tdl77Tu8e0NXH4aaAhts4f4ybAjp77AsCuvlP+TFeElDnJwR080VO217egIAeJP7m9g89SUDdR+b8BcvZY3z+IH5aRnfC8sKG7/Vr25zvEFA3i+FzTq5wf0BtFxX/TmNqaLjIuwS0IqDb2QM6fTAunXd7QL19jqYB9R7EGzYLx8XcFdBtP9v7YN94YfvVuL/Plc32+Q/Gfu4LqOGy65vJBy7EOwZ08Qfe9gEBjWAK6OTht96azQH139afB3T2D1lAQ5c9JaCbLr0voNOVtrJVEnX6pitXBfQVCxEf0G4TIOnHehAB3cdwc/tfuTmg/v2zWnphNLAaw1e7HNClOeMJJQd05fe2/fRNV64LqHUptgfOFtAdi3aeUECnd4zIG0VAn19k02ZMd9bZ1Uy3BxcCOp5rvHwOAd1153llQIffYXCfwbXXy6zMAfWulIBKEdB9bPdrfUCnUwIBnS+x/7gKXX3/P5EBXf+thH/6uoAar2ZbQFf6uTOglj/VgSt9QUDds21fKwTUcvYtE9UDlXYHdHXAs4A6G5heQN1zBu7RxoCGtlzTDOjytSgDuun3uOVnT6/89IBu33a3B9SwWCcLBHR2KgFdY35m1f9DFVDvGnQBXbs7pBzQ0LCjA2q55duuvICA5mjpbkZAt9l1c2MCGtxq3RjQyU9fENAnv5RDAhp8KrVw3TsC6i1mMQGNWD/RAc23n5sDuvkm5hfQS9X7EX/hFwV0WCNLAV0Y3D+f8JZ4U/Tca18438JjdOkGrfz48IC6vxDbNfT/fzkQzyabb+D2zT/vyjb9Ydk2jYB6Sg/otZqKbei+m/tsu8L9cXvecEAXBwd6YQvo0vlCQbJV5AUB9X5/kdfQ///QFn3gPxZn2K48gYBuXAYCOg1o1H0uq4B+fVRzv/0VNeF1AW3PPN2k3BDQ8IvcmwK6cNfw/zMyoMuDjwyo8wfIdj1PA7rlabb1FsoCanotYBgTn0MCGnnPzimg//wxLebfvz/++/uvmBE7b25MQLsc9v9YvXLnNVN5QP2HR9zTWE1Aw31cPkOlC2j45gb+Vi0OMV35+wY0434uPp2abYW/Z0BvXi7rpH7GjNgd0Cc/XojChoCuX6U5oPOtYv+MOwO68QE+D0EgbPMH8oEB3VZP81Xbts8XArp3+3vr9Qb+Y8sF3sI8oFFbBvKFUQ8cXPwn7I+N0KiXQY9d9086aAro0qbEllO82fqAPitRNZzde5yuBXTM87ZFXF7qQLmfLrQ/xXLdxoB6QSOgh/PunW8Z0Mfm5k/vxGvcc/ijA7r6I3tAn2xvhjZdgsP7h1XcQ2YloNV06QIbymMVQoGcnLOaXuzIgEaPMVz32iVDBU8ioNa7Q+ZiHxK2826cqB7Ye2xt+k/Xb3FvI5228p8FNPqSbrWePFqGH4wP0Srw09UlWFisWSm8LJgDOjnPzoB6p58e0OBLCIcHdOHPNAGN2+acX1K6IHcCunzN9qeDawEdHozP++yVdP7vJ5f3F2u27Rt4B8w5y6yQywEdH/2HBDRunOXKny12aJ0GV862u83i3WM2aukt6CezJiMI6PyS0gW5HxjQ9J/Cr1+z8b
rXA9o/iX6ygTsLXfBf64uwdtok5rPrdR7JawGtJkdAqWan2SNWQkCXt2anKyk8yv+bt3pNb9nPMgKa/ptI69d8REDdp+/rTxnHyzgn3Df+RvwzzR/lw1jvdKed02w3/zc/wT1bIKDbf4neVvf4g8j1sPp7XTvdHNDp36Yny7v15YClu0hkQO2pSVkZAQ3vxuRvla44ce3vCOjS/b6adHBtvreJWgUeYqvLsHqKn7zx1FBA+9OmHQ/UcrqQMY/fxfMJA7q4NJObu/DXbzGgW6o3WYTAqvfXrrMalq53aWln10ZAp5eULsj9yIAGd6SP+yjSWwV09rJh3JMv/2n9k4VYOyEYUPcaZlfWPZqraSKHjepxgHf+pQV+soBbfrBorcVrAfVu0OTnK+Hyt9SXFy0cxoWALsV8+T+9875lPwsJaFvMqW9/Rk3IMqDBC862BZ7etb1th10BDT/quq3i2XnWAhqo7KTAbpi91yFmt259iXdYGrX8O98S0IW/QdN/rwfU20T3Bsz/sS+g7xnPWiEBdQ/F1HrtwUR2sV71SkA3nG3lDCs9Cl167T/dWd6mrRsB92eVe/ZnAV1sxfjz1QXeZWmj96iAzrq3eCX+H6vg5b0lCp83+J+bf5Y3+y3LLKD3Mw9nd46NfxyfvzjlJ+bJ88PlCy88ZXSDMXl5YfY4noRlfBK6HtDF6w6nTBvQhStdbqvzP5sCuvD7Xb0O5/RnAV3560NAd8kvoLtkeBeQBTQ4+YCAemX28udtOz8L6Mqi9KccGdB7/+prNTlp+WomvTIEdD5g7TrWLu6H0zvjwqoMyvDRczgC+i4ML+NYA7q0HeNuME5bsxbQMeTzHyYT0O4mTbeqV67G7Vboz8CGgFaTM1bz61/dcPQ21xe232cjg4M3/qhY+QW0tKfwW1leB9++2RoV0Pnc+QPfj938ebx/ufm1ezGJeCpqUU2311eeE8+29zYFdClj/SbsaOEShoCG6rl0i578pGCZBfTcI9In7dyABsMYWrTgVq8toNXshOUFVgk8E15t0EJAA1kLN3YM6Hz06gb3yt+WyXWubj3H/KRgWQX07CPSJ81000wBXcra4izveePCObYF1Nksm1946TpVtgV0/udiS0DDQ7y/FeNFzQEdBiz+tQuf/tYPHrucAnr+Eenfzubfx66Azs64+hDdHlAnBoGlOjSgwU260EntDVqoXSDH0zNVQ0Dnl11qsj9ufvX+iwErk7b+pGA5BfT8I9IXbP1J6z3i5QBZQN1n/t6Zj1nT3rZj4HpmLXse0MVXKkKbm/NNysC1rgb0vp7PlV8cj52QnAKa9cFEcqcM6NqPYgJ6n7yzM3/jatPCRPK3/xafoPc/rfxlmQZ06fc2e4nCvXBEQNf+Xi1cbewPipZRQPM+nF3u1p9v3iMCunolmyLoNMTdmgo81VULPIH2tnz9/1wMaOVfYHGSc3LoB4t/4KwBXf3DgF5GAc37gMq5exrQ6EdqeMSWGW6Upu+wjHU5MKBrhZrfgrWALj4df7IIazsnGAaG5/jXwmMnhIBik5cEdOOMKrRHZnfxp2+S7BJM3jThgZ81/zef0v1DtpyqgA5b2QR0i4wCylP4My0+s/POse9aNg158i7IYf1cSF61HK8uoNMFcl90SDSg/u3koROUUUB5E+lMzwOquZb9AT1UeJtx4f2e+1CjhYBKttpnQ3dP7JIfevMKnpwCmvcR6TN36EuLzrUkH9CFFyDXXs+c5+jIN7gEw6v5rTG9VluInAKa9xHpc9e/NPaCK9lyvrQCeg/vrzQ/x5Evz2oDOhn49LYVLKeA5n1E+twltRlyZkCXf7K+UDkF1JnUL3Eqqz4xWQU06yPS546APrni815XGBbgLllDk1d0+2fzqaz51GQW0DuHsztLUlshySyI6+yFkv+JGzeXCeiS/AK6C3cDs6o6b7sPWxwT0OGfurnvhIBimwN3roSE/DmCs8JZ9wsIKLahn8kL76SqGo2QPAN6jX
33vcf9wIx+Ju/AgGIBAcU2BDR5BPT1CCjwLvgj93IEFHgXBPTlCCjwLgjoyxFQ4F1w9365zALaHE/EEXUopjv3MABKBBQAjAgoABhlFtAOr4ECSAABBQAjAgoARgQUAIwIKAAYEVAAMMozoGYEFIAOAQUAIwIKAEZ5BrT+gvhvf96v339FXpCAAtDJMaB1PtuAVtVn3EUJKACdDAPa9rMOaP0V8WsFrQKOXjoA5cgvoPXxRL7/+vqod2R6FPS3v1YWhYACOFB+Ab01x2BqA1o/iY86IBMBBaCTX0AvVf3WURfQx+Zo1BtJBBSATnYBfSSz3ubsAvrYBF17Du8hoAB0sgvo37837xv1Ab0RUABnIaAAYJRdQGdP4S+xr4ECgI66cMe/iVRvc3YBfWyP/oi58Nm/bADvRR24F+zG9KMPaL1PaORnkRKR02sJOS3rPa/FZVnV8ljKNYffgGbv+Sag16qKewafjpzWc07Les9rcVlWtTyWcs2rPsrZinoLKSE5reeclvWe1+KyrGp5LOWa42+A8+XwufYzq/Wc07Le81pcllUtj6Vc84ob0CU023zmtZ5zWtZ7XovLsqrlsZRrsr8BL5HTes5pWe95LS7LqpbHUq7J/ga8RE7rOadlvee1uCyrWh5Lueb4/UB7UXuAJian9ZzTst7zWlyWVS2PpVxz6A24znZizbahOa3nnJb1ntfisqxqeSzlmgNvwNeH9zGAXN9Hymk957Ss97wWl2VVy2Mp1xx3A5r33p1iNjuEsiP94XJa1ntei8uyquWxlGuOuwE3L5d8lPMVclrWe16Ly7Kq5bGUa467ARf/CXvswUSSkdN6zmlZ73ktLsuqlsdSrjnsBnQHspu6ZvocPqf1nNOy3vNaXJZVLY+lXHPYDegOpTwVeUDlZOS0nnNa1ntei8uyquWxlGsI6BY5reeclvWe1+KyrGp5LOUansJvkdN6zmlZ73ktLsuqlsdSruFNpC1yWs85Les9r8VlWdXyWMo1r96Nyd8qzUFO6zmnZb3ntbgsq1oeS7nmxTvS5/kSKACEHPgXYHIs+lb73ZwA8BYO3YS+zPqZ5wugABDG4ewAwCj7F3EB4CwEFACMCCgAGBFQADB6aUC/PtgRFMD7IKAAYERAAcCI10ABwIiAAoARAQUAIwIKAEZ8Fh4AjA4N6JWjMQF4YwcG9OvDOx4oOzEBeCMvPiJ9nt8pBwAhr/5OJP+rjgEgU3wrJwAY8b3wAGB0WEAfW5v+0/UbbyMBeB8EFACMeAoPAEa8iQQARq/ejcnfKgWATL14R/qUXwKtP3j67U/3lEs+Hz+9Lv1teqyGVH7p3UfTkn8V5zq9n97y+LO/eA9IQNYPrXUHfpSzKebU9JeYmOaT++59sLkBmazl9AN6y+YTvdfpaiegu2X90Fp36MFELrN+pv0bu843j27pL/Mg+YDe3DtC0n9I2zvCuAsJAd0t64fWOg5nN7jOHtrNaxDpL3Yr9YDWmxzd7/aa+tP463QrmYDulvVDax0HVB481vK/unfCr4/6vzNZy6kH9Or8JuvXQlM+JsJ1+gefgO6W9UNrHQEdPNbyf/7d2Ta6Vr/9z2zWcuoBvbgbINe0f62Pxfv3TuMJ6G5ZP7TWEdDBYy1/XsfH+aM8P27jWr44L945D6nHU9Mk7rbdw+ex0P0NePyVr++xKQb0sWj9r7V9afTTOf389xfq36XzTryztp2lfSzm8Ht1/30aJ6DufbW7A3ivQr562TY+tOr/+v7rlvzr5AMCOqjX8uMh3N8LH//8HNbyeGz9+k749/jX9JbIik49oOGNzuGY2+0i1gHtTjrzt1r/LutX6boFHgI67FbSLO01sW3UIaDT+2pzB/hf/e/5pFdOtj+0moD+n7PvAREI6KBey05uHk8z/urXsvvdJPW94OL8eU/j/ZDUA1p30XtION9Z0Czj47//7R/nv1Hf/C7HF2r7PDq75dVL62xGX1J4tPcBnd9X//nDOeGk5Y
x4aD1+mf/ykdFbTAR00GxSDNsV9dOMe7eW64fO8BiqqzT8NXX+rJ4r9YD2u2O4j+D6od389617vDRBrZf2duoDqP1dDk/ib8Mju/1Ht5vo+Kv++/cU/op29wDvvtoG9LM9x0n31oiHVvNrTuDXuRUBHTQreNiuqJ9m9Gt5fL3mcWr9qBo2PK8pbHvUkg+os6nhvLjYPVIej556seuAtqc8/nXeL7b9XQ4fPO4COi5S96/hOXMSz+D7xQndV/vf+Glv3kU8tOqAZtRPAjpq1vLQm+bAUbf5Pa5/u6ALZzLP4DMIqLtPcP9q1xDJtkFONi8nRuk6LeZt2PD8MTnDsOGZxDN4/1347r7qHIGiu0u8XsxD68w1b0BAB+1TjO6JRns0vtlaHj/e3z11T+YZfBYBvY+fR6oXyX3m226dOA9w7/H1Qs6rifXi3PrXvYe3YLptqS6ciRxjbBbQ4b7q3yVOWbbND61L2jsJzxHQQbt+uwfH10d9r7u5b8W6T0C7Tc9rMnHKJKC1a/cWwexYCY+Fdd6Xcf55xgI2Keq23W79M/phO7P71Y6bpik84seATu+r6QR020Mrje35zQjooF3LXXAu/UOkWcvuV9z37yw8VnPzangaMgpos7CPpXR/qaGAnrnX4vAW4WM5h4C6+32Oe7Ol8gvul3p+X00noNseWgQ0V9fhvcrPfv94563CdgUPu0w3Z2heDU9D6gH9+nB/U5fuTYXZwzmxLdDuSfziFmj7WD9zSV3Ou/CT+2o6Ad320CKguerWcvOAuA3vH/y4uweIHD9zUv8dPf0Z/Pgy4vj6UpIBnb1A1z+SZkuW2Gug3ZN4/zVQZ+vp58nP4L17gHdfTSigmx5aBDRXY4N++6t9mtGtZecueBvW9+N+8D9Ofwbv9rx9GI93v2uVUEBv04OAXroXQGbp6fdkuZ/7VoJT+/pJ/H/13oW/jC+Sfv9/5+6IMb8H+PfVhAK66aFFQHPVb0q0Rz7odwGc7zLt/MH896c/gx8aNCzipX+YO/tRJxDQYTfpRvdY8T5vXr8eNuwqeN5Su5vL1+EjMt5+oM1Pv/33c3fEmN8D/PtqQgHd9NAioLnq13LzsvZw1JB2f5Xh48+Vu4V3fpquVb/dMezZP+xPnVJA3SPoNi97DS/XddvNw470/TsMp38S6T4urP9JpK5Dzl3lLPN7gHdfTSmgWx5aBDRX1/Fvef/wHT8uMXLujWc/g5+8h9luDI/7BnXHD0skoJPPPDu/3kG3kfcv08OLnLWo40blbQjo7LPwjfM/dzi/B3j31ZQCuuWhRUBzNbwbcB1y1L9bMHyG5j+Mr9u5b8ueZ7gD/pyd8PPvtALq1bI2Pvr7PXB+3E7v5+wdr8uwvNOjMTVuVXXqM/i7fw+Y31dTCuiWhxYBzdXV2Yob3ikaX5Or+r2qx7cSUvgYZ3usCHdJLu3f8tQCOgbIfeH46myQtu/SNr/qU19bngZ0ONzFfXb00u6Hp/925/eA2X01qYBueGgR0EKk8zHOt5HKLpXAVgTUKpkDMb0PAorcEFAjHux6/E6RGwJq4b0WBgUCitwQUAtnr0boEFDkhoBa1O8c8g6SHAFFbggoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgoABgRUAAwIqAAYERAAcCIgAKAEQEFACMCCgBGBBQAjAgocnCtRr/9tXbOv//bq5YJIKDIghvQqvr25+IZL9XPFy4WSkdAkYNpQKvqc+F8t4qA4oUIKHJwdcJ4WdkGJaB4KQKKHLgBvX99VNX3X8HzEVC8FAFFDiYBrTO58CSegOKlCChyMA
1o/Sz+x/jP4Sn9bfIK6W311VJAgIAiB7OA3vqdmcY3l+rn9G5A6+f5G/Z6AvYgoMjBLKB//95uc7pvzv+cBHToJwXFgQgocjAL6D9/NJV8dLQ9uf5H87ZS/xpofYYmsXVTfwQGAgoEFDkIB/Q2xPGxEfFrSgAAAexJREFUwdlsaPYBvQ3v0/cbq8ABCChyEA7o6JHJSUAvYzV5Yx7HIaDIwXpA6/+cBPTR02FH0cfGKc/hcRACihz4Ae22MG/T94rGgLoW9roH9iKgyMHCu/DOe+2TgLqnE1Ach4AiB/5+oHUU+w3N3/6avQb6CCjRxAsQUOQg/Emk67CT5yyg/X8CxyKgyIH3Wfj6Gfz4Sujw0aQuoN679MAhCChy4B2Nqd4AHQNaP5ef7MY0bpuyGxMORECRAyegzT5LbTgvXSavw/FE+h3om1dHP7vzsCM9jkJAkYPZEem9fZj609q33z+nP2IDFEchoMjBNKDDG0SX/pT/0L3o2WyezvZkop84DAFFDtyAujsotZn8OX4svinoD+cyfAoJByKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGBEQAHAiIACgBEBBQAjAgoARgQUAIwIKAAYEVAAMCKgAGD0/wGpsnWBpOZh4gAAAABJRU5ErkJggg==)
## NULL
# Finally, the by argument performs calculations by group. Here the mean and SD
# of Open are calculated for each level of Stock:
allStocksDT[, .(meanOpen = mean(Open), sdOpen = sd(Open)), by = .(Stock)]
## Stock meanOpen sdOpen
## 1: bbby 72.078327 4.8706097
## 2: flws 5.583227 0.5904042
## 3: foxa 32.415936 1.3959866
## 4: ftd 32.076696 1.0200178
## 5: tfm 45.014622 7.0677125
## 6: twx 63.733426 3.5010912
## 7: viab 78.321952 7.5416673
# Notice the .() notation in the by argument. If you have one item in by, you
# can drop the .(). Probably not a bad idea to just keep it.
# Groups can also be defined on the fly inside the by argument. For example, the
# mean volume per month per stock:
allStocksDT[, .(meanVolume = mean(Volume)),
            by = .(Month = months(Date), Stock)]
## Month Stock meanVolume
## 1: March bbby 2432205.5
## 2: February bbby 2864568.8
## 3: January bbby 3980932.6
## 4: December bbby 1385548.4
## 5: November bbby 1299664.3
## 6: October bbby 1153647.8
## 7: September bbby 2014858.6
## 8: August bbby 1324535.0
## 9: July bbby 1435507.8
## 10: June bbby 2331990.9
## 11: May bbby 1981067.0
## 12: April bbby 2669126.4
## 13: March flws 195382.3
## 14: February flws 313419.6
## 15: January flws 222454.7
## 16: December flws 207340.8
## 17: November flws 203659.0
## 18: October flws 299751.6
## 19: September flws 182440.9
## 20: August flws 109690.5
## 21: July flws 122969.5
## 22: June flws 187831.5
## 23: May flws 193158.7
## 24: April flws 174202.9
## 25: April foxa 13930784.1
## 26: March foxa 11953765.7
## 27: February foxa 12610182.4
## 28: January foxa 15282016.9
## 29: December foxa 9543893.4
## 30: November foxa 9829641.4
## 31: October foxa 10478305.8
## 32: September foxa 8826728.1
## 33: August foxa 12139137.4
## 34: July foxa 11634087.5
## 35: June foxa 18145795.2
## 36: May foxa 15559846.3
## 37: March ftd 207211.4
## 38: February ftd 162931.5
## 39: January ftd 189268.9
## 40: December ftd 210457.2
## 41: November ftd 402275.0
## 42: October ftd 160888.1
## 43: March tfm 1321118.9
## 44: February tfm 877611.2
## 45: January tfm 1117749.9
## 46: December tfm 783214.0
## 47: November tfm 1356568.9
## 48: October tfm 449696.0
## 49: September tfm 538532.4
## 50: August tfm 478805.6
## 51: July tfm 410348.9
## 52: June tfm 496984.5
## 53: May tfm 735669.0
## 54: April tfm 485626.3
## 55: April twx 5541198.0
## 56: March twx 5128075.0
## 57: February twx 6126211.3
## 58: January twx 6083629.0
## 59: December twx 3960030.3
## 60: November twx 4510329.9
## 61: October twx 4064156.0
## 62: September twx 4281455.0
## 63: August twx 4023607.3
## 64: July twx 4078454.6
## 65: June twx 4965998.7
## 66: May twx 5289637.5
## 67: April viab 3102027.8
## 68: March viab 2332968.1
## 69: February viab 2349228.5
## 70: January viab 2772575.3
## 71: December viab 2698481.7
## 72: November viab 3422706.9
## 73: October viab 2289071.2
## 74: September viab 3335488.1
## 75: August viab 3404614.6
## 76: July viab 2196480.1
## 77: June viab 2686626.8
## 78: May viab 2809408.8
## Month Stock meanVolume
# We defined a new grouping variable called Month and then used it as one of the
# by variables for which to calculate the means.
# We can use i to limit the calculation to a subset. Here we calculate the mean
# Volume per month for bbby:
allStocksDT[
  Stock == "bbby",
  list(meanVolume = mean(Volume)),
  by = list(Month = months(Date))
]
## Month meanVolume
## 1: March 2432205
## 2: February 2864569
## 3: January 3980933
## 4: December 1385548
## 5: November 1299664
## 6: October 1153648
## 7: September 2014859
## 8: August 1324535
## 9: July 1435508
## 10: June 2331991
## 11: May 1981067
## 12: April 2669126
# Chaining ----------------------------------------------------------------
# Recall how we chained operations together in dplyr using %>%. We can also
# chain operations in data.table.
# Calculate mean open price per stock then sort by mean in ascending order:
allStocksDT[, list(meanOpen = mean(Open)), by = "Stock"][order(meanOpen)]
## Stock meanOpen
## 1: flws 5.583227
## 2: ftd 32.076696
## 3: foxa 32.415936
## 4: tfm 45.014622
## 5: twx 63.733426
## 6: bbby 72.078327
## 7: viab 78.321952
# Notice the "][". They need to be next to one another for chaining to work.
# Find the minimum and maximum stock price for each stock.
# Recall the chaining we used in dplyr:
# allStocks %>%
# group_by(Stock) %>%
# summarise(Min=min(Low), Max=max(High))
# We actually don't need to chain anything to do that in data.table:
allStocksDT[, list(Min = min(Low), Max = max(High)), by = "Stock"]
## Stock Min Max
## 1: bbby 62.12 80.82
## 2: flws 4.53 7.17
## 3: foxa 27.22 35.75
## 4: ftd 29.02 36.99
## 5: tfm 31.35 57.16
## 6: twx 55.70 70.77
## 7: viab 63.52 89.27
# Find the largest change in Open and Close price for each stock.
# Recall the chaining we used in dplyr:
# allStocks %>%
# group_by(Stock) %>%
# mutate(Change = Close - Open) %>%
# summarise(LargestGain = max(Change), LargestLoss = min(Change))
# Step 1 builds the daily Change per stock; step 2 summarises its extremes.
allStocksDT[
  , list(Change = Close - Open), by = "Stock"
][
  , list(LargestGain = max(Change), LargestLoss = min(Change)), by = "Stock"
]
## Stock LargestGain LargestLoss
## 1: bbby 2.66 -3.25
## 2: flws 0.54 -0.56
## 3: foxa 3.65 -3.47
## 4: ftd 1.65 -3.89
## 5: tfm 2.06 -2.34
## 6: twx 2.09 -2.26
## 7: viab 3.93 -3.35
# It works, but it's a little too concise for my taste.
# Back to computations on columns. What if you have a lot of columns? This can
# get tedious:
# The list elements are unnamed, so the result columns come out as V1..V5.
allStocksDT[, list(
  mean(Open),
  mean(High),
  mean(Low),
  mean(Close),
  mean(Volume)
), by = "Stock"]
## Warning in gmean(Volume): Group 3 summed to more than type 'integer'
## can hold so the result has been coerced to 'numeric' automatically, for
## convenience.
## Stock V1 V2 V3 V4 V5
## 1: bbby 72.078327 72.710000 71.499243 72.08498 2055076.4
## 2: flws 5.583227 5.689761 5.460757 5.58008 200264.3
## 3: foxa 32.415936 32.790876 32.142271 32.44554 12481889.4
## 4: ftd 32.076696 32.598609 31.619043 32.10991 224691.0
## 5: tfm 45.014622 45.624303 44.420598 45.00155 742273.8
## 6: twx 63.733426 64.259721 63.231355 63.73303 4817040.0
## 7: viab 78.321952 79.050876 77.646335 78.32291 2777436.5
# data.table provides the .SD symbol to help with this. SD = Subset Data. Of
# course it's only helpful if you're comfortable using the lapply function.
allStocksDT[, lapply(.SD, mean), by = "Stock"]
## Warning in gmean(Volume): Group 3 summed to more than type 'integer'
## can hold so the result has been coerced to 'numeric' automatically, for
## convenience.
## Stock Date Open High Low Close Volume
## 1: bbby 15973.29 72.078327 72.710000 71.499243 72.08498 2055076.4
## 2: flws 15973.29 5.583227 5.689761 5.460757 5.58008 200264.3
## 3: foxa 15983.41 32.415936 32.790876 32.142271 32.44554 12481889.4
## 4: ftd 16071.51 32.076696 32.598609 31.619043 32.10991 224691.0
## 5: tfm 15973.29 45.014622 45.624303 44.420598 45.00155 742273.8
## 6: twx 15983.41 63.733426 64.259721 63.231355 63.73303 4817040.0
## 7: viab 15983.41 78.321952 79.050876 77.646335 78.32291 2777436.5
# That calculated the mean for all columns (including the date!) except what was
# in the by argument. We can use the .SDcols argument to specify all columns
# except the Date and Volume columns:
# Negating the name vector in .SDcols drops those columns from .SD.
allStocksDT[, lapply(.SD, mean),
            by = "Stock",
            .SDcols = !c("Date", "Volume")]
## Stock Open High Low Close
## 1: bbby 72.078327 72.710000 71.499243 72.08498
## 2: flws 5.583227 5.689761 5.460757 5.58008
## 3: foxa 32.415936 32.790876 32.142271 32.44554
## 4: ftd 32.076696 32.598609 31.619043 32.10991
## 5: tfm 45.014622 45.624303 44.420598 45.00155
## 6: twx 63.733426 64.259721 63.231355 63.73303
## 7: viab 78.321952 79.050876 77.646335 78.32291
# := ----------------------------------------------------------------------
# So far everything we've done has been output to the console and not saved. We
# could have saved our work the usual way with an assignment operator "<-".
# However, data.table provides a convenient way to modify a data table without
# using an assignment operator. The function is ":=" (read "colon equals"). It
# updates or adds column(s) by reference. That is, it makes no copies of any
# part of memory at all. This can be very efficient for large data sets.
# Let's do some examples.
# Create a column for day of trading:
names(allStocksDT)
## [1] "Date" "Open" "High" "Low" "Close" "Volume" "Stock"
allStocksDT[, Day := weekdays(Date)]
## Date Open High Low Close Volume Stock Day
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby Wednesday
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby Tuesday
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby Monday
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby Friday
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby Thursday
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab Monday
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab Friday
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab Thursday
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab Wednesday
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab Tuesday
names(allStocksDT)
## [1] "Date" "Open" "High" "Low" "Close" "Volume" "Stock" "Day"
# Remove the column we created:
allStocksDT[, Day := NULL]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
names(allStocksDT)
## [1] "Date" "Open" "High" "Low" "Close" "Volume" "Stock"
# We can also add/update multiple columns. Here we create a new column for Day
# and format the Volume column to have commas:
# install.packages("scales")
library(scales) # for comma function
allStocksDT[, c("Day", "Volume") := list(weekdays(Date), comma(Volume))]
## Date Open High Low Close Volume Stock Day
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1,785,164 bbby Wednesday
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1,571,625 bbby Tuesday
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1,742,341 bbby Monday
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3,639,114 bbby Friday
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1,328,860 bbby Thursday
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3,078,540 viab Monday
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2,029,401 viab Friday
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2,784,995 viab Thursday
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2,259,979 viab Wednesday
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4,605,824 viab Tuesday
allStocksDT
## Date Open High Low Close Volume Stock Day
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1,785,164 bbby Wednesday
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1,571,625 bbby Tuesday
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1,742,341 bbby Monday
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3,639,114 bbby Friday
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1,328,860 bbby Thursday
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3,078,540 viab Monday
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2,029,401 viab Friday
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2,784,995 viab Thursday
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2,259,979 viab Wednesday
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4,605,824 viab Tuesday
# Let's change back to the way it was by removing the Day column and converting
# Volume to integer:
# install.packages("tidyr")
library(tidyr) # for extract_numeric function
# Drop Day and strip the thousands separators so Volume is an integer column
# again. extract_numeric() is deprecated in tidyr and returns a double, so it
# would not actually put Volume back the way it was.
allStocksDT[, c("Day", "Volume") :=
              list(NULL, as.integer(gsub(",", "", Volume, fixed = TRUE)))]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
allStocksDT
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
# And now let's do what we did before another way!
allStocksDT[, `:=`(Day = weekdays(Date), Volume = comma(Volume))]
## Date Open High Low Close Volume Stock Day
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1,785,164 bbby Wednesday
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1,571,625 bbby Tuesday
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1,742,341 bbby Monday
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3,639,114 bbby Friday
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1,328,860 bbby Thursday
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3,078,540 viab Monday
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2,029,401 viab Friday
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2,784,995 viab Thursday
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2,259,979 viab Wednesday
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4,605,824 viab Tuesday
allStocksDT
## Date Open High Low Close Volume Stock Day
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1,785,164 bbby Wednesday
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1,571,625 bbby Tuesday
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1,742,341 bbby Monday
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3,639,114 bbby Friday
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1,328,860 bbby Thursday
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3,078,540 viab Monday
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2,029,401 viab Friday
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2,784,995 viab Thursday
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2,259,979 viab Wednesday
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4,605,824 viab Tuesday
# Here we're using := like a function, because it is a function. As I've
# mentioned before in class, just about everything is accomplished by functions
# in R. Even the "+" sign is a function:
2+4
## [1] 6
`+`(2,4)
## [1] 6
# Those brackets in data frames? Yep, functions:
`[`(allStocks,1:4,1:4)
## Date Open High Low
## 1 2014-03-26 67.76 68.05 67.18
## 2 2014-03-25 67.61 67.93 67.34
## 3 2014-03-24 67.73 68.00 66.99
## 4 2014-03-21 68.41 68.41 67.29
# Again let's tidy up:
# Same tidy-up using the functional form of `:=`. Removing the commas and
# calling as.integer() restores Volume's integer type; the deprecated
# tidyr::extract_numeric() would have left it as a double.
allStocksDT[, `:=`(
  Day = NULL,
  Volume = as.integer(gsub(",", "", Volume, fixed = TRUE))
)]
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
allStocksDT
## Date Open High Low Close Volume Stock
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab
# We can combine := with i and by. Here we subset where month equals January,
# then calculate the total Volume per Stock. In other words, we calculate the
# total volume per stock for January.
# Only the January rows receive a Total; every other row is left as NA.
allStocksDT[months(Date) == "January",
            Total := sum(Volume),
            by = "Stock"]
## Date Open High Low Close Volume Stock Total
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby NA
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby NA
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby NA
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby NA
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby NA
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab NA
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab NA
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab NA
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab NA
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab NA
# If we print the data table, we'll see NA for Total. That's because the head
# and tail of the data table do not display data from January.
allStocksDT
## Date Open High Low Close Volume Stock Total
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby NA
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby NA
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby NA
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby NA
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby NA
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab NA
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab NA
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab NA
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab NA
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab NA
# To see the result, we need to show some data from January
allStocksDT[months(Date)=="January"]
## Date Open High Low Close Volume Stock Total
## 1: 2014-01-31 63.43 64.49 63.26 63.85 2107357 bbby 83599585
## 2: 2014-01-30 64.74 65.07 64.00 64.10 2585396 bbby 83599585
## 3: 2014-01-29 64.88 64.88 64.03 64.35 3114832 bbby 83599585
## 4: 2014-01-28 64.32 65.30 64.30 65.09 2876603 bbby 83599585
## 5: 2014-01-27 64.75 65.18 64.32 64.32 2223187 bbby 83599585
## ---
## 143: 2014-01-08 86.19 86.93 85.71 86.67 2257414 viab 58224082
## 144: 2014-01-07 86.16 86.62 85.87 86.33 1805977 viab 58224082
## 145: 2014-01-06 87.13 87.16 85.41 85.73 2331050 viab 58224082
## 146: 2014-01-03 86.89 87.19 86.47 86.60 1554206 viab 58224082
## 147: 2014-01-02 87.45 87.57 86.69 86.95 1613286 viab 58224082
# data.table Speed --------------------------------------------------------
# This is a good time to demonstrate data.table's speed. Let's generate a data
# frame with 1,000,000 rows.
# Simulate 1,000,000 rows: x is a three-level factor sampled with replacement,
# y is normally distributed with mean 100 and sd 10. No seed is set, so the
# exact values (and the means shown below) differ from run to run.
DF <- data.frame(
  x = factor(sample(x = c("A", "B", "C"), size = 1e6, replace = TRUE)),
  y = rnorm(n = 1e6, mean = 100, sd = 10)
)
dim(DF)
## [1] 1000000 2
print(object.size(DF), units = "Mb")
## 11.4 Mb
# Now let's find the mean of y for each level of x using aggregate():
system.time({
  ans1 <- aggregate(y ~ x, data = DF, FUN = mean)
})
## user system elapsed
## 1.05 0.02 1.06
ans1
## x y
## 1 A 100.00284
## 2 B 99.98164
## 3 C 100.00780
# Now do the same with data.table:
DT <- data.table(DF)
system.time({
  ans2 <- DT[, list(y = mean(y)), by = "x"]
})
## user system elapsed
## 0 0 0
ans2
## x y
## 1: A 100.00284
## 2: C 100.00780
## 3: B 99.98164
# Considerably faster!
# How about dplyr?
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:data.table':
##
## between, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Same grouped mean via dplyr, written as nested calls instead of a pipe.
system.time({
  ans3 <- summarise(group_by(DT, x), mean(y))
})
## user system elapsed
## 0.02 0.01 0.03
ans3
## Source: local data table [3 x 2]
##
## x mean(y)
## 1 A 100.00284
## 2 C 100.00780
## 3 B 99.98164
# just as fast.
# Remember the baseball example from the dplyr lecture? Here it is again with dplyr:
library(Lahman)
# Batting %>%
# group_by(playerID) %>%
# summarize(total = sum(G_batting)) %>%
# arrange(desc(total)) %>%
# head(5)
# Here's how we can do it with data.table and some chaining:
# Sum G_batting per player, then keep the rows with the five largest totals.
# order() returns row positions in descending order of total; head() keeps the
# first five of them, which are then used as i in the chained bracket.
BattingDT <- data.table(Batting)
BattingDT[, .(total = sum(G_batting)), by = .(playerID)][
  head(order(total, decreasing = TRUE), n = 5)]
## playerID total
## 1: rosepe01 3562
## 2: yastrca01 3308
## 3: aaronha01 3298
## 4: henderi01 3081
## 5: cobbty01 3035
rm(BattingDT)
# Keys --------------------------------------------------------------------
# data.table allows us to create a "key" on a data table. The data.table
# documentation refers to keys as "super-charged row names". It may help to also
# think of them as a factor. Let's see how to set a key and what we can do with
# it.
# First let's introduce the tables() function, not to be confused with the base
# R table() function. It will display all data tables currently in memory.
tables()
## NAME NROW NCOL MB
## [1,] allStocksDT 1,621 8 1
## [2,] ans2 3 2 1
## [3,] ans3 3 2 1
## [4,] DT 1,000,000 2 12
## COLS KEY
## [1,] Date,Open,High,Low,Close,Volume,Stock,Total
## [2,] x,y
## [3,] x,mean(y)
## [4,] x,y
## Total: 15MB
# Notice the key column is empty
setkey(allStocksDT, Stock)
tables()
## NAME NROW NCOL MB
## [1,] allStocksDT 1,621 8 1
## [2,] ans2 3 2 1
## [3,] ans3 3 2 1
## [4,] DT 1,000,000 2 12
## COLS KEY
## [1,] Date,Open,High,Low,Close,Volume,Stock,Total Stock
## [2,] x,y
## [3,] x,mean(y)
## [4,] x,y
## Total: 15MB
# Now Stock is the key
allStocksDT
## Date Open High Low Close Volume Stock Total
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby NA
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby NA
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby NA
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby NA
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby NA
## ---
## 1617: 2013-04-15 66.04 66.23 63.99 64.02 3078540 viab NA
## 1618: 2013-04-12 66.30 66.63 65.62 66.50 2029401 viab NA
## 1619: 2013-04-11 65.69 66.76 65.69 66.15 2784995 viab NA
## 1620: 2013-04-10 63.76 65.77 63.76 65.70 2259979 viab NA
## 1621: 2013-04-09 65.78 66.09 64.57 64.60 4605824 viab NA
# The data table is now sorted automatically by Stock. Also notice we didn't
# have to use an assignment operator "<-" above. The key assignment was made
# directly to the data table and saved.
# Having a key can make for easy subsetting. Just state a level in the brackets.
# Here we view just the bbby stocks
allStocksDT["bbby"]
## Date Open High Low Close Volume Stock Total
## 1: 2014-03-26 67.76 68.05 67.18 67.25 1785164 bbby NA
## 2: 2014-03-25 67.61 67.93 67.34 67.73 1571625 bbby NA
## 3: 2014-03-24 67.73 68.00 66.99 67.26 1742341 bbby NA
## 4: 2014-03-21 68.41 68.41 67.29 67.55 3639114 bbby NA
## 5: 2014-03-20 67.58 68.12 67.52 67.82 1328860 bbby NA
## ---
## 247: 2013-04-04 64.14 64.71 63.70 64.67 1772495 bbby NA
## 248: 2013-04-03 65.05 65.25 63.48 63.96 2046493 bbby NA
## 249: 2013-04-02 64.14 65.16 64.14 64.77 2283494 bbby NA
## 250: 2013-04-01 63.93 64.81 63.62 63.82 1807803 bbby NA
## 251: 2013-03-28 65.07 65.24 64.32 64.42 2613743 bbby NA
# A key can consist of multiple columns.
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
airqualityDT <- data.table(airquality)
# Make Month and Day the keys:
setkey(airqualityDT, Month, Day)
tables()
## NAME NROW NCOL MB
## [1,] airqualityDT 153 6 1
## [2,] allStocksDT 1,621 8 1
## [3,] ans2 3 2 1
## [4,] ans3 3 2 1
## [5,] DT 1,000,000 2 12
## COLS KEY
## [1,] Ozone,Solar.R,Wind,Temp,Month,Day Month,Day
## [2,] Date,Open,High,Low,Close,Volume,Stock,Total Stock
## [3,] x,y
## [4,] x,mean(y)
## [5,] x,y
## Total: 16MB
# see record for May 2
airqualityDT[.(5,2)]
## Ozone Solar.R Wind Temp Month Day
## 1: 36 118 8 72 5 2
# see record for June 21
airqualityDT[.(6,21)]
## Ozone Solar.R Wind Temp Month Day
## 1: NA 150 6.3 77 6 21
# Keys can be set when creating data tables using the key argument. Just make
# sure the column name is in quotes.
weatherDT <- data.table(weather, key="Events")
# See records for Events="Snow" along with the Date and Mean Temperature
# Select the two columns in j. Passing Mean.TemperatureF as the third
# positional argument would make it the by (grouping) variable instead, which
# only looks like a column selection while every temperature is unique.
weatherDT["Snow", .(Date, Mean.TemperatureF)]
## Date Mean.TemperatureF
## 1: 2013-01-23 22
## 2: 2013-01-24 20
## 3: 2013-01-25 18
## 4: 2013-02-01 26
## 5: 2013-02-02 25
## 6: 2013-02-03 32
## 7: 2013-02-08 41
# Conclusion --------------------------------------------------------------
# This was merely an intro to data.table. There is much more to the package,
# including a number of utility and convenience functions. The authors of the
# package appear to be very passionate and helpful with respect to data.table.
# The examples and documentation are thorough, thoughtful and well-written. The
# same can be said of the dplyr package. Their vignettes have set the standard
# for educating new users.
# Instead of choosing between dplyr and data.table, I would try to learn both.
# It will make you flexible, adaptable and a more knowledgeable and more informed
# R user.