8 3/20 Lab VII | Lists, Loops, & Functions in R

8.1 Preparation

## Packages
library(plyr)
library(tidyverse)
library(readxl)
library(readr)
library(lubridate)
library(knitr)

Differences in policies and personnel associated with presidential candidates could affect the profitability of firms in different ways, with stocks potentially rising and falling with the expected outlooks for political candidates. In this report we analyze stocks that have been identified as potentially responsive to the political fortunes of Donald Trump and Joe Biden.

8.1.1 Data

Our stock market data consists of daily closing prices of 31 stocks that were identified by Kiplinger as being politically exposed. These stocks cover a broad range of sectors potentially influenced by politics, including tech stocks, energy (both oil and renewable oriented firms), health care, marijuana, gold and foreign-oriented ETFs.

Our measure of political expectations comes from the PredictIt betting market in which investors buy shares in (among other things) presidential candidates. Share are worth $1 if the candidate wins the general election. The price for a candidate on a given day is taken to be the market’s estimate of the probability of victory for that candidate.

8.2 Load data. Stock price data is in stocks2020.csv. Presidential prices are in USPres_2020_Price History By Market -Bulk.xlsx. Think about renaming Data, Contract Name and Close Share Price for ease of use.

## Reading in stock data
stk <- read_csv("Data/stocks2020.csv",  col_names = TRUE)

## Reading in presidential market data
pres_mkt <- read_xlsx("Data/USPres_2020_Price History By Market -Bulk.xlsx", col_names = TRUE) %>%   
  mutate(date = `Date (ET)`, 
         name = `Contract Name`, 
         price = `Close Share Price`) %>%
  dplyr::select(-`Date (ET)`, -`Contract Name`, -`Close Share Price`)

8.3 Create data frame with daily price data for Trump in 2020

## Creating daily price data for Trump
trump2020 <- pres_mkt %>%
  filter(year(date) > 2019 & name == "Donald Trump") %>%
  dplyr::select(c(date, "Trump" = price))

8.4 Merge the stock price and Trump price data

## Joining the two datasets
stk_pres <- left_join(stk, trump2020, by = "date")

8.5 Create a list of stock ticker names (do not include DJI as a ticker)

## Creating list of ticker names
stk_tickers <- 
  c(names(stk)[!names(stk) %in% c("date", "DJI")])

## Creating number of stocks
stk_num <- length(stk_tickers)

8.6 Create a function that calculates daily percent change. Use the lag function and the mutate_at function. Make sure to check that your function worked.

## Price Change Function
pct_change <- function(x){
  
(x - lag(x))/lag(x)
  
}

## Daily Change Dataset
daily_df <- stk_pres %>% 
  mutate_at(c("DJI", "Trump", stk_tickers), pct_change) %>% 
  dplyr::select(c(date, c("DJI", "Trump", stk_tickers)))

8.7 Loop thru list of tickers and run regressions in which daily change in stock price is a function of change in DJIA and change in Trump price. Create a data frame that stores the coefficient, standard error and t-stat for the Trump variable for each stock. Include a column that has the stock ticker in that data frame as well.

## Preparing Data Frame to store results
ols_results <- data.frame("row" = 1:stk_num,
                         "ticker" = NA, 
                         "coef" = NA, 
                         "se" = NA,
                         "tStat" = NA)

## Looping Though Regressions
for(i in 1:stk_num){
  daily_df$temp <- unlist(daily_df[, i + 3])
  ols.1 <- lm(temp ~ DJI + Trump, data = daily_df)
  ols_results[i, "ticker"] <- stk_tickers[i]
  ols_results[i, 3:5] <- round(summary(ols.1)$coefficients["Trump", 1:3], 3)
}

## Showing results
head(ols_results)
##   row ticker   coef    se  tStat
## 1   1   AAPL -0.015 0.011 -1.304
## 2   2   NFLX -0.036 0.016 -2.208
## 3   3   AMZN -0.034 0.013 -2.643
## 4   4      K -0.003 0.011 -0.259
## 5   5   TSLA -0.004 0.031 -0.115
## 6   6    LMT  0.000 0.011 -0.026

8.8 The questions below are not required for full credit but I encourage you to try!

8.8.1 Use list apply - lapply() - to regress stock price on Trump.

## Now with lapply()
ols_results2 <- lapply(stk_tickers, function(x){
  daily_df$temp = unlist(daily_df[, which(names(daily_df) == x)])
  ols.1 <- lm(temp ~ DJI + Trump, data = daily_df)
})

## Showing Results
head(ols_results2)
## [[1]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001889     0.980104    -0.014758  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001903     0.535397    -0.035819  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.002097     0.532408    -0.033624  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0003296    0.4133016   -0.0028461  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.009543     1.042136    -0.003596  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0002786    0.8337098   -0.0002856

8.8.2 Use tapply to find the average price of each presidential candidate over the course of the campaign.

## With tapply()
tapply(pres_mkt$price, pres_mkt$name, mean)
##      Amy Klobuchar       Andrew Cuomo        Andrew Yang     Bernie Sanders      Beto O'Rourke        Cory Booker 
##         0.02166512         0.01606446         0.03927786         0.09528414         0.03062162         0.03407125 
##       Donald Trump   Elizabeth Warren    Hillary Clinton      Howie Hawkins       Jo Jorgensen          Joe Biden 
##         0.38840543         0.07018660         0.02375566         0.01000000         0.01000000         0.24708227 
##        John Kasich      Kamala Harris         Kanye West Kirsten Gillibrand         Mark Cuban    Mark Zuckerberg 
##         0.01776930         0.06757422         0.01000000         0.03126378         0.01106383         0.01024052 
##  Michael Bloomberg         Mike Pence        Nikki Haley          Paul Ryan     Pete Buttigieg      Sherrod Brown 
##         0.02249647         0.03799830         0.01691027         0.01322849         0.04516535         0.01289140 
##         Tom Steyer      Tulsi Gabbard 
##         0.01000000         0.01221106

8.8.3 Now use map() in library(purrr) to regress stock price on Trump.

## The function from above
func_stock <- function(x){
  daily_df$temp = unlist(daily_df[, which(names(daily_df) == x)])
  ols.1 <- lm(temp ~ DJI + Trump, data = daily_df)
}

## Now with map()
map(.x=stk_tickers, .f=func_stock)
## [[1]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001889     0.980104    -0.014758  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001903     0.535397    -0.035819  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.002097     0.532408    -0.033624  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0003296    0.4133016   -0.0028461  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.009543     1.042136    -0.003596  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0002786    0.8337098   -0.0002856  
## 
## 
## [[7]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0003462    0.9118574   -0.0070218  
## 
## 
## [[8]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0007476    1.3508059    0.0489526  
## 
## 
## [[9]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001328     0.658680    -0.008615  
## 
## 
## [[10]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   -0.002651     1.056977     0.028651  
## 
## 
## [[11]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001382     1.011709    -0.010030  
## 
## 
## [[12]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##     0.00125      0.86138     -0.04332  
## 
## 
## [[13]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0008178    0.8435450   -0.0202986  
## 
## 
## [[14]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   -0.003142     1.067967     0.019232  
## 
## 
## [[15]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001017     0.463254    -0.002714  
## 
## 
## [[16]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   -0.002939     0.866421     0.005825  
## 
## 
## [[17]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0001886    1.0378297    0.0003729  
## 
## 
## [[18]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   4.747e-05    1.067e+00    2.402e-02  
## 
## 
## [[19]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0005725    0.9488423    0.0269447  
## 
## 
## [[20]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##  -0.0005572    0.5019176   -0.0012461  
## 
## 
## [[21]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001302     1.009206     0.004588  
## 
## 
## [[22]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0006483    0.8554046    0.0061128  
## 
## 
## [[23]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001798     0.893112    -0.010831  
## 
## 
## [[24]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.001051     0.810440    -0.008493  
## 
## 
## [[25]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##    0.002559     0.937766     0.005098  
## 
## 
## [[26]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##     0.01192      0.69831      0.05660  
## 
## 
## [[27]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0008981    1.0917988   -0.0242325  
## 
## 
## [[28]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   -0.001052     1.176215     0.019945  
## 
## 
## [[29]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   -0.002581     0.959133     0.031636  
## 
## 
## [[30]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0001306    0.8001097   -0.0106032  
## 
## 
## [[31]]
## 
## Call:
## lm(formula = temp ~ DJI + Trump, data = daily_df)
## 
## Coefficients:
## (Intercept)          DJI        Trump  
##   0.0008454    0.0707473   -0.0124007

8.8.4 Create your own simple regression function. To avoid issues of NAs, simulate some data and see if it worked!

## Creating constituent functions

## Beta Hat Function
beta_hat <- function(x,y) {
  
  (sum((x-mean(x))*(y-mean(y))))/
    (sum((x-mean(x))^2))
  
}

## Intercept Function
constant <- function(x,y,b){
  
  mean(y) - b*mean(x)
  
}

## Fitted Values Function
fit_val <- function(x, y, b) {
  constant(x=x,y=y,b=b) + beta_hat(x,y) * x
}

## R squared Function
r2 <- function(fitted, y) {
  sum((fitted - mean(y))^2)/
    sum((y-mean(y))^2)
}

## Standard Error Function
se <- function(fitted, x, y) {
  sqrt(sum((fitted-y)^2)/length(y)/
         sum((x-mean(x))^2))
  
}

## T Stat Function
t <- function(beta, null=0, se) {
  (beta-null)/
    se
}

## P-Value Function
pval <- function(t,df) {
  
  pt(q=t, df, lower.tail = F)*2
  
}

## OLS Function
ols <- function(X,Y) {
  
  b <- beta_hat(X, Y)
  
  constant <- constant(x=X, y=Y, b)
  
  fitted <- fit_val(x=X,y=Y,b=b)
  
  r_squared <- r2(fitted, y=Y)
  
  stand_err <- se(fitted=fitted, x=X, y=Y)
  
  t_stat <- t(beta=b, se=stand_err)
  
  p <- pval(t=t_stat, df=(length(X)-1))
  
  tibble(b, stand_err, t_stat, p)
  
}

## Creating Data
x <- rnorm(1000, 15, 2)
y <- x*17 + rnorm(1000, 15, 12)

## Testing
ols(x, y)
## # A tibble: 1 × 4
##       b stand_err t_stat     p
##   <dbl>     <dbl>  <dbl> <dbl>
## 1  17.2     0.186   92.2     0
summary(lm(y~x))
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.944  -7.582  -0.012   7.890  44.258 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  13.3346     2.8372    4.70 2.97e-06 ***
## x            17.1512     0.1862   92.13  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.52 on 998 degrees of freedom
## Multiple R-squared:  0.8948, Adjusted R-squared:  0.8947 
## F-statistic:  8489 on 1 and 998 DF,  p-value: < 2.2e-16