# Download the Biden approval poll list straight from the FiveThirtyEight
# project site; column types are guessed by `read_csv()`.
approval_polllist <- read_csv(
  file = "https://projects.fivethirtyeight.com/biden-approval-data/approval_polllist.csv"
)
## Rows: 1601 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): president, subgroup, modeldate, startdate, enddate, pollster, grad...
## dbl (9): samplesize, weight, influence, approve, disapprove, adjusted_appro...
## lgl (1): tracking
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the imported columns and the types `read_csv()` assigned to them.
glimpse(approval_polllist)
## Rows: 1,601
## Columns: 22
## $ president <chr> "Joseph R. Biden Jr.", "Joseph R. Biden Jr.", "Jos…
## $ subgroup <chr> "All polls", "All polls", "All polls", "All polls"…
## $ modeldate <chr> "9/19/2021", "9/19/2021", "9/19/2021", "9/19/2021"…
## $ startdate <chr> "1/31/2021", "2/1/2021", "2/1/2021", "2/2/2021", "…
## $ enddate <chr> "2/2/2021", "2/3/2021", "2/3/2021", "2/4/2021", "2…
## $ pollster <chr> "YouGov", "Rasmussen Reports/Pulse Opinion Researc…
## $ grade <chr> "B+", "B", "B", "B", "A-", "B", "B-", "B", "B", "B…
## $ samplesize <dbl> 1500, 1500, 15000, 15000, 1429, 1500, 1005, 15000,…
## $ population <chr> "a", "lv", "a", "a", "a", "lv", "a", "a", "lv", "a…
## $ weight <dbl> 1.08560770, 0.33082275, 0.27856273, 0.25074450, 2.…
## $ influence <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ approve <dbl> 46, 52, 54, 54, 49, 49, 57, 54, 50, 54, 60, 59, 51…
## $ disapprove <dbl> 38, 46, 33, 34, 39, 48, 34, 34, 47, 34, 32, 35, 46…
## $ adjusted_approve <dbl> 47.26205, 54.40467, 52.54153, 52.54153, 49.64334, …
## $ adjusted_disapprove <dbl> 38.32436, 40.05448, 36.32914, 37.32914, 39.06416, …
## $ multiversions <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ tracking <lgl> NA, TRUE, TRUE, TRUE, NA, TRUE, NA, TRUE, TRUE, TR…
## $ url <chr> "https://docs.cdn.yougov.com/460mactkmh/econTabRep…
## $ poll_id <dbl> 74332, 74338, 74366, 74367, 74348, 74347, 74345, 7…
## $ question_id <dbl> 139593, 139642, 139733, 139734, 139668, 139654, 13…
## $ createddate <chr> "2/3/2021", "2/4/2021", "2/11/2021", "2/11/2021", …
## $ timestamp <chr> "13:37:08 19 Sep 2021", "13:37:08 19 Sep 2021", "1…
# The four date columns were read as month/day/year character strings;
# parse them into Date values with `lubridate::mdy()`.
approval_polllist <- approval_polllist %>%
  mutate(
    across(c(modeldate, startdate, enddate, createddate), mdy)
  )

# Check that the date columns are now of type <date>.
glimpse(approval_polllist)
## Rows: 1,601
## Columns: 22
## $ president <chr> "Joseph R. Biden Jr.", "Joseph R. Biden Jr.", "Jos…
## $ subgroup <chr> "All polls", "All polls", "All polls", "All polls"…
## $ modeldate <date> 2021-09-19, 2021-09-19, 2021-09-19, 2021-09-19, 2…
## $ startdate <date> 2021-01-31, 2021-02-01, 2021-02-01, 2021-02-02, 2…
## $ enddate <date> 2021-02-02, 2021-02-03, 2021-02-03, 2021-02-04, 2…
## $ pollster <chr> "YouGov", "Rasmussen Reports/Pulse Opinion Researc…
## $ grade <chr> "B+", "B", "B", "B", "A-", "B", "B-", "B", "B", "B…
## $ samplesize <dbl> 1500, 1500, 15000, 15000, 1429, 1500, 1005, 15000,…
## $ population <chr> "a", "lv", "a", "a", "a", "lv", "a", "a", "lv", "a…
## $ weight <dbl> 1.08560770, 0.33082275, 0.27856273, 0.25074450, 2.…
## $ influence <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ approve <dbl> 46, 52, 54, 54, 49, 49, 57, 54, 50, 54, 60, 59, 51…
## $ disapprove <dbl> 38, 46, 33, 34, 39, 48, 34, 34, 47, 34, 32, 35, 46…
## $ adjusted_approve <dbl> 47.26205, 54.40467, 52.54153, 52.54153, 49.64334, …
## $ adjusted_disapprove <dbl> 38.32436, 40.05448, 36.32914, 37.32914, 39.06416, …
## $ multiversions <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ tracking <lgl> NA, TRUE, TRUE, TRUE, NA, TRUE, NA, TRUE, TRUE, TR…
## $ url <chr> "https://docs.cdn.yougov.com/460mactkmh/econTabRep…
## $ poll_id <dbl> 74332, 74338, 74366, 74367, 74348, 74347, 74345, 7…
## $ question_id <dbl> 139593, 139642, 139733, 139734, 139668, 139654, 13…
## $ createddate <date> 2021-02-03, 2021-02-04, 2021-02-11, 2021-02-11, 2…
## $ timestamp <chr> "13:37:08 19 Sep 2021", "13:37:08 19 Sep 2021", "1…
# Create a plot ----
# Replicating the Biden Approval Margin graph
# Weekly average of Biden's net approval (approve - disapprove) with a 95%
# t-based confidence band, drawn as a ggplot object and then printed.
#
# NOTE(review): the plot is stored as `qplot`, which is also the name of a
# ggplot2 function; the name is kept so any later reference still works.
# NOTE(review): `week()` returns the week of the year only, so polls from
# different calendar years would be pooled into the same week if the
# downloaded data ever spans more than one year -- confirm before reuse.
# NOTE(review): the data has a `subgroup` column; if the same poll is listed
# under several subgroups it is counted more than once here -- TODO confirm
# whether a filter on `subgroup` is needed.
qplot <- approval_polllist %>%
  # Row-wise derived columns are computed before grouping; they do not depend
  # on the group.
  mutate(
    week = week(enddate),
    net_approval_rate = approve - disapprove
  ) %>%
  group_by(week) %>%
  # Summary columns are named `mean_net` / `sd_net` so they do not shadow the
  # `mean()` and `sd()` functions inside the same `summarise()` call.
  summarise(
    mean_net = mean(net_approval_rate),   # mean net approval per week
    sd_net = sd(net_approval_rate),       # standard deviation per week
    count = n(),                          # number of rows per week
    se = sd_net / sqrt(count),            # standard error of the weekly mean
    t_critical = qt(0.975, count - 1),    # two-sided 95% t critical value
    lower = mean_net - t_critical * se,   # lower end of the CI
    upper = mean_net + t_critical * se    # upper end of the CI
  ) %>%
  ggplot(aes(x = week, y = mean_net)) +
  # Weekly means as red points joined by a thin red line.
  geom_point(colour = "red") +
  geom_line(colour = "red", size = 0.25) +
  # Confidence band: translucent fill with a thin red outline.
  geom_ribbon(
    aes(ymin = lower, ymax = upper),
    colour = "red",
    linetype = 1,
    alpha = 0.1,
    size = 0.25
  ) +
  # Smoothed trend line without its own standard-error band.
  geom_smooth(se = FALSE) +
  # Thick orange reference line at a net approval of zero.
  geom_hline(yintercept = 0, color = "orange", size = 2) +
  theme_bw() +
  labs(
    title = "Estimating Approval Margin (approve-disapprove) for Joe Biden",
    subtitle = "Weekly average of all polls",
    x = "Week of the year",
    y = "Average Approval Margin (Approve - Disapprove)"
  )

# Print the plot.
qplot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
