The following capstone project replicates some of the key results from the paper “Intergenerational Mobility and Preferences for Redistribution” by Alberto Alesina, Stefanie Stantcheva, and Edoardo Teso (American Economic Review 2018). In their paper, Alesina, Stantcheva, and Teso explore how overly optimistic or pessimistic beliefs about intergenerational mobility (as measured by the difference between true and perceived mobility) is related to support for redistribution.
In this replication, you will reproduce three of the main tables and two figures, either in part or in whole. You have 20 days to complete the replication exercise - it is due by the end of the day on March 28th. During the replication, you are expected to work on your own - do not collaborate with other students. If you have devoted substantial time to a problem and are still in need of assistance, you may contact me for suitable hints. But please try to use the course materials, R help files, and online documentation to figure out the solutions yourself as much as possible. In the future, it is important that you’re able to troubleshoot problems and navigate resources in R on your own.
Set the following options for the document:
Be sure to commit and push your work to Github after each section AT A MINIMUM (hence, at least after “Preliminaries”, “Basic data set preparation”, “Table 1”,“Table 2”, “Table 3”, “Figure 2”, and “Figure 3”).
# Load libaries
library(rio)
library(tidyverse)
library(broom)
library(knitr)
library(stargazer)
library(glue)
library(ggthemes)
# Import data
base_data <- import("Data_Descriptive_Waves_ABC.dta")
# Filter for flag_1 and flag_2 equal to zero
base_data <- base_data %>% filter(flag_1 == 0, flag_2 == 0)
Generate a University indicator variable, as follows:
The variable should equal 1 when any of the following conditions hold:
Zero otherwise (if using a case_when function, you can do this by setting the last case to be “TRUE ~ 0”. )
# Generate university indicator
base_data <- base_data %>% mutate(university = case_when(
(education >= 6 & US==1) ~ 1,
(education>=6 & UK==1) ~ 1,
(education>=5 & Italy==1) ~ 1,
(education>=7 & France==1) ~ 1,
(education>=5 & Sweden==1 & wave=="September") ~ 1,
(education>=6 & Sweden==1 & wave=="February") ~ 1,
(is.na(education)) ~ NA_real_,
TRUE ~ 0
))
# Turn married into indicator variable by recoding values of 2 for "married" to 0
base_data <- base_data %>% mutate(married = if_else(married == 2, 0, married))
Generate age bracket indicators as follows:
# Generate Age Bracket Variables
base_data <- base_data %>% mutate(
age_1 = if_else(age >= 18 & age <= 29, 1, 0),
age_2 = if_else(age >= 30 & age <= 39, 1, 0),
age_3 = if_else(age >= 40 & age <= 49, 1, 0),
age_4 = if_else(age >= 50 & age <= 59, 1, 0),
age_5 = if_else(age >= 60, 1, 0)
)
Generate a country variable, equal to:
# Generate Country Variable
base_data <- base_data %>% mutate(country = case_when(
(US == 1) ~ "US",
(UK == 1) ~ "UK",
(France == 1) ~ "France",
(Italy == 1) ~ "Italy",
(Sweden == 1) ~ "Sweden"
))
Begin by creating a summary statistics data frame, with averages for selected variables by country (with missing values removed when calculating the mean). Be sure to create new variable names that match the description in Table 1 of Alesina, Stantcheva, & Teso. The variables should be as follows:
Variables | |
---|---|
age_1 | inc_bracket_3 |
age_2 | inc_bracket_4 |
age_3 | married |
age_4 | born_in_country |
age_5 | employed |
inc_bracket_1 | unemployed |
inc_bracket_2 | university |
summary_data <- base_data %>% group_by(country) %>% summarize(
"Male" = mean(male),
"18-29 years old" = mean(age_1, na.rm = TRUE),
"30-39 years old" = mean(age_2, na.rm = TRUE),
"40-49 years old" = mean(age_3, na.rm = TRUE),
"50-59 years old" = mean(age_4, na.rm = TRUE),
"60-69 years old" = mean(age_5, na.rm = TRUE),
"Income bracket 1" = mean(inc_bracket_1, na.rm = TRUE),
"Income bracket 2" = mean(inc_bracket_2, na.rm = TRUE),
"Income bracket 3" = mean(inc_bracket_3, na.rm = TRUE),
"Income bracket 4" = mean(inc_bracket_4, na.rm = TRUE),
"Married" = mean(married, na.rm = TRUE),
"Native" = mean(born_in_country, na.rm = TRUE),
"Employed" = mean(employed, na.rm = TRUE),
"Unemployed" = mean(unemployed, na.rm = TRUE),
"College" = mean(university, na.rm = TRUE)
)
Rearrange the summary statistics dataframe to look more like Table 1 in Alesina, Stantcheva, & Teso.
# Rearrange the Table
summary_data_2 <- t(summary_data) %>% as_tibble()
colnames(summary_data_2) <- summary_data$country
summary_data_2 <- summary_data_2[-1,]
summary_data_2$Variable <- colnames(summary_data)[-1]
## Use the Select Function to rearrange the columns to match column 1
summary_data_2 <- summary_data_2 %>% select(Variable,US, UK, France,Italy,Sweden)
# Turn the columns that should be numeric into numbers
summary_data_2[,c("US","UK","France","Italy","Sweden")] <-
summary_data_2[,c("US","UK","France","Italy","Sweden")] %>%
map(as.numeric) %>%
map(round,2)
# Turn Data Transformation Into a Function
transform_tab <-function(sum_obj){
new_summary <- t(sum_obj) %>% as_tibble()
colnames(new_summary) <- sum_obj$country
new_summary <- new_summary[-1,]
new_summary$Variable <- colnames(sum_obj)[-1]
new_summary <- new_summary %>% select(Variable,US,UK,France,Italy,Sweden)
new_summary[,c("US","UK","France","Italy","Sweden")] <-
new_summary[,c("US","UK","France","Italy","Sweden")] %>%
map_df(as.numeric) %>%
map_df(round,2)
return(new_summary)
}
summary_data <- transform_tab(summary_data)
summary_data
Variable | US | UK | France | Italy | Sweden |
---|---|---|---|---|---|
Male | 0.48 | 0.48 | 0.50 | 0.50 | 0.47 |
18-29 years old | 0.26 | 0.26 | 0.23 | 0.19 | 0.21 |
30-39 years old | 0.18 | 0.18 | 0.20 | 0.22 | 0.18 |
40-49 years old | 0.19 | 0.21 | 0.21 | 0.23 | 0.19 |
50-59 years old | 0.21 | 0.18 | 0.20 | 0.20 | 0.21 |
60-69 years old | 0.16 | 0.16 | 0.16 | 0.17 | 0.21 |
Income bracket 1 | 0.16 | 0.31 | 0.31 | 0.27 | 0.33 |
Income bracket 2 | 0.22 | 0.35 | 0.30 | 0.28 | 0.26 |
Income bracket 3 | 0.23 | 0.11 | 0.14 | 0.18 | 0.22 |
Income bracket 4 | 0.39 | 0.23 | 0.25 | 0.27 | 0.18 |
Married | 0.51 | 0.47 | 0.44 | 0.55 | 0.41 |
Native | 0.94 | 0.89 | 0.94 | 0.97 | 0.91 |
Employed | 0.62 | 0.65 | 0.63 | 0.64 | 0.66 |
Unemployed | 0.08 | 0.05 | 0.12 | 0.11 | 0.07 |
College | 0.42 | 0.37 | 0.30 | 0.38 | 0.33 |
table2_data <- base_data %>% filter(Treated == 0) %>% as_tibble()
Create a table 2 summary statistics data frame, with averages again computed by country and missing values excluded. The summary statistics should comprise the averages of the following variables (again with new variable names that match the formatting of Table 2 in the paper):
Variables | |
---|---|
q1_to_q5 | q1_to_q3 |
q1_to_q4 | q1_to_q2 |
age_3 | q1_to_q1 |
table2 <- table2_data %>% group_by(country) %>% summarize(
"Q1 to Q5" = mean(q1_to_q5, na.rm = TRUE),
"Q1 to Q4" = mean(q1_to_q4, na.rm = TRUE),
"Q1 to Q3" = mean(q1_to_q3, na.rm = TRUE),
"Q1 to Q2" = mean(q1_to_q2, na.rm = TRUE),
"Q1 to Q1" = mean(q1_to_q1, na.rm = TRUE)
)
table2 <- transform_tab(table2)
table2 <- table2 %>% mutate(
type = "perceived")
# Actual Probabilities (supplied)
actual <- matrix(c(7.8,12.7,18.7,27.7,33.1,
11.4,12.9,19.9,25.1,30.6,
11.2,12.8,23.0,23.8,29.2,
10.4,15.6,21.0,25.8,27.3,
11.1,17.3,21.0,23.8,26.7),
nrow=5, ncol=5, byrow=FALSE)
actual <- actual %>% as_tibble()
actual <- actual %>% mutate(
Variable = table2$Variable,
type = "actual"
)
actual <- actual %>% select(Variable, everything())
colnames(actual) <- colnames(table2)
export(actual, "actual.csv")
table2 <- bind_rows(table2, actual) %>% arrange(desc(Variable))
table2
Variable | US | UK | France | Italy | Sweden | type |
---|---|---|---|---|---|---|
Q1 to Q5 | 11.72 | 9.97 | 9.10 | 10.14 | 9.21 | perceived |
Q1 to Q5 | 7.80 | 11.40 | 11.20 | 10.40 | 11.10 | actual |
Q1 to Q4 | 11.98 | 10.62 | 10.53 | 11.25 | 11.16 | perceived |
Q1 to Q4 | 12.70 | 12.90 | 12.80 | 15.60 | 17.30 | actual |
Q1 to Q3 | 22.32 | 19.39 | 21.51 | 21.87 | 24.52 | perceived |
Q1 to Q3 | 18.70 | 19.90 | 23.00 | 21.00 | 21.00 | actual |
Q1 to Q2 | 21.83 | 22.25 | 23.60 | 23.13 | 23.10 | perceived |
Q1 to Q2 | 27.70 | 25.10 | 23.80 | 25.80 | 23.80 | actual |
Q1 to Q1 | 32.16 | 37.77 | 35.26 | 33.61 | 32.00 | perceived |
Q1 to Q1 | 33.10 | 30.60 | 29.20 | 27.30 | 26.70 | actual |
table3_data <- import("Data_Experiment_Waves_BC.dta") %>%
filter(flag_1 == 0, flag_2 == 0, Treated == 0)
# Repeat the creation of the country variable for the table 3 dataset
table3_data <- table3_data %>% mutate(country = case_when(
(US == 1) ~ "US",
(UK == 1) ~ "UK",
(France == 1) ~ "France",
(Italy == 1) ~ "Italy",
(Sweden == 1) ~ "Sweden"
))
Generate political spectrum position indicators as follows:
# Generate political spectrum position indicators
table3_data <- table3_data %>% mutate(
left = ifelse((ideology_economic ==1 | ideology_economic ==2), 1, 0),
right = ifelse((ideology_economic ==4 | ideology_economic ==5), 1, 0),
center = ifelse((ideology_economic==3), 1, 0)
)
Generate variables for specific policy support beliefs, as follows:
# Generate policy support indicators
table3_data <- table3_data %>% mutate(
budget_opportunities = budget_education+budget_health,
support_estate_45 = (estate_tax_support>=4),
unequal_opp_problem_d = (unequal_opportunities_problem==4),
tools_d = (tools_government >=2)
)
Rename the following policy support variables:
# Rename policy support variables
table3_data <- table3_data %>% rename(
support_eq_opp_pol = level_playing_field_policies,
income_tax_bot50 = income_tax_bottom50
)
Generate an indicator for whether or not someone is “rich” (household income is above the 75th percentile for the country.)
## First create a summary table with income_p75, the 75th percentile of household_income by country
## You should probably use the quantile() function with summarize to do so.
country_rich_levels <- table3_data %>% group_by(country) %>%
summarize(income_p75 = quantile(household_income, probs = 0.75, na.rm = TRUE))
## Then murge in income_p75 into the main table 3 dataframe
table3_data <- table3_data %>% left_join(country_rich_levels, by = "country")
## Finally, create the *rich* indicator equal to 1 if household_income is greater than
## the 75th percentile level
table3_data <- table3_data %>% mutate(
rich = ifelse((household_income > income_p75), 1, 0)
)
Generate the following further indicator variables:
# Generate further indicators
table3_data <- table3_data %>% mutate(
young = ifelse(age<45, 1, 0),
moved_up = ifelse((job_prestige_father>3),1,0),
immigrant = ifelse((parents_born_in_country==0), 1,0)
)
# Generate a country_survey and turn it into a factor variable
table3_data <- table3_data %>% mutate(
country_survey = as.factor(glue("{country} {round}"))
)
Panel B, Q1 to Q5 Specification: The main explanatory variables should be q1_to_q5 \(\times\) left, q1_to_q5 \(\times\) right, and q1_to_q5 \(\times\) center.
Control Variables | |
---|---|
country_survey | children_dummy |
left | rich |
right | university_degree |
male | immigrant |
young | moved_up |
Dependent Variables | |
---|---|
budget_opportunities | support_estate_45 |
support_eq_opp_pol | government_intervention |
unequal_opp_problem_d | budget_safetynet |
income_tax_top1 | income_tax_bot50 |
tools_d |
Save each of the regressions into a named regression object. You can either run perform these regressions using a for-loop, which is much more concise but more difficult, or manually write each of the 4 sets of 9 regressions. If you use a loop, you may need to use the get() function with the iterated dependent variables (and store the regressions in a list).
# Create a vector of dependent variables and then use a for-loop to iterate
# across them to perform each regression [you need to use get() for this].
## Vector of dependent variables
dep <- c("budget_opportunities", "support_estate_45", "support_eq_opp_pol",
"government_intervention", "unequal_opp_problem_d", "budget_safetynet",
"income_tax_top1", "income_tax_bot50", "tools_d")
# Table 3, Panel A:
t3a_q1toq1 <- list()
t3a_q1toq5 <- list()
for(yvar in 1:length(dep)){
t3a_q1toq1[[yvar]] <- lm(get(dep[yvar]) ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3a_q1toq5[[yvar]] <- lm(get(dep[yvar]) ~ q1_to_q5 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
}
# Table 3, Panel B: Q1 to Q1
t3b_q1toq1 <- list()
t3b_q1toq5 <- list()
for(yvar in 1:length(dep)){
t3b_q1toq1[[yvar]] <- lm(get(dep[yvar]) ~ q1_to_q1:left + q1_to_q1:right +
q1_to_q1:center + country_survey +left + right +
male + young + children_dummy + rich +
university_degree +immigrant + moved_up,
data = table3_data)
t3b_q1toq5[[yvar]] <- lm(get(dep[yvar]) ~ q1_to_q5:left + q1_to_q5:right +
q1_to_q5:center + country_survey +left + right +
male + young + children_dummy + rich +
university_degree + immigrant + moved_up,
data = table3_data)
}
# Table 3, Panel A Q1 to Q1 Regressions
t3_a_q1_1 <- lm(budget_opportunities ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_2 <- lm(support_estate_45 ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_3 <- lm(support_eq_opp_pol ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_4 <- lm(government_intervention ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_5 <- lm(unequal_opp_problem_d ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_6 <- lm(budget_safetynet ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_7 <- lm(income_tax_top1 ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_8 <- lm(income_tax_bot50 ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_a_q1_9 <- lm(tools_d ~ q1_to_q1 + country_survey +
left + right + male + young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
# Table 3, Panel B Q1 to Q1 Regressions
t3_b_q1_1 <- lm(budget_opportunities ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_2 <- lm(support_estate_45 ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_3 <- lm(support_eq_opp_pol ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_4 <- lm(government_intervention ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey +
left + right + male + young + children_dummy +
rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_5 <- lm(unequal_opp_problem_d ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_6 <- lm(budget_safetynet ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_7 <- lm(income_tax_top1 ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_8 <- lm(income_tax_bot50 ~ q1_to_q1*left + q1_to_q1*right +
q1_to_q1*center + country_survey + left + right + male +
young + children_dummy + rich + university_degree +
immigrant + moved_up, data = table3_data)
t3_b_q1_9 <- lm(tools_d ~ q1_to_q1*left + q1_to_q1*right + q1_to_q1*center +
country_survey +left + right + male + young + children_dummy +
rich + university_degree + immigrant + moved_up,
data = table3_data)
First, create a table_columns vector, using the following code to get the column titles as written in the paper:
table_columns <- c("Budget opp",
"Support estate tax",
"Support equality opp. policies",
"Government interv",
"Unequal opp. very serious problem",
"Budget safety net",
"Tax rate top 1",
"Tax rate bottom 50",
"Govt. tools")
Option | Value |
---|---|
type: | html |
object.names | FALSE |
style | aer |
omit.stat | all |
column.sep.width | 2pt |
font.size | footnotesize |
digits | 3 |
column.labels | table_columns |
Finally, you will also need to manually specify the values for the following options: title, keep, covariate.labels. Choose these such that the regressions looks like the respective panels of Table 3.
# Table 3, Panel A
stargazer(t3a_q1toq1, type="html", header = FALSE, object.names=FALSE,
dep.var.labels.include = FALSE,
title = "Table 3: Panel A: Q1 to Q1",
keep = c("q1_to_q1"),
style = "aer", omit.stat = "all", column.sep.width = "2pt",
font.size = "footnotesize", digits = 3,
covariate.labels = c("Q1 to Q1"),
column.labels = table_columns
)
Budget opp | Support estate tax | Support equality opp. policies | Government interv | Unequal opp. very serious problem | Budget safety net | Tax rate top 1 | Tax rate bottom 50 | Govt. tools | |
(1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | (9) | |
Q1 to Q1 | 0.030*** | 0.000 | 0.004*** | 0.002** | 0.001*** | 0.013*** | 0.057*** | -0.035*** | -0.000 |
(0.007) | (0.000) | (0.001) | (0.001) | (0.000) | (0.005) | (0.012) | (0.007) | (0.000) | |
Notes: | ***Significant at the 1 percent level. | ||||||||
**Significant at the 5 percent level. | |||||||||
*Significant at the 10 percent level. |
stargazer(t3a_q1toq5, type="html", header = FALSE, object.names=FALSE,
dep.var.labels.include = FALSE,
title = "Table 3: Panel B, Q1 to Q5",
keep = c("q1_to_q5"),
style = "qje", omit.stat = "all", column.sep.width = "1pt",
font.size = "footnotesize", digits = 3,
covariate.labels = c("Q1 to Q5"),
column.labels = table_columns
)
Budget opp | Support estate tax | Support equality opp. policies | Government interv | Unequal opp. very serious problem | Budget safety net | Tax rate top 1 | Tax rate bottom 50 | Govt. tools | |
(1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | (9) | |
Q1 to Q5 | -0.044*** | 0.0002 | -0.004*** | 0.001 | -0.00002 | -0.011 | -0.041** | 0.060*** | -0.0001 |
(0.011) | (0.0005) | (0.001) | (0.001) | (0.0004) | (0.007) | (0.019) | (0.011) | (0.0004) | |
Notes: | ***Significant at the 1 percent level. | ||||||||
**Significant at the 5 percent level. | |||||||||
*Significant at the 10 percent level. |
# Table 3, Panel B
stargazer(t3b_q1toq1, type="html", header = FALSE, object.names=FALSE,
dep.var.labels.include = FALSE,
title = "Table 3: Panel B, Q1 to Q1",
keep = c("q1_to_q1:left", "q1_to_q1:right","left", "right"),
style = "qje", omit.stat = "all", column.sep.width = "1pt",
font.size = "footnotesize", digits = 3,
order = c("q1_to_q1:left", "q1_to_q1:right","left","right"),
covariate.labels = c("Q1 to Q1 x left-wing",
"Q1 to Q1 x right-wing",
"Left-wing",
"Right-wing"),
column.labels = table_columns
)
Budget opp | Support estate tax | Support equality opp. policies | Government interv | Unequal opp. very serious problem | Budget safety net | Tax rate top 1 | Tax rate bottom 50 | Govt. tools | |
(1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | (9) | |
Q1 to Q1 x left-wing | 0.030*** | 0.001** | 0.006*** | 0.004*** | 0.002*** | 0.020*** | 0.069*** | -0.041*** | 0.001 |
(0.011) | (0.0005) | (0.001) | (0.001) | (0.0004) | (0.008) | (0.020) | (0.011) | (0.0005) | |
Q1 to Q1 x right-wing | 0.019 | -0.0004 | 0.003** | 0.003** | 0.001** | 0.003 | 0.039* | -0.033*** | -0.0004 |
(0.012) | (0.001) | (0.001) | (0.002) | (0.0005) | (0.008) | (0.021) | (0.012) | (0.0005) | |
Left-wing | 1.410** | 0.128*** | 0.223*** | 0.139 | 0.067** | 0.885* | 2.191* | -0.652 | 0.034 |
(0.681) | (0.029) | (0.066) | (0.092) | (0.027) | (0.469) | (1.221) | (0.689) | (0.028) | |
Right-wing | -1.161* | -0.051* | -0.329*** | -0.713*** | -0.056** | -0.769* | -1.487 | 1.423** | -0.057** |
(0.664) | (0.029) | (0.065) | (0.090) | (0.026) | (0.457) | (1.201) | (0.678) | (0.028) | |
Notes: | ***Significant at the 1 percent level. | ||||||||
**Significant at the 5 percent level. | |||||||||
*Significant at the 10 percent level. |
stargazer(t3b_q1toq5, type="html", header = FALSE, object.names=FALSE,
dep.var.labels.include = FALSE,
title = "Table 3: Panel B, Q1 to Q5",
keep = c("q1_to_q5:left", "q1_to_q5:right","left", "right"),
style = "qje", omit.stat = "all", column.sep.width = "1pt",
font.size = "footnotesize", digits = 3,
order = c("q1_to_q5:left", "q1_to_q5:right","left","right"),
covariate.labels = c("Q1 to Q5 x left-wing",
"Q1 to Q5 x right-wing",
"Left-wing",
"Right-wing"),
column.labels = table_columns
)
Budget opp | Support estate tax | Support equality opp. policies | Government interv | Unequal opp. very serious problem | Budget safety net | Tax rate top 1 | Tax rate bottom 50 | Govt. tools | |
(1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | (9) | |
Q1 to Q5 x left-wing | -0.080*** | -0.001 | -0.006*** | -0.003 | -0.002*** | -0.013 | -0.054* | 0.060*** | -0.001 |
(0.018) | (0.001) | (0.002) | (0.002) | (0.001) | (0.013) | (0.032) | (0.018) | (0.001) | |
Q1 to Q5 x right-wing | -0.009 | 0.001 | -0.002 | 0.002 | 0.001 | -0.003 | -0.001 | 0.039** | 0.0005 |
(0.019) | (0.001) | (0.002) | (0.003) | (0.001) | (0.013) | (0.034) | (0.019) | (0.001) | |
Left-wing | 1.499*** | 0.159*** | 0.382*** | 0.411*** | 0.170*** | 1.076*** | 2.549*** | -0.868* | 0.089*** |
(0.471) | (0.020) | (0.046) | (0.064) | (0.018) | (0.324) | (0.837) | (0.471) | (0.020) | |
Right-wing | -2.216*** | -0.095*** | -0.318*** | -0.544*** | -0.041** | -1.300*** | -2.862*** | 1.793*** | -0.050** |
(0.484) | (0.021) | (0.047) | (0.065) | (0.019) | (0.333) | (0.860) | (0.484) | (0.020) | |
Notes: | ***Significant at the 1 percent level. | ||||||||
**Significant at the 5 percent level. | |||||||||
*Significant at the 10 percent level. |
# Get sample
figure2_data <- base_data %>% filter(Treated == 0)
Create a figure 2 summary statistics data frame, which computes the average by country for the following variables:
# Generate Average perceived and true probability of Q1 to Q5 transition
figure2_sum <- figure2_data %>% group_by(country) %>% summarize(
perceived_q1_to_q5 = mean(q1_to_q5),
true_q1_to_q5 = mean(true_q1_to_q5)
)
Recode the country variable so that:
# Recode Country
figure2_sum <- figure2_sum %>% mutate(country = recode(country,
"Sweden" = "SE",
"Italy" = "IT",
"France" = "FR"
))
Generate a scatterplot using text labels for country instead of points, with the following formatting:
annotate(geom="text", x=7, y=10, label = "Optimistic") +
annotate(geom="text", x=10, y=7, label = "Pessimistic")
# Generate text scatterplot
ggplot(figure2_sum,
aes(x=true_q1_to_q5, y = perceived_q1_to_q5,
label=country, col = country)) +
geom_text() + xlim(6,12) + ylim(6, 12) +
xlab("True Q1 to Q1 Probability") + ylab("Perceived Q1 to Q1 Probability") +
geom_abline(intercept = 0, slope=1, linetype="dotted") +
annotate(geom="text", x=7, y=10, label = "Optimistic") +
annotate(geom="text", x=10, y=7, label = "Pessimistic")
# Take out outliers
figure3_data <- base_data %>% filter(Treated == 0, q1_to_q1 != 100,
q1_to_q5 < 80)
Generate the following misperception variable:
# Generate misperception variables
figure3_data <- figure3_data %>% mutate(
misperception_q1 = -abs(q1_to_q1 - true_q1_to_q1),
misperception_q5 = -abs(q1_to_q5 - true_q1_to_q5)
)
Generate two different data sets:
figure3_US <- figure3_data %>% filter(country == "US")
figure3_Europe <- figure3_data %>% filter(country != "US")
Use ggplot to reproduce the plots of the CDF of the negative absolute error between perceived and actual transition probabilities by country.
ggplot(figure3_US) + stat_ecdf(aes(x = misperception_q1),
geom = "step", col="blue") +
ggtitle("Panel B. United States") +
stat_ecdf(aes(x = misperception_q5),color ="red", geom = "step") +
xlim(-80,0) + theme_hc()
ggplot(figure3_Europe) + stat_ecdf(aes(x = misperception_q1),
geom = "step", col="blue") +
ggtitle("Panel B. Europe") +
stat_ecdf(aes(x = misperception_q5),color ="red", geom = "step") +
xlim(-80,0) + theme_hc()