diff --git a/DESCRIPTION b/DESCRIPTION
index c2c339a..3501ecd 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,7 +26,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
Depends:
R (>= 2.10),
fixest (>= 0.10.1)
diff --git a/NAMESPACE b/NAMESPACE
index 7c9e761..8f2bdbf 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(gen_data)
export(get_honestdid_obj_did2s)
export(honest_did_did2s)
export(plot_event_study)
+export(sparse_model_matrix)
import(data.table)
import(fixest)
importFrom(Matrix,sparseMatrix)
diff --git a/R/utils.R b/R/utils.R
index 61944b8..631fe20 100755
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,113 +1,3 @@
-# Copied directly from https://github.com/lrberge/fixest/pull/418/commits/869aeee1ac030fe787a217fbf9f358b7f0eef8f4
-#' Create, or interact variables with, factors
-#'
-#' Treat a variable as a factor, or interacts a variable with a factor. Values to be dropped/kept from the factor can be easily set. Note that to interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2`.
-#'
-#'
-#' @inheritParams bin
-#'
-#' @param factor_var A vector (of any type) that will be treated as a factor. You can set references (i.e. exclude values for which to create dummies) with the `ref` argument.
-#' @param var A variable of the same length as `factor_var`. This variable will be interacted with the factor in `factor_var`. It can be numeric or factor-like. To force a numeric variable to be treated as a factor, you can add the `i.` prefix to a variable name. For instance take a numeric variable `x_num`: `i(x_fact, x_num)` will treat `x_num` as numeric while `i(x_fact, i.x_num)` will treat `x_num` as a factor (it's a shortcut to `as.factor(x_num)`).
-#' @param ref A vector of values to be taken as references from `factor_var`. Can also be a logical: if `TRUE`, then the first value of `factor_var` will be removed. If `ref` is a character vector, partial matching is applied to values; use "@" as the first character to enable regular expression matching. See examples.
-#' @param keep A vector of values to be kept from `factor_var` (all others are dropped). By default they should be values from `factor_var` and if `keep` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param ref2 A vector of values to be dropped from `var`. By default they should be values from `var` and if `ref2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param keep2 A vector of values to be kept from `var` (all others are dropped). By default they should be values from `var` and if `keep2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param bin2 A list or vector defining the binning of the second variable. See help for the argument `bin` for details (or look at the help of the function [`bin`]). You can use `.()` for `list()`.
-#' @param ... Not currently used.
-#'
-#' @details
-#' To interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2` in the fixed-effects part of the formula. Please see the details and examples in the help page of [`feols`].
-#'
-#' @return
-#' It returns a matrix with number of rows the length of `factor_var`. If there is no interacted variable or it is interacted with a numeric variable, the number of columns is equal to the number of cases contained in `factor_var` minus the reference(s). If the interacted variable is a factor, the number of columns is the number of combined cases between `factor_var` and `var`.
-#'
-#' @author
-#' Laurent Berge
-#'
-#' @seealso
-#' [`iplot`][fixest::coefplot] to plot interactions or factors created with `i()`, [`feols`] for OLS estimation with multiple fixed-effects.
-#'
-#' See the function [`bin`] for binning variables.
-#'
-#' @examples
-#'
-#' #
-#' # Simple illustration
-#' #
-#'
-#' x = rep(letters[1:4], 3)[1:10]
-#' y = rep(1:4, c(1, 2, 3, 4))
-#'
-#' # interaction
-#' data.frame(x, y, i(x, y, ref = TRUE))
-#'
-#' # without interaction
-#' data.frame(x, i(x, "b"))
-#'
-#' # you can interact factors too
-#' z = rep(c("e", "f", "g"), c(5, 3, 2))
-#' data.frame(x, z, i(x, z))
-#'
-#' # to force a numeric variable to be treated as a factor: use i.
-#' data.frame(x, y, i(x, i.y))
-#'
-#' # Binning
-#' data.frame(x, i(x, bin = list(ab = c("a", "b"))))
-#'
-#' # Same as before but using .() for list() and a regular expression
-#' # note that to trigger a regex, you need to use an @ first
-#' data.frame(x, i(x, bin = .(ab = "@a|b")))
-#'
-#' #
-#' # In fixest estimations
-#' #
-#'
-#' data(base_did)
-#' # We interact the variable 'period' with the variable 'treat'
-#' est_did = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did)
-#'
-#' # => plot only interactions with iplot
-#' iplot(est_did)
-#'
-#' # Using i() for factors
-#' est_bis = feols(y ~ x1 + i(period, keep = 3:6) + i(period, treat, 5) | id, base_did)
-#'
-#' # we plot the second set of variables created with i()
-#' # => we need to use keep (otherwise only the first one is represented)
-#' coefplot(est_bis, keep = "trea")
-#'
-#' # => special treatment in etable
-#' etable(est_bis, dict = c("6" = "six"))
-#'
-#' #
-#' # Interact two factors
-#' #
-#'
-#' # We use the i. prefix to consider week as a factor
-#' data(airquality)
-#' aq = airquality
-#' aq$week = aq$Day %/% 7 + 1
-#'
-#' # Interacting Month and week:
-#' res_2F = feols(Ozone ~ Solar.R + i(Month, i.week), aq)
-#'
-#' # Same but dropping the 5th Month and 1st week
-#' res_2F_bis = feols(Ozone ~ Solar.R + i(Month, i.week, ref = 5, ref2 = 1), aq)
-#'
-#' etable(res_2F, res_2F_bis)
-#'
-#' #
-#' # Binning
-#' #
-#'
-#' data(airquality)
-#'
-#' feols(Ozone ~ i(Month, bin = "bin::2"), airquality)
-#'
-#' feols(Ozone ~ i(Month, bin = list(summer = 7:9)), airquality)
-#'
-#'
-#'
i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
# Used to create interactions
@@ -140,7 +30,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
var_name = fixest:::deparse_long(mc$f2)
var = dots$f2
- check_value(var, "vector", .arg_name = "f2")
+ dreamerr::check_value(var, "vector", .arg_name = "f2")
}
# General information on the factor variable
@@ -160,10 +50,10 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
if(grepl("^i\\.", var_name)){
var_name = gsub("^i\\.", "", var_name)
var = str2lang(var_name)
- check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var")
+ dreamerr::check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var")
IS_INTER_FACTOR = TRUE
} else {
- check_value(var, "vector", .arg_name = "var")
+ dreamerr::check_value(var, "vector", .arg_name = "var")
}
} else {
# f2 was used
@@ -220,18 +110,18 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
}
if(!missing(bin)){
- bin = error_sender(eval_dot(bin), arg_name = "bin")
+ bin = fixest:::error_sender(fixest:::eval_dot(bin), arg_name = "bin")
if(!is.null(bin)){
- f = bin_factor(bin, f, f_name)
+ f = fixest:::bin_factor(bin, f, f_name)
}
}
if(IS_INTER_FACTOR && !fixest:::missnull(bin2)){
- bin2 = error_sender(eval_dot(bin2), arg_name = "bin2")
+ bin2 = fixest:::error_sender(fixest:::eval_dot(bin2), arg_name = "bin2")
if(!is.null(bin2)){
- var = bin_factor(bin2, var, f_name)
+ var = fixest:::bin_factor(bin2, var, f_name)
}
}
@@ -373,7 +263,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
return(res)
}
- res = cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names)
+ res = fixest:::cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names)
# res => matrix with...
# - NAs where appropriate
# - appropriate number of columns
@@ -452,8 +342,7 @@ i_noref = function(factor_var, var, ref, bin, keep, ref2, keep2, bin2){
#'
#' This function creates the left-hand-side or the right-hand-side(s) of a [`femlm`], [`feols`] or [`feglm`] estimation. Note that this function currently does not accept a formula
#'
-#' @inheritParams nobs.fixest
-#'
+#' @param object A `fixest` estimation object
#' @param data If missing (default) then the original data is obtained by evaluating the `call`. Otherwise, it should be a `data.frame`.
#' @param type Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments).
#' @param na.rm Default is `TRUE`. Should observations with NAs be removed from the matrix?
@@ -507,7 +396,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
}
if (any(grepl("^iv", type)) && !isTRUE(object$iv)) {
- stop("The type", enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.")
+ stop("The type", dreamerr::enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.")
}
dreamerr::check_arg(subset, "logical scalar | character vector no na")
@@ -540,7 +429,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
isFormula = FALSE
split_fml = NULL
if ("formula" %in% class(object)) {
- split_fml = fml_split_internal(object)
+ split_fml = fixest:::fml_split_internal(object)
if (collin.rm == TRUE | length(split_fml) == 3) {
message("Formula passed to sparse_model_matrix with collin.rm == TRUE or iv. Estimating feols with formula.")
@@ -570,7 +459,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
fake_intercept = !is.null(split_fml[[2]]) | fml_0
} else {
- fml_linear = formula(object, type = "linear")
+ fml_linear = stats::formula(object, type = "linear")
# we kick out the intercept if there is presence of fixed-effects
fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
@@ -596,10 +485,10 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
if (isFormula && (length(split_fml) == 3)) {
fml_iv = split_fml[[3]]
- fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+ fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
} else if (isTRUE(object$iv)) {
fml_iv = object$fml_all$iv
- fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+ fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
}
vars = attr(stats::terms(fml), "term.labels")
@@ -618,7 +507,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
} else {
if (isFormula) {
- fixef_fml = .xpd(~ ..fe, ..fe = split_fml[[2]])
+ fixef_fml = fixest:::.xpd(~ ..fe, ..fe = split_fml[[2]])
} else {
fixef_fml = object$fml_all$fixef
}
@@ -790,7 +679,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
fml = object$fml
if (object$iv_stage == 2) {
fml_iv = object$fml_all$iv
- fml = .xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
+ fml = fixest:::.xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
}
vars = attr(stats::terms(fml), "term.labels")
iv_rhs1 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs1", add_intercept = !fake_intercept)
@@ -810,12 +699,12 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin
fit_vars = c()
for (i in seq_along(stage_1)) {
fit_vars[i] = v = paste0("fit_", names(stage_1)[i])
- data[[v]] = predict(stage_1[[i]], newdata = data, sample = "original")
+ data[[v]] = stats::predict(stage_1[[i]], newdata = data, sample = "original")
}
# II) we create the variables
fml = object$fml
- fml = .xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
+ fml = fixest:::.xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
vars = attr(stats::terms(fml), "term.labels")
iv_rhs2 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs2", add_intercept = !fake_intercept)
@@ -1055,7 +944,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type
}
# TODO: Should I use error_sender?
- # mat = error_sender(fixest_model_matrix_extra(
+ # mat = fixest:::error_sender(fixest_model_matrix_extra(
# object = object, newdata = data, original_data = original_data,
# fml = fml, fake_intercept = fake_intercept,
# subset = subset),
@@ -1079,7 +968,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type
endo = fml_iv[[2]]
# Trick to get the rhs variables as a character vector
- endo = .xpd(~ ..endo, ..endo = endo)
+ endo = fixest:::.xpd(~ ..endo, ..endo = endo)
endo = attr(stats::terms(endo), "term.labels")
exo = lapply(object$iv_first_stage, function(x) names(stats::coef(x)))
diff --git a/README.Rmd b/README.Rmd
index a18c7c7..58f26d8 100755
--- a/README.Rmd
+++ b/README.Rmd
@@ -10,7 +10,8 @@ knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/README-",
- out.width = "100%"
+ out.width = "100%",
+ dpi = 300
)
```
@@ -77,7 +78,7 @@ df_het = as.data.frame(df_het)
Here is a plot of the average outcome variable for each of the groups:
-```{r plot-df-het, fig.width=8, fig.height=4, fig.cap="Example data with heterogeneous treatment effects"}
+```{r plot-df-het, fig.width=8, fig.height=4, dpi=300, fig.cap="Example data with heterogeneous treatment effects"}
# Mean for treatment group-year
agg <- aggregate(df_het$dep_var, by = list(g = df_het$g, year = df_het$year), FUN = mean)
@@ -136,7 +137,7 @@ es <- did2s(df_het,
And plot the results:
-```{r plot-es, fig.cap="Event-study plot with example data"}
+```{r plot-es, fig.cap="Event-study plot with example data", fig.width=8, fig.height=5, dpi=300}
fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5)
# Add the (mean) true effects
@@ -153,7 +154,7 @@ legend(
### Comparison to TWFE
-```{r plot-compare, ig.cap="TWFE and Two-Stage estimates of Event-Study"}
+```{r plot-compare, fig.cap="TWFE and Two-Stage estimates of Event-Study", fig.width=8, fig.height=5, dpi=300}
twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het)
fixest::iplot(list(es, twfe),
@@ -178,7 +179,7 @@ In version 1.1.0, we added support for computing a sensitivity analysis using th
Here's an example using data from [here](https://github.com/Mixtape-Sessions/Advanced-DID/tree/main/Exercises/Exercise-1). The provided dataset `ehec_data.dta` contains a state-level panel dataset on health insurance coverage and Medicaid expansion. The variable `dins` shows the share of low-income childless adults with health insurance in the state. The variable `yexp2` gives the year that a state expanded Medicaid coverage under the Affordable Care Act, and is missing if the state never expanded.
-```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage"}
+```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage", fig.width=8, fig.height=5, dpi=300}
library(HonestDiD)
library(ggplot2)
@@ -200,7 +201,7 @@ es_did2s <- did2s(
coefplot(es_did2s)
```
-```{r sensitivity, fig.cap="Sensitivity analysis for the example data"}
+```{r sensitivity, fig.cap="Sensitivity analysis for the example data", fig.width=8, fig.height=5, dpi=300}
# Relative Magnitude sensitivity analysis
sensitivity_results <- es_did2s |>
# Take fixest obj and convert for `honest_did_did2s`
@@ -222,7 +223,7 @@ HonestDiD::createSensitivityPlot_relativeMagnitudes(
## Event-study plot
-```{r}
+```{r, fig.cap="Multiple event-study estimators", fig.width=12, fig.height=8.5, dpi=300}
library(tidyverse)
data(df_het)
df = df_het
diff --git a/README.md b/README.md
index 7457d25..6254526 100755
--- a/README.md
+++ b/README.md
@@ -229,7 +229,14 @@ legend(
)
```
-
+
+TWFE and Two-Stage estimates of Event-Study +
+ ++Multiple event-study estimators +
+ +