Assessing market-clearing

This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.

Setup the environment

Load the required libraries.

library(diseq)
library(magrittr)

Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.

nobs <- 5000
tobs <- 5

alpha_d <- -1.9
beta_d0 <- 4.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)

alpha_s <- 2.8
beta_s0 <- 1.2
beta_s <- c(0.65)
eta_s <- c(1.15, 4.2)

sigma_d <- 1
sigma_s <- 1
rho_ds <- 0.5

seed <- 443

eq_data <- simulate_model_data(
  "eq_fiml", nobs, tobs,
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA, NA, c(NA),
  sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
  seed = seed
)

Initialize the model

Prepare the basic parameters for model initialization.

key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
use_correlated_shocks <- TRUE

Using the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.

eq2sls <- new(
  "eq_2sls",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  verbose = verbose
)
#> Info: This is 'Equilibrium 2SLS with independent shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#> 
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
eqfiml <- new(
  "eq_fiml",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium FIML with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#> 
#> ℹ Input `date` is `(function (x) ...`.

#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
  "diseq_basic",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#> 
#> ℹ Input `date` is `(function (x) ...`.

#> Warning: Removing unobserved '1' level(s).
damdl <- new(
  "diseq_deterministic_adjustment",
  key_columns, time_column,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data,
  use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 5000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 10003 rows in excess supply and 9997 in excess demand regime.

Estimation

Set the estimation parameters.

optimization_method <- "BFGS"
optimization_controls <- list(maxit = 10000, reltol = 1e-8)

Estimate the models.

eq2sls <- estimate(eq2sls)
eqfiml_est <- estimate(eqfiml,
  control = optimization_controls,
  method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
  control = optimization_controls,
  method = optimization_method
)
damdl_est <- estimate(damdl,
  control = optimization_controls,
  method = optimization_method
)

Post estimation analysis

Summaries

All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.

summary(eq2sls@first_stage_model)
#> 
#> Call:
#> lm(formula = first_stage_formula, data = [email protected]_tibble)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.85453 -0.14407  0.00004  0.14368  0.78798 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  0.761849   0.017106   44.54   <2e-16 ***
#> Xd1          0.448077   0.003055  146.68   <2e-16 ***
#> Xd2         -0.144864   0.003056  -47.41   <2e-16 ***
#> X1           0.505181   0.003026  166.92   <2e-16 ***
#> X2           0.437596   0.003072  142.44   <2e-16 ***
#> Xs1         -0.139496   0.003057  -45.63   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.215 on 19994 degrees of freedom
#> Multiple R-squared:  0.7873, Adjusted R-squared:  0.7873 
#> F-statistic: 1.48e+04 on 5 and 19994 DF,  p-value: < 2.2e-16
summary(eq2sls@system_model)
#> 
#> systemfit results 
#> method: 2SLS 
#> 
#>            N    DF     SSR detRCov   OLS-R2 McElroy-R2
#> system 40000 39989 30544.3 8.5e-05 0.923359   0.846725
#> 
#>            N    DF     SSR      MSE     RMSE       R2   Adj R2
#> demand 20000 19994 15271.1 0.763782 0.873946 0.923364 0.923345
#> supply 20000 19995 15273.3 0.763855 0.873988 0.923353 0.923338
#> 
#> The covariance matrix of the residuals
#>          demand   supply
#> demand 0.763782 0.763763
#> supply 0.763763 0.763855
#> 
#> The correlations of the residuals
#>          demand   supply
#> demand 1.000000 0.999927
#> supply 0.999927 1.000000
#> 
#> 
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P_FITTED + Xd1 + Xd2 + X1 + X2
#> <environment: 0x55eea06d2cd0>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55eea06d2cd0>
#> 
#>               Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept)  4.8762033  0.0722859  67.4571 < 2.22e-16 ***
#> P_FITTED    -1.8442136  0.0890651 -20.7064 < 2.22e-16 ***
#> Xd1          2.0524139  0.0418253  49.0712 < 2.22e-16 ***
#> Xd2         -0.6858288  0.0179589 -38.1887 < 2.22e-16 ***
#> X1           3.5025023  0.0466818  75.0293 < 2.22e-16 ***
#> X2           6.2117062  0.0409079 151.8463 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.873946 on 19994 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19994 
#> SSR: 15271.060811 MSE: 0.763782 Root MSE: 0.873946 
#> Multiple R-Squared: 0.923364 Adjusted R-Squared: 0.923345 
#> 
#> 
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P_FITTED + Xs1 + X1 + X2
#> <environment: 0x55eea06d2cd0>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55eea06d2cd0>
#> 
#>              Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 1.3095281  0.0673668  19.4388 < 2.22e-16 ***
#> P_FITTED    2.7509924  0.0263305 104.4792 < 2.22e-16 ***
#> Xs1         0.6409146  0.0129713  49.4101 < 2.22e-16 ***
#> X1          1.1808148  0.0181389  65.0984 < 2.22e-16 ***
#> X2          4.2008260  0.0170625 246.2027 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.873988 on 19995 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19995 
#> SSR: 15273.281424 MSE: 0.763855 Root MSE: 0.873988 
#> Multiple R-Squared: 0.923353 Adjusted R-Squared: 0.923338
bbmle::summary(eqfiml_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", start = c(D_P = 0.248904000229706, D_CONST = 4.01094480918336, 
#>     D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, D_X1 = 2.44419761908979, 
#>     D_X2 = 5.29620369779804, S_P = 1.70876661459514, S_CONST = 2.885697667298, 
#>     S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, S_X2 = 4.66113245104969, 
#>     D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error z value     Pr(z)    
#> D_P        -1.847620   0.102162 -18.085 < 2.2e-16 ***
#> D_CONST     4.845601   0.079952  60.606 < 2.2e-16 ***
#> D_Xd1       2.057105   0.047842  42.998 < 2.2e-16 ***
#> D_Xd2      -0.676054   0.019563 -34.558 < 2.2e-16 ***
#> D_X1        3.503609   0.053540  65.439 < 2.2e-16 ***
#> D_X2        6.213180   0.046915 132.435 < 2.2e-16 ***
#> S_P         2.750472   0.030038  91.567 < 2.2e-16 ***
#> S_CONST     1.311502   0.076848  17.066 < 2.2e-16 ***
#> S_Xs1       0.640863   0.014796  43.313 < 2.2e-16 ***
#> S_X1        1.180568   0.020695  57.045 < 2.2e-16 ***
#> S_X2        4.201078   0.019466 215.811 < 2.2e-16 ***
#> D_VARIANCE  1.001167   0.024052  41.625 < 2.2e-16 ***
#> S_VARIANCE  0.994439   0.011789  84.353 < 2.2e-16 ***
#> RHO         0.510389   0.017863  28.572 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 46374.23
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706, 
#>     D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, 
#>     D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514, 
#>     S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, 
#>     S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1, 
#>     RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value     Pr(z)    
#> D_P         0.049812   0.053832   0.9253 0.3548000    
#> D_CONST     4.014641   0.115685  34.7033 < 2.2e-16 ***
#> D_Xd1       1.322024   0.036993  35.7369 < 2.2e-16 ***
#> D_Xd2      -0.455326   0.019782 -23.0177 < 2.2e-16 ***
#> D_X1        2.577838   0.037536  68.6762 < 2.2e-16 ***
#> D_X2        5.390141   0.034642 155.5962 < 2.2e-16 ***
#> S_P         1.761937   0.105878  16.6412 < 2.2e-16 ***
#> S_CONST     2.836331   0.359832   7.8824 3.212e-15 ***
#> S_Xs1       1.119391   0.086554  12.9329 < 2.2e-16 ***
#> S_X1        1.575385   0.091235  17.2674 < 2.2e-16 ***
#> S_X2        4.613717   0.082291  56.0662 < 2.2e-16 ***
#> D_VARIANCE  0.822471   0.017428  47.1932 < 2.2e-16 ***
#> S_VARIANCE  1.372021   0.090718  15.1240 < 2.2e-16 ***
#> RHO         0.204999   0.056839   3.6066 0.0003102 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 51245.47
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706, 
#>     D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, 
#>     D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514, 
#>     S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, 
#>     S_X2 = 4.66113245104969, P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1, 
#>     RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value  Pr(z)    
#> D_P        -1.849202   0.102147 -18.1034 <2e-16 ***
#> D_CONST     4.860420   0.082592  58.8486 <2e-16 ***
#> D_Xd1       2.054982   0.047717  43.0664 <2e-16 ***
#> D_Xd2      -0.675406   0.019519 -34.6019 <2e-16 ***
#> D_X1        3.501842   0.053401  65.5767 <2e-16 ***
#> D_X2        6.211275   0.046794 132.7369 <2e-16 ***
#> S_P         2.757014   0.030859  89.3424 <2e-16 ***
#> S_CONST     1.284230   0.081551  15.7476 <2e-16 ***
#> S_Xs1       0.640862   0.014799  43.3037 <2e-16 ***
#> S_X1        1.181013   0.020696  57.0636 <2e-16 ***
#> S_X2        4.200912   0.019469 215.7692 <2e-16 ***
#> P_DIFF     -0.013436   0.013574  -0.9898 0.3223    
#> D_VARIANCE  0.999927   0.023931  41.7841 <2e-16 ***
#> S_VARIANCE  0.994434   0.011789  84.3560 <2e-16 ***
#> RHO         0.511222   0.017821  28.6869 <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 46373.32

Model selection

The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.

sim_coef <- c(
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA,
  sigma_d, sigma_s,
  rho_ds
)
names(sim_coef) <- names(damdl_est@coef)

dm_inc <- eq2sls@system_model$coefficients[
  grep(
    "demand",
    names(eq2sls@system_model$coefficients)
  )
]
sp_inc <- eq2sls@system_model$coefficients[
  grep(
    "supply",
    names(eq2sls@system_model$coefficients)
  )
]
lm_coef <- c(
  dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
  NA,
  NA, NA,
  NA
)

eqfiml_coef <- append(
  eqfiml_est@coef, c(NA),
  after = which(names(eqfiml_est@coef) ==
    get_prefixed_variance_variable(eqfiml@system@demand)) - 1
)

bsmdl_coef <- append(
  bsmdl_est@coef, c(NA),
  after = which(names(bsmdl_est@coef) ==
    get_prefixed_variance_variable(bsmdl@system@demand)) - 1
)

damdl_coef <- damdl_est@coef

comp <- tibble::tibble(
  parameter = names(sim_coef),
  sim = sim_coef, lm = lm_coef, fi = eqfiml_coef,
  bm = bsmdl_coef, da = damdl_coef,
  lmerr = abs(lm_coef - sim_coef), fierr = abs(eqfiml_coef - sim_coef),
  bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#>    parameter   sim     lm     fi      bm      da    lmerr    fierr  bmerr
#>    <chr>     <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>    <dbl>  <dbl>
#>  1 D_P       -1.9  -1.84  -1.85   0.0498 -1.85    5.58e-2  0.0524   1.95 
#>  2 D_CONST    4.9   4.88   4.85   4.01    4.86    2.38e-2  0.0544   0.885
#>  3 D_Xd1      2.1   2.05   2.06   1.32    2.05    4.76e-2  0.0429   0.778
#>  4 D_Xd2     -0.7  -0.686 -0.676 -0.455  -0.675   1.42e-2  0.0239   0.245
#>  5 D_X1       3.5   3.50   3.50   2.58    3.50    2.50e-3  0.00361  0.922
#>  6 D_X2       6.25  6.21   6.21   5.39    6.21    3.83e-2  0.0368   0.860
#>  7 S_P        2.8   2.75   2.75   1.76    2.76    4.90e-2  0.0495   1.04 
#>  8 S_CONST    1.2   1.31   1.31   2.84    1.28    1.10e-1  0.112    1.64 
#>  9 S_Xs1      0.65  0.641  0.641  1.12    0.641   9.09e-3  0.00914  0.469
#> 10 S_X1       1.15  1.18   1.18   1.58    1.18    3.08e-2  0.0306   0.425
#> 11 S_X2       4.2   4.20   4.20   4.61    4.20    8.26e-4  0.00108  0.414
#> 12 P_DIFF    NA    NA     NA     NA      -0.0134 NA       NA       NA    
#> 13 D_VARIAN…  1    NA      1.00   0.822   1.00   NA        0.00117  0.178
#> 14 S_VARIAN…  1    NA      0.994  1.37    0.994  NA        0.00556  0.372
#> 15 RHO        0.5  NA      0.510  0.205   0.511  NA        0.0104   0.295
#> # … with 1 more variable: daerr <dbl>

Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.

comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#>      lmerr      fierr      bmerr      daerr 
#> 0.03467257 0.03092702 0.74766291 0.02754986

Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.

model_names <- c(
  eqfiml@model_type_string,
  bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
  get_number_of_observations(eqfiml),
  get_number_of_observations(bsmdl),
  get_number_of_observations(damdl)
)
model_errors <- c(
  comp_means["fierr"],
  comp_means["bmerr"],
  comp_means["daerr"]
)
seltbl <- AIC(eqfiml_est, bsmdl_est, damdl_est) %>%
  tibble::add_column(Model = model_names, .before = 1) %>%
  tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
  dplyr::rename(D.F. = df) %>%
  dplyr::arrange(AIC)
seltbl
#>                               Model D.F.      AIC  Obs. Mean Error
#> eqfiml_est         Equilibrium FIML   14 46402.23 20000 0.03092702
#> damdl_est  Deterministic Adjustment   15 46403.32 20000 0.02754986
#> bsmdl_est                     Basic   14 51273.47 20000 0.74766291