diff --git a/.Rbuildignore b/.Rbuildignore index 252d594..001aa5d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,8 +5,6 @@ ^README\.Rmd$ ^LICENSE\.md$ ^cran-comments\.md$ -^\.vscode$ -^\.VSCodeCounter$ ^ignore$ ^data-raw$ ^CRAN-RELEASE$ @@ -21,4 +19,3 @@ ^revdep$ ^doc$ ^Meta$ -^vignettes\.cls$ diff --git a/vignettes/article.Rnw b/vignettes/article.Rnw index e4eee38..6108d29 100644 --- a/vignettes/article.Rnw +++ b/vignettes/article.Rnw @@ -1,5 +1,3 @@ -%\VignetteEngine{knitr::knitr} -%\VignetteIndexEntry{melt: Multiple Empirical Likelihood Tests in R} \documentclass[nojss]{jss} \usepackage{amsmath,amssymb,bm,mathtools,booktabs,thumbpdf,lmodern} @@ -10,7 +8,7 @@ <>= pkgVersion <- packageVersion("melt") options( - width = 70, show.signif.stars = FALSE, str = strOptions(strict.width = "cut"), + width = 70, str = strOptions(strict.width = "cut"), ## prefer empty continuation for reader's cut'n'paste: continue = " ", # JSS: prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE @@ -69,122 +67,100 @@ citations, please refer to \citet{kim2024melt}, as suggested by \end{quote} \section{Introduction}\label{sec:intro} -The likelihood is an essential component of statistical inference. -In a nonparametric or semiparametric setting, where the quantity of interest is +The likelihood is an essential component of statistical inference. In a +nonparametric or semiparametric setting, where the quantity of interest is finite-dimensional, the maximum likelihood approach is not applicable since the -underlying data-generating distribution is left unspecified. -A popular approach in this context is the method of moments or the two-step -generalized method of moments \citep[GMM,][]{hansen1982large} where only partial -information is specified by moment conditions. -Various one-step alternatives to GMM have been proposed over the last decades in -the statistics and econometrics literature \citep[see, -e.g.,][]{efron1981nonparametric, imbens1997one, newey2004higher}. 
+underlying data-generating distribution is left unspecified. A popular approach +in this context is the method of moments or the two-step generalized method of +moments \citep[GMM,][]{hansen1982large} where only partial information is +specified by moment conditions. Various one-step alternatives to GMM have been +proposed over the last decades in the statistics and econometrics literature +\citep[see, e.g.,][]{efron1981nonparametric, imbens1997one, newey2004higher}. One such alternative is empirical likelihood \citep[EL,][]{owen1988empirical, -owen1990empirical, qin1994empirical}. -EL defines a likelihood function by profiling a nonparametric likelihood subject -to the moment restrictions. -While it is nonparametric in nature, some desirable properties of parametric -likelihood apply to EL. -Most notably, the EL ratio functions have limiting chi-square distributions -under certain conditions. -Without explicit studentization, confidence regions for the parameters can be -constructed in much the same way as with a parametric likelihood. -As the name suggests, however, the empirical distribution of the data determines -the shape of the confidence region. -Also, coverage accuracy of the confidence region can further be improved in -principle, since EL is Bartlett-correctable \citep{diciccio1991empirical}. -In terms of estimation, the standard expansion argument +owen1990empirical, qin1994empirical}. EL defines a likelihood function by +profiling a nonparametric likelihood subject to the moment restrictions. While +it is nonparametric in nature, some desirable properties of parametric +likelihood apply to EL. Most notably, the EL ratio functions have limiting +chi-square distributions under certain conditions. Without explicit +studentization, confidence regions for the parameters can be constructed in much +the same way as with a parametric likelihood. 
As the name suggests, however, the +empirical distribution of the data determines the shape of the confidence +region. Also, coverage accuracy of the confidence region can further be improved +in principle, since EL is Bartlett-correctable \citep{diciccio1991empirical}. In +terms of estimation, the standard expansion argument \citep[e.g.,][]{yuan1998asymptotics, jacod2018review} establishes the consistency and asymptotic normality of the maximum empirical likelihood -estimator (MELE). -Moreover, \citet{newey2004higher} showed that the MELE generally has a smaller -bias than its competitors and achieves higher-order efficiency after bias -correction. -EL methods have been extended to other areas, including linear models -\citep{owen1991empirical}, generalized linear models +estimator (MELE). Moreover, \citet{newey2004higher} showed that the MELE +generally has a smaller bias than its competitors and achieves higher-order +efficiency after bias correction. EL methods have been extended to other areas, +including linear models \citep{owen1991empirical}, generalized linear models \citep{kolaczyk1994empirical, xi2003extended}, survival analysis \citep{li2005empirical}, time series models \citep{kitamura1997empirical, nordman2014review}, and high-dimensional data analysis \citep{chen2009effects, -hjort2009extending}. -For an overview of EL and its applications, see \citet{owen2001empirical} and -\citet{chen2009review}. +hjort2009extending}. For an overview of EL and its applications, see +\citet{owen2001empirical} and \citet{chen2009review}. In the \proglang{R} language \citep{R}, some software packages implementing EL and related methods are available from the Comprehensive \proglang{R} Archive -Network (CRAN). -The \pkg{emplik} package \citep{emplik} provides a wide range of functions for -analyzing censored and truncated data with EL. -Confidence intervals for a one-dimensional parameter can also be constructed. 
-Other examples and applications of the package can be found in -\citet{zhou2015empirical}. -The \pkg{emplik2} package \citep{emplik2} is an extension for the two sample -problems. -Both packages cover the methods for the mean with uncensored data, which is the -simplest case in terms of computation. +Network (CRAN). The \pkg{emplik} package \citep{emplik} provides a wide range of +functions for analyzing censored and truncated data with EL. Confidence +intervals for a one-dimensional parameter can also be constructed. Other +examples and applications of the package can be found in +\citet{zhou2015empirical}. The \pkg{emplik2} package \citep{emplik2} is an +extension for the two-sample problems. Both packages cover the methods for the +mean with uncensored data, which is the simplest case in terms of computation. In addition, the \pkg{EL} package \citep{EL} performs EL tests for the difference between two sample means and the difference between two smoothed -Huber estimators. 
The \pkg{elhmc} package \citep{elhmc} contains a single +function \fct{ELHMC} for Hamiltonian Monte Carlo sampling in Bayesian EL +computation \citep{chaudhuri2017hamiltonian}. The \pkg{ELCIC} package +\citep{ELCIC} develops an EL-based consistent information criterion in a model +selection framework. In a broader context of GMM and generalized empirical +likelihood \citep{smith1997alternative}, a few packages can be used for +estimation with EL. The \pkg{gmm} package \citep{gmm} provides flexibility in +specifying moment conditions. Other than GMM and EL, continuous updating +\citep{hansen1996finite} and several estimation methods that belong to the +family of generalized empirical likelihood are available. The \pkg{gmm} package +has been superseded by the \pkg{momentfit} package \citep{momentfit}, which adds +exponential tilting \citep{kitamura1997information} estimation and methods for +constructing two-dimensional confidence regions. This paper presents the \proglang{R} package \pkg{melt} \citep{melt} that -performs multiple empirical likelihood tests for regression analysis. -The primary focus of the package is on linear and generalized linear models, -perhaps most commonly used with the \fct{lm} and \fct{glm} functions in -\proglang{R}. +performs multiple empirical likelihood tests for regression analysis. The +primary focus of the package is on linear and generalized linear models, perhaps +most commonly used with the \fct{lm} and \fct{glm} functions in \proglang{R}. The package considers only just-identified models where the number of moment -conditions equals the number of parameters. -Typical linear models specified by \code{formula} objects in \proglang{R} are -just-identified. -In this case, the MELE is identical to the maximum likelihood estimator, and the -estimate is easily obtained using \fct{lm.fit} or \fct{glm.fit} in the -\pkg{stats} package. +conditions equals the number of parameters. 
Typical linear models specified by +\code{formula} objects in \proglang{R} are just-identified. In this case, the +MELE is identical to the maximum likelihood estimator, and the estimate is +easily obtained using \fct{lm.fit} or \fct{glm.fit} in the \pkg{stats} package. The fitted model then serves as a basis for testing hypotheses, which is a core -component of the package. -EL-based tests do not involve standard errors and \fct{vcov} methods since they -are asymptotically pivotal and thus avoid explicit studentization. -For this reason it is challenging to incorporate EL methods directly into other -packages that perform inferences for parametric models using \fct{vcov}. -We aim to bridge the gap and provide an easy-to-use interface that enables -applying the methods to tasks routinely accomplished in \proglang{R}. +component of the package. EL-based tests do not involve standard errors and +\fct{vcov} methods since they are asymptotically pivotal and thus avoid explicit +studentization. For this reason it is challenging to incorporate EL methods +directly into other packages that perform inferences for parametric models using +\fct{vcov}. We aim to bridge the gap and provide an easy-to-use interface that +enables applying the methods to tasks routinely accomplished in \proglang{R}. Standard tests performed by \fct{summary.lm} and \fct{summary.glm} methods, such as significance tests of the coefficients and an overall \(F\)~test or a -chi-square test, are available. -Furthermore, in line with \fct{lht} in the \pkg{car} package \citep{car} or -\fct{glht} in the \pkg{multcomp} package \citep{multcomp}, the user can specify -linear hypotheses to be tested. -Multiple testing procedures are provided to control the family-wise error rate. +chi-square test, are available. Furthermore, in line with \fct{lht} in the +\pkg{car} package \citep{car} or \fct{glht} in the \pkg{multcomp} package +\citep{multcomp}, the user can specify linear hypotheses to be tested. 
Multiple +testing procedures are provided to control the family-wise error rate. Constructing confidence intervals and detecting outliers in a fitted model can -also be done, adding more options for data analysis. -Note that all the tests and methods rely on EL and its asymptotic properties. -Although conceptually advantageous over parametric methods, this can lead to -poor finite sample performance. -Therefore, several calibration techniques are implemented in \pkg{melt} to -mitigate this drawback of EL. - -The rest of the paper is organized as follows. -Section~\ref{sec:background} describes EL methods and computational aspects of -testing hypotheses with EL. +also be done, adding more options for data analysis. Note that all the tests and +methods rely on EL and its asymptotic properties. Although conceptually +advantageous over parametric methods, this can lead to poor finite sample +performance. Therefore, several calibration techniques are implemented in +\pkg{melt} to mitigate this drawback of EL. + +The rest of the paper is organized as follows. Section~\ref{sec:background} +describes EL methods and computational aspects of testing hypotheses with EL. Section~\ref{sec:overview} provides an overview of the \pkg{melt} package. Section~\ref{sec:usage} shows the basic usage of \pkg{melt} with implementation -details. -Section~\ref{sec:example} presents an application to pest control experiments. -We conclude with a summary and directions for future development in +details. Section~\ref{sec:example} presents an application to pest control +experiments. We conclude with a summary and directions for future development in Section~\ref{sec:conclusion}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -508,10 +484,8 @@ likelihood estimator. When overdispersion is present with unknown \({\phi}\), we introduce another estimating function based on the squared residuals. 
Let \({\eta} = {(\theta, \phi)}\) and - -\vspace*{-0.3cm} - -\begin{equation*}%\label{eq:qglm} +% +\begin{equation*} g_2(Z_i, \eta) = \frac{\left(Y_i - H(X_i^\top \theta)\right)^2}{\phi^2 V\left(H(X_i^\top \theta)\right)} @@ -543,9 +517,7 @@ Confidence regions for \({\theta}\) can be constructed by applying a calibration method to \({l(\theta)}\). One advantage of using EL for linear models is that the confidence regions have data-driven shapes and orientations. - -\vspace*{-0.25cm} - +% \SetKwComment{Comment}{/* }{ */} \begin{algorithm}[t!] \caption{Constrained empirical likelihood optimization using the projected @@ -609,6 +581,7 @@ data-driven shapes and orientations. \KwRet{\({\theta}\) \textnormal{and} \({\lambda}\)}\; \end{algorithm} + \subsection{Hypothesis testing with empirical likelihood}\label{sec:2.3} As seen in Section~\ref{sec:2.2}, it is easy to compute the MELE and evaluate the EL ratio function at a given value for linear models. @@ -677,18 +650,16 @@ Equation~\ref{eq:gradient} with the orthogonal projector matrix \({P} = {I_p - L^\top(LL^\top)^{-1}L}\), where \({I_p}\) denotes the \({p} \times {p}\) identity matrix. Then it would take a relatively small number of iterations for convergence, -reducing the required number of inner layer updates of \({\lambda}\). -The pseudo code is shown in Algorithm~\ref{alg:pgd}. +reducing the required number of inner layer updates of \({\lambda}\). The pseudo +code is shown in Algorithm~\ref{alg:pgd}. Controlling the type 1 error rate is necessary when testing multiple hypotheses -simultaneously. -Recently there has been interest in multiplicity-adjusted test procedures for -Wald-type test statistics that asymptotically have a multivariate chi-square -distribution under the global null hypothesis \citep{dickhaus2015survey, -dickhaus2019simultaneous}. 
-\citet{kim2023empirical} proposed single-step multiple testing procedures for EL -that asymptotically control the family-wise error rate with Monte Carlo -simulations or bootstrap. +simultaneously. Recently there has been interest in multiplicity-adjusted test +procedures for Wald-type test statistics that asymptotically have a multivariate +chi-square distribution under the global null hypothesis +\citep{dickhaus2015survey, dickhaus2019simultaneous}. \citet{kim2023empirical} +proposed single-step multiple testing procedures for EL that asymptotically +control the family-wise error rate with Monte Carlo simulations or bootstrap. \citet{wang2018f} applied the \({F}\)-calibrated EL statistics to the Benjamini--Hochberg procedure \citep{benjamini1995controlling} to control the false discovery rate. @@ -702,18 +673,15 @@ Computational tasks are implemented in parallel using \pkg{OpenMP} \citep{dagum1998openmp} API in \proglang{C++} with the \pkg{Rcpp} \citep{Rcpp} and \pkg{RcppEigen} \citep{RcppEigen} packages to interface with \proglang{R}. Depending on the platform, the package can be compiled from source with support -for \pkg{OpenMP}. -The overall design of \pkg{melt} adopts the functional object-oriented -programming approach \citep{chambers2014object} with \proglang{S}4 classes and -methods. -Every function in the package is either a wrapper that creates a single instance -of an object or a method that can be applied to a class object. -The workflow of the package consists of three steps: (1) Fitting a model, (2) -examining and diagnosing the fitted model, and (3) testing hypotheses with the -model. -Four functions are available to build a model object whose names start with the -prefix \code{el_}, which stands for empirical likelihood. -A summary of the functions is provided below. +for \pkg{OpenMP}. The overall design of \pkg{melt} adopts the functional +object-oriented programming approach \citep{chambers2014object} with +\proglang{S}4 classes and methods. 
Every function in the package is either a +wrapper that creates a single instance of an object or a method that can be +applied to a class object. The workflow of the package consists of three +steps: (1) Fitting a model, (2) examining and diagnosing the fitted model, and +(3) testing hypotheses with the model. Four functions are available to build a +model object whose names start with the prefix \code{el_}, which stands for +empirical likelihood. A summary of the functions is provided below. % \begin{itemize} \item \fct{el\_mean}: Creates an \class{EL} object for the mean. @@ -724,14 +692,13 @@ A summary of the functions is provided below. \end{itemize} % For univariate data, \fct{el\_mean} corresponds to \fct{t.test} in the -\pkg{stats} package. -The \fct{el\_lm} and \fct{el\_glm} functions correspond to \fct{lm} and -\fct{glm}, respectively. +\pkg{stats} package. The \fct{el\_lm} and \fct{el\_glm} functions correspond to +\fct{lm} and \fct{glm}, respectively. All model objects inherit from class \class{EL}, and a description of the slots -in \class{EL} is given in Table~\ref{tab:EL}. -Notably, the slot \code{optim} is a \class{list} with the following four -components that summarize the optimization results: +in \class{EL} is given in Table~\ref{tab:EL}. Notably, the slot \code{optim} is +a \class{list} with the following four components that summarize the +optimization results: \pagebreak @@ -780,34 +747,28 @@ A full explanation of the class and slots can be found in the documentation of \end{table} % Note that \code{par} is fixed in the evaluation of EL and should not be confused -with the MELE, which is stored in the \code{coefficients} slot. -The optimization is performed with respect to \code{lambda}, so -\code{iterations} and \code{convergence} need to be understood in terms of -\code{lambda}. -Here we make a distinction between EL evaluation and EL optimization. 
-The EL optimization refers to the constrained EL problem discussed in -Section~\ref{sec:2.3} and corresponds to another class \class{CEL} that directly -extends \class{EL}. -The \code{optim} slot in a \class{CEL} object has the same components. -However, the optimization results are now interpreted in terms of \code{par}, -the solution to the constrained problem. -The \class{LM} and \class{GLM} classes contain \class{CEL}, meaning that a -constrained optimization is performed initially when \fct{el\_lm} or -\fct{el\_glm} is called. -In order to avoid confusion, the \class{CEL} class only distinguishes EL -optimization from EL evaluation, and the user does not directly interact with a -\class{CEL} object. -Instead, the \code{optim} slot of every model object contains a single logical -\code{cstr} that indicates whether EL optimization is performed or not. -Once \code{par} is obtained through evaluation or optimization, it uniquely -determines \code{lambda} and, in turn, \code{logl} and \code{loglr}. -Then \code{statistic} is equivalent to \code{-2 * loglr} and has an asymptotic -chi-square distribution under the null hypothesis, with the associated \code{df} -and \code{pval}. -All four model fitting functions above accept an optional argument -\code{weights} for weighted data. -A vector of weights is then re-scaled internally for numerical stability in the -computation of weighted EL \citep{glenn2007weighted}. +with the MELE, which is stored in the \code{coefficients} slot. The optimization +is performed with respect to \code{lambda}, so \code{iterations} and +\code{convergence} need to be understood in terms of \code{lambda}. Here we make +a distinction between EL evaluation and EL optimization. The EL optimization +refers to the constrained EL problem discussed in Section~\ref{sec:2.3} and +corresponds to another class \class{CEL} that directly extends \class{EL}. The +\code{optim} slot in a \class{CEL} object has the same components. 
However, the +optimization results are now interpreted in terms of \code{par}, the solution to +the constrained problem. The \class{LM} and \class{GLM} classes contain +\class{CEL}, meaning that a constrained optimization is performed initially when +\fct{el\_lm} or \fct{el\_glm} is called. In order to avoid confusion, the +\class{CEL} class only distinguishes EL optimization from EL evaluation, and the +user does not directly interact with a \class{CEL} object. Instead, the +\code{optim} slot of every model object contains a single logical \code{cstr} +that indicates whether EL optimization is performed or not. Once \code{par} is +obtained through evaluation or optimization, it uniquely determines +\code{lambda} and, in turn, \code{logl} and \code{loglr}. Then \code{statistic} +is equivalent to \code{-2 * loglr} and has an asymptotic chi-square distribution +under the null hypothesis, with the associated \code{df} and \code{pval}. All +four model fitting functions above accept an optional argument \code{weights} +for weighted data. A vector of weights is then re-scaled internally for +numerical stability in the computation of weighted EL \citep{glenn2007weighted}. Although \fct{weights} and \fct{coef} can extract \code{weights} and \code{coefficients}, these slots are mainly stored for subsequent analyses and methods. @@ -851,27 +812,22 @@ It returns an object of class \class{ELMT} with slots similar to those in \class{ELT}. \end{itemize} An \class{ELT} object also has the \code{optim} slot, which does not necessarily -correspond to the EL optimization. -The user can supply an arbitrary parameter value to test, reducing the problem -to the EL evaluation. -The \fct{elmt} function applies the single-step multiple testing procedure of -\citet{kim2023empirical}. -The multiplicity-adjusted critical value and \({p}\)~values are estimated by -Monte Carlo simulation. 
-All model objects that inherit from \class{EL}, \class{ELT}, and \class{ELMT} -support \fct{print} and \fct{summary} methods. +correspond to the EL optimization. The user can supply an arbitrary parameter +value to test, reducing the problem to the EL evaluation. The \fct{elmt} +function applies the single-step multiple testing procedure of +\citet{kim2023empirical}. The multiplicity-adjusted critical value and +\({p}\)~values are estimated by Monte Carlo simulation. All model objects that +inherit from \class{EL}, \class{ELT}, and \class{ELMT} support \fct{print} and +\fct{summary} methods. Note that every step of the workflow involves possibly multiple EL evaluations -or optimizations. -Hence, it is necessary to flexibly control the details of the execution and -computation at hand. -All model fitting functions and most methods accept an optional argument -\code{control}, which allows the user to specify the control parameters. -Only an object of class \class{ControlEL} can be supplied as \code{control} to -ensure validity and avoid unexpected errors. -Some of the slots in \class{ControlEL} are described in -Table~\ref{tab:ControlEL}. -This \class{ControlEL} object is stored in every model object, so any subsequent +or optimizations. Hence, it is necessary to flexibly control the details of the +execution and computation at hand. All model fitting functions and most methods +accept an optional argument \code{control}, which allows the user to specify the +control parameters. Only an object of class \class{ControlEL} can be supplied as +\code{control} to ensure validity and avoid unexpected errors. Some of the slots +in \class{ControlEL} are described in Table~\ref{tab:ControlEL}. This +\class{ControlEL} object is stored in every model object, so any subsequent method can use those parameters unless the user overwrites them with new values. \begin{table}[t!] 
\centering @@ -910,20 +866,17 @@ el_control(maxit = 200L, maxit_l = 25L, tol = 1e-06, tol_l = 1e-06, seed = NULL, b = 10000L, m = 1000000L) \end{Code} Specifically, \code{nthreads} specifies the number of threads for parallel -computation via \pkg{OpenMP} (if available). -By default, it is set to half the available threads and affects the following -functions: \fct{confint}, \fct{confreg}, \fct{el\_lm}, \fct{el\_glm}, \fct{eld}, -and \fct{elt}. -For better performance, it is generally recommended in most platforms to limit -the number of threads to the number of physical cores. -The argument \code{seed} sets the seed for random number generation. -It defaults to a random integer generated from \({1}\) to the maximum integer -supported by \proglang{R} on the machine, which is determined by \fct{set.seed}. -For fast parallel random number generation and compatibility with -\pkg{OpenMP}, the -Xoshiro256+ pseudo-random number generator (period \({2^{256}} - {1}\)) of -\citet{blackman2021scrambled} is used internally with the \pkg{dqrng} package -\citep{dqrng}. +computation via \pkg{OpenMP} (if available). By default, it is set to half the +available threads and affects the following functions: \fct{confint}, +\fct{confreg}, \fct{el\_lm}, \fct{el\_glm}, \fct{eld}, and \fct{elt}. For better +performance, it is generally recommended on most platforms to limit the number +of threads to the number of physical cores. The argument \code{seed} sets the +seed for random number generation. It defaults to a random integer generated +from \({1}\) to the maximum integer supported by \proglang{R} on the machine, +which is determined by \fct{set.seed}. For fast parallel random number +generation and compatibility with \pkg{OpenMP}, the Xoshiro256+ pseudo-random +number generator (period \({2^{256}} - {1}\)) of \citet{blackman2021scrambled} +is used internally with the \pkg{dqrng} package \citep{dqrng}. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -932,13 +885,12 @@ Xoshiro256+ pseudo-random number generator (period \({2^{256}} - {1}\)) of \subsection{Model building}\label{sec:4.1} For a simple illustration of building a model, we apply \fct{el\_mean} to the synthetic classification problem data \code{synth.tr} from the \pkg{MASS} -package \citep{MASS}. -The \code{synth.tr} object is a \class{data.frame} with 250 rows and three -columns. We select two columns \code{xs} and \code{ys}, the \(x\) and \(y\) -coordinates, to build an EL model with two-dimensional mean parameter. The -resulting \class{data.frame} is denoted by \code{data}. The \pkg{dplyr} package -\citep{dplyr} and the \pkg{ggplot2} package \citep{ggplot2} are used to aid data -manipulation and visualization. +package \citep{MASS}. The \code{synth.tr} object is a \class{data.frame} with +250 rows and three columns. We select two columns \code{xs} and \code{ys}, the +\(x\) and \(y\) coordinates, to build an EL model with a two-dimensional mean +parameter. The resulting \class{data.frame} is denoted by \code{data}. The +\pkg{dplyr} package \citep{dplyr} and the \pkg{ggplot2} package \citep{ggplot2} +are used to aid data manipulation and visualization. % <>= library("melt") @@ -954,7 +906,7 @@ EL function with the convex hull constraint in Figure~\ref{fig:hull}. % \begin{figure}[t!] \centering -<>= +<>= ggplot(data, aes(xs, ys)) + geom_point() + geom_polygon( @@ -979,7 +931,7 @@ Any parameter value inside the convex hull leads to proper EL evaluation. We specify \code{c(0, 0.5)} as \code{par} in \fct{el\_mean} and build an \class{EL} object with the \code{data}. % -<>= +<>= fit_mean <- el_mean(data, par = c(0, 0.5)) @ % @@ -988,7 +940,7 @@ The \code{data} object is implicitly coerced into a \class{matrix} since \fct{print} and \fct{show} methods display relevant information about an \class{EL} object. % -<>= +<>= fit_mean @ % @@ -1032,7 +984,7 @@ infeasible region in Figure~\ref{fig:surface}. % \begin{figure}[t!] 
\centering -<>= +<>= xs <- seq(-1.5, 1.1, length.out = 60) ys <- seq(-0.4, 1.3, length.out = 40) ctrl <- el_control(th = 400) @@ -1066,7 +1018,7 @@ For a given value of \({\theta}\), we evaluate the estimating function \({g(X_i, The \fct{el\_eval} function takes a \class{matrix} argument \code{g}, where each row corresponds to \({g(X_i, \theta)}\). % -<>= +<>= mu <- 0 sigma <- 1 set.seed(123526) @@ -1077,76 +1029,68 @@ fit_eval$pval @ % Although the user can supply a custom \code{g}, \fct{el\_eval} is not the main -function of the package. -The \fct{el\_eval} function returns a \class{list} with the same components as -in an \class{EL} object, but no other methods are applicable further. -The scope is also limited to just-identified estimating functions. -For more flexible and over-identified estimating functions, it is recommended to -use other packages, e.g.,~\pkg{gmm} or \pkg{momentfit}. +function of the package. The \fct{el\_eval} function returns a \class{list} with +the same components as in an \class{EL} object, but no other methods are +applicable further. The scope is also limited to just-identified estimating +functions. For more flexible and over-identified estimating functions, it is +recommended to use other packages, e.g.,~\pkg{gmm} or \pkg{momentfit}. \subsection{Linear regression analysis}\label{sec:4.2} We illustrate the use of \fct{el\_lm} for regression analysis with the crime -rates data \code{UScrime} available in \pkg{MASS}. -Here we update the control parameters for significance tests of the -coefficients. +rates data \code{UScrime} available in \pkg{MASS}. Here we update the control +parameters for significance tests of the coefficients. % -<>= +<>= data("UScrime", package = "MASS") ctrl <- el_control(maxit = 1000, nthreads = 2) (fit_lm <- el_lm(y ~ Pop + Ineq, data = UScrime, control = ctrl)) @ % The \fct{print} method also applies and shows the MELE, the overall model test -result, and the convergence status. 
-The estimates are obtained from \fct{lm.fit}. -The hypothesis for the overall test is that all the parameters except the -intercept are \({0}\). -The convergence status shows that a constrained optimization is performed in -testing the hypothesis. -The EL evaluation applies to the test and the convergence status if the model -does not include an intercept. -The \fct{conv} method can be used to extract the convergence status. -It is designed to return a single logical, which can be helpful in a control -flow where the convergence status decides the course of action. -The large chi-square value above implies that the data do not support the -hypothesis, regardless of the convergence. -Note that failure to converge does not necessarily indicate unreliable test -results. -Most commonly, the algorithm fails to converge if the additional constraint -imposed by a hypothesis is incompatible with the convex hull constraint. -The control parameters affect the test results as well. -The \fct{summary} method reports more details, such as the results of -significance tests, where each test involves solving a constrained EL problem. -% -<>= +result, and the convergence status. The estimates are obtained from +\fct{lm.fit}. The hypothesis for the overall test is that all the parameters +except the intercept are \({0}\). The convergence status shows that a +constrained optimization is performed in testing the hypothesis. The EL +evaluation applies to the test and the convergence status if the model does not +include an intercept. The \fct{conv} method can be used to extract the +convergence status. It is designed to return a single logical, which can be +helpful in a control flow where the convergence status decides the course of +action. The large chi-square value above implies that the data do not support +the hypothesis, regardless of the convergence. Note that failure to converge +does not necessarily indicate unreliable test results. 
Most commonly, the +algorithm fails to converge if the additional constraint imposed by a hypothesis +is incompatible with the convex hull constraint. The control parameters affect +the test results as well. The \fct{summary} method reports more details, such as +the results of significance tests, where each test involves solving a +constrained EL problem. +% +<>= summary(fit_lm) @ -\vspace*{-0.25cm} % -These tests are all asymptotically pivotal without explicit studentization. -As a result, the output does not have standard errors. +These tests are all asymptotically pivotal without explicit studentization. As a +result, the output does not have standard errors. By iteratively solving constrained EL problems for a grid of parameter values, confidence intervals for the parameters can be calculated with \fct{confint}. The chi-square calibration is the default, but the user can specify a critical -value \code{cv} optionally. -Below we calculate asymptotic \({95\%}\) confidence intervals. +value \code{cv} optionally. Below we calculate asymptotic \({95\%}\) confidence +intervals. % -<>= +<>= confint(fit_lm) @ % Without standard errors and \fct{vcov} methods, the \code{lower} and \code{upper} confidence limits do not necessarily correspond to \({2.5}\) and -\({97.5}\) percentiles, respectively. -Similarly, we obtain confidence regions for two parameters with \fct{confreg}. -Starting from the MELE, it computes the boundary points of a confidence region -in full circle. -An optional argument \code{npoints} controls the number of boundary points. -The return value is a \class{ConfregEL} object containing a matrix whose rows -consist of the points, and the \fct{plot} method visualizes the confidence -region (Figure~\ref{fig:confreg}). +\({97.5}\) percentiles, respectively. Similarly, we obtain confidence regions +for two parameters with \fct{confreg}. Starting from the MELE, it computes the +boundary points of a confidence region in full circle. 
An optional argument +\code{npoints} controls the number of boundary points. The return value is a +\class{ConfregEL} object containing a matrix whose rows consist of the points, +and the \fct{plot} method visualizes the confidence region +(Figure~\ref{fig:confreg}). % <>= cr <- confreg(fit_lm, parm = c("Pop", "Ineq"), npoints = 200) @@ -1155,7 +1099,7 @@ plot(cr, cex = 1.5, cex.axis = 1.5, cex.lab = 1.5, lwd = 2, tck = -0.01) % \begin{figure}[t!] \centering -<>= +<>= cr <- confreg(fit_lm, parm = c("Pop", "Ineq"), npoints = 200) plot(cr, cex = 1.5, cex.axis = 1.5, cex.lab = 1.5, lwd = 2, tck = -0.01) axis(1, lwd.ticks = 2, labels = FALSE, tck = -0.01) @@ -1168,11 +1112,10 @@ At the center of the plot is the MELE \(\hat{\theta}\).} \end{figure} Finally, we apply \fct{eld} to detect influential observations and outliers. -Aside from the model object, \fct{eld} only accepts the control parameters. -By the leave-one-out method of ELD, an \class{ELD} object inherits from the base +Aside from the model object, \fct{eld} only accepts the control parameters. By +the leave-one-out method of ELD, an \class{ELD} object inherits from the base type \class{numeric}, with the length equal to the number of observations in the -data. -Figure~\ref{fig:eld} shows the ELD values from the \fct{plot} method. +data. Figure~\ref{fig:eld} shows the ELD values from the \fct{plot} method. % <>= eld <- eld(fit_lm) @@ -1189,7 +1132,7 @@ summary(eld) The code below shows that the observation with the largest ELD also has the largest Cook's distance from the same linear model fitted by \fct{lm}. % -<>= +<>= fit2_lm <- lm(y ~ Pop + Ineq, data = UScrime) cd <- cooks.distance(fit2_lm) all.equal(which.max(eld), which.max(cd), check.attributes = FALSE) @@ -1197,7 +1140,7 @@ all.equal(which.max(eld), which.max(cd), check.attributes = FALSE) % \begin{figure}[t!] 
\centering -<>= +<>= plot(eld, cex = 1.5, cex.axis = 1.5, cex.lab = 1.5, lwd = 2, pch = 19, tck = -0.01 ) @@ -1220,214 +1163,133 @@ elt(object, rhs = NULL, lhs = NULL, alpha = 0.05, calibrate = "chisq", \end{Code} The arguments \code{rhs} and \code{lhs} define a linear hypothesis and correspond to \({r}\) and \({L}\) in Equation~\ref{eq:linear hypothesis}, -respectively. -Therefore, either one or the other must be provided. -The argument \code{lhs} takes a numeric matrix or a vector. -Alternatively, a character vector can be supplied to symbolically specify a -hypothesis, which is convenient when there are many variables. -When \code{lhs} is \code{NULL}, it performs the EL evaluation at \({\theta} = -{r}\) by setting \({L} = {I_p}\), where \({I_p}\) is the identity matrix of -order \({p}\). -When \code{rhs} is \code{NULL}, on the other hand, \({r}\) is set to the zero -vector automatically, and the EL optimization is performed with \(L\). -Technically, \fct{elt} can reproduce the test results from \code{fit\_mean} in -Section~\ref{sec:4.1} and \code{fit\_lm} in Section~\ref{sec:4.2}. -Note the equivalence between the optimization results. -% -\begin{Schunk} -\begin{Sinput} -R> elt_mean <- elt(fit_mean, rhs = c(0, 0.5)) -R> all.equal(getOptim(elt_mean), getOptim(fit_mean)) -\end{Sinput} -\begin{Soutput} -[1] TRUE -\end{Soutput} -\begin{Sinput} -R> elt_lm <- elt(fit_lm, lhs = c("Pop", "Ineq")) -R> all.equal(getOptim(elt_lm), getOptim(fit_lm)) -\end{Sinput} -\begin{Soutput} -[1] TRUE -\end{Soutput} -\end{Schunk} +respectively. Therefore, either one or the other must be provided. The argument +\code{lhs} takes a numeric matrix or a vector. Alternatively, a character vector +can be supplied to symbolically specify a hypothesis, which is convenient when +there are many variables. When \code{lhs} is \code{NULL}, it performs the EL +evaluation at \({\theta} = {r}\) by setting \({L} = {I_p}\), where \({I_p}\) is +the identity matrix of order \({p}\). 
When \code{rhs} is \code{NULL}, on the +other hand, \({r}\) is set to the zero vector automatically, and the EL +optimization is performed with \(L\). Technically, \fct{elt} can reproduce the +test results from \code{fit\_mean} in Section~\ref{sec:4.1} and \code{fit\_lm} +in Section~\ref{sec:4.2}. Note the equivalence between the optimization results. +% +<>= +elt_mean <- elt(fit_mean, rhs = c(0, 0.5)) +all.equal(getOptim(elt_mean), getOptim(fit_mean)) +elt_lm <- elt(fit_lm, lhs = c("Pop", "Ineq")) +all.equal(getOptim(elt_lm), getOptim(fit_lm)) +@ % In addition to specifying an arbitrary linear hypothesis through \code{rhs} and \code{lhs}, extra arguments \code{alpha} and \code{calibrate} expand options for -testing. -The argument \code{alpha} controls the significance level determining the -critical value, and \code{calibrate} chooses the calibration method. -The \fct{critVal} method extracts the critical value from an \class{ELT} -object. -% -\begin{Schunk} -\begin{Sinput} -R> critVal(elt_mean) -\end{Sinput} -\begin{Soutput} -[1] 5.991 -\end{Soutput} -\end{Schunk} +testing. The argument \code{alpha} controls the significance level determining +the critical value, and \code{calibrate} chooses the calibration method. The +\fct{critVal} method extracts the critical value from an \class{ELT} object. +% +<>= +critVal(elt_mean) +@ % We apply the \({F}\) and bootstrap calibrations to \code{fit_mean} at a -significance level of \({0.05}\). -The number of threads is increased to four with 100000 bootstrap replicates in -\fct{el\_control}. 
-% -\begin{Schunk} -\begin{Sinput} -R> ctrl <- el_control(maxit = 10000, tol = 1e-04, nthreads = 4, b = 100000, -+ step = 1e-05) -R> (elt_mean_f <- elt(fit_mean, rhs = c(0, 0.5), calibrate = "F", -+ control = ctrl)) -\end{Sinput} -\begin{Soutput} - Empirical Likelihood Test - -Hypothesis: -xs = 0.0 -ys = 0.5 - -Significance level: 0.05, Calibration: F - -Statistic: 6.16, Critical value: 6.09 -p-value: 0.0484 -EL evaluation: converged -\end{Soutput} -\begin{Sinput} -R> (elt_mean_boot <- elt(fit_mean, rhs = c(0, 0.5), calibrate = "boot", -+ control = ctrl)) -\end{Sinput} -\begin{Soutput} - Empirical Likelihood Test - -Hypothesis: -xs = 0.0 -ys = 0.5 - -Significance level: 0.05, Calibration: Bootstrap - -Statistic: 6.16, Critical value: 6.06 -p-value: 0.0476 -EL evaluation: converged -\end{Soutput} -\end{Schunk} +significance level of \({0.05}\). The number of threads is increased to four +with \({100000}\) bootstrap replicates in \fct{el\_control}. +% +<>= +ctrl <- el_control( + maxit = 10000, tol = 1e-04, nthreads = 4, b = 100000, step = 1e-05 +) +(elt_mean_f <- elt(fit_mean, + rhs = c(0, 0.5), calibrate = "F", control = ctrl +)) +(elt_mean_boot <- elt(fit_mean, + rhs = c(0, 0.5), calibrate = "boot", control = ctrl +)) +@ % The above output shows that the \({F}\) and bootstrap calibrations tend to -produce slightly larger critical values than the chi-square calibration. -These values can be used as the \code{cv} argument in \fct{confint} and -\fct{confreg}, improving coverage probabilities when the sample size is small. +produce slightly larger critical values than the chi-square calibration. These +values can be used as the \code{cv} argument in \fct{confint} and \fct{confreg}, +improving coverage probabilities when the sample size is small. Next, we compare \fct{elt} with \fct{lht} in \pkg{car} that computes an -asymptotic chi-square statistic from Wald tests. -The two functions have similar syntax with comparable outputs. 
-For illustration, we fit a logistic regression model to the U.S.~women's -labor-force participation data \code{Mroz} from the \pkg{carData} package -\citep{carData} with \fct{el\_glm} and \fct{glm}. -We include all variables of \code{carData} in the model with the binary response -variable \code{lfp}, which stands for labor-force participation. -See the documentation of \code{carData} for a detailed description of the -variables. -% -\begin{Schunk} -\begin{Sinput} -R> library("car") -R> data("Mroz", package = "carData") -R> fit_glm <- el_glm(lfp ~ ., family = binomial(link = "logit"), -+ data = Mroz, control = ctrl) -R> fit2_glm <- glm(lfp ~ ., family = binomial(link = "logit"), data = Mroz) -\end{Sinput} -\end{Schunk} +asymptotic chi-square statistic from Wald tests. The two functions have similar +syntax with comparable outputs. For illustration, we fit a logistic regression +model to the U.S.~women's labor-force participation data \code{Mroz} from the +\pkg{carData} package \citep{carData} with \fct{el\_glm} and \fct{glm}. We +include all variables of \code{Mroz} in the model with the binary response +variable \code{lfp}, which stands for labor-force participation. See the +documentation of \code{Mroz} for a detailed description of the variables. +% +<>= +library("car") +data("Mroz", package = "carData") +fit_glm <- el_glm(lfp ~ ., + family = binomial(link = "logit"), data = Mroz, control = ctrl +) +fit2_glm <- glm(lfp ~ ., family = binomial(link = "logit"), data = Mroz) +@ % Asymptotic \({95\%}\) confidence intervals from \fct{confint} can be compared with the ones from \fct{confint.glm} in the \pkg{MASS} package. 
% -\begin{Schunk} -\begin{Sinput} -R> matrix(c(confint(fit_glm), confint(fit2_glm)), ncol = 4, -+ dimnames = list( -+ c(names(coef(fit2_glm))), -+ c("EL_lower", "EL_upper", "MASS_2.5%", "MASS_97.5%") -+ )) -\end{Sinput} -\begin{Soutput} - EL_lower EL_upper MASS_2.5% MASS_97.5% -(Intercept) 2.27606 4.09139 1.9370 4.46631 -k5 -1.79757 -1.14646 -1.8609 -1.08747 -k618 -0.18119 0.05263 -0.1984 0.06867 -age -0.07024 -0.05535 -0.0883 -0.03814 -wcyes 0.41881 1.20724 0.3610 1.26378 -hcyes -0.23781 0.46629 -0.2920 0.51679 -lwg 0.32517 0.91532 0.3140 0.90698 -inc -0.04985 -0.01970 -0.0510 -0.01877 -\end{Soutput} -\end{Schunk} +<>= +matrix(c(confint(fit_glm), confint(fit2_glm)), + ncol = 4, dimnames = list( + c(names(coef(fit2_glm))), + c("EL_lower", "EL_upper", "MASS_2.5%", "MASS_97.5%") + ) +) +@ % We employ \fct{coef} to extract only the results of significance tests from the output of \fct{summary}. % -\begin{Schunk} -\begin{Sinput} -R> coef(summary(fit_glm)) -\end{Sinput} -\begin{Soutput} - Estimate Chisq Pr(>Chisq) -(Intercept) 3.18214 539.769 2.116e-119 -k5 -1.46291 85.631 2.169e-20 -k618 -0.06457 1.174 2.785e-01 -age -0.06287 544.866 1.648e-120 -wcyes 0.80727 16.705 4.366e-05 -hcyes 0.11173 0.402 5.261e-01 -lwg 0.60469 19.768 8.743e-06 -inc -0.03445 22.996 1.624e-06 -\end{Soutput} -\end{Schunk} +<>= +coef(summary(fit_glm)) +@ % Based on the estimates and \({p}\)~values above, we test two hypotheses that involve different classes of \code{lhs}: 1) \code{wc} \({=}\) \code{hc} and 2) -\code{k5} \({=}\) \({-1.5}\) and \code{k618} \({=}\) \({0}\). -Wald tests are performed by specifying \code{test = "Chisq"} in \fct{lht}. 
-\begin{Schunk} -\begin{Sinput} -R> lhs <- c(0, 0, 0, 0, 1, -1, 0, 0) -R> elt_glm <- elt(fit_glm, lhs = lhs) -R> lht_glm <- lht(fit2_glm, hypothesis.matrix = lhs, test = "Chisq") -R> lhs2 <- rbind( -+ c(0, 1, 0, 0, 0, 0, 0, 0), -+ c(0, 0, 1, 0, 0, 0, 0, 0)) -R> rhs2 <- c(-1.5, 0) -R> elt2_glm <- elt(fit_glm, rhs = rhs2, lhs = lhs2) -R> lht2_glm <- lht(fit2_glm, hypothesis.matrix = lhs2, rhs = rhs2, -+ test = "Chisq") -\end{Sinput} -\end{Schunk} +\code{k5} \({=}\) \({-1.5}\) and \code{k618} \({=}\) \({0}\). Wald tests are +performed by specifying \code{test = "Chisq"} in \fct{lht}. +% +<>= +lhs <- c(0, 0, 0, 0, 1, -1, 0, 0) +elt_glm <- elt(fit_glm, lhs = lhs) +lht_glm <- lht(fit2_glm, hypothesis.matrix = lhs, test = "Chisq") +lhs2 <- rbind( + c(0, 1, 0, 0, 0, 0, 0, 0), + c(0, 0, 1, 0, 0, 0, 0, 0) +) +rhs2 <- c(-1.5, 0) +elt2_glm <- elt(fit_glm, rhs = rhs2, lhs = lhs2) +lht2_glm <- lht(fit2_glm, + hypothesis.matrix = lhs2, rhs = rhs2, test = "Chisq" +) +@ % For comparison, we extract the chi-square statistics and \({p}\)~values using -\fct{chisq} and \fct{pVal}. -The results are presented below. -\begin{Schunk} -\begin{Sinput} -R> matrix(c(chisq(elt_glm), pVal(elt_glm), -+ lht_glm$Chisq[2], lht_glm$`Pr(>Chisq)`[2]), -+ nrow = 2, byrow = TRUE, -+ dimnames = list(c("EL", "Wald"), c("Chisq", "Pr(>Chisq)"))) -\end{Sinput} -\begin{Soutput} - Chisq Pr(>Chisq) -EL 3.634 0.05660 -Wald 3.536 0.06004 -\end{Soutput} -\begin{Sinput} -R> matrix(c(chisq(elt2_glm), pVal(elt2_glm), -+ lht2_glm$Chisq[2], lht2_glm$`Pr(>Chisq)`[2]), -+ nrow = 2, byrow = TRUE, -+ dimnames = list(c("EL", "Wald"), c("Chisq", "Pr(>Chisq)"))) -\end{Sinput} -\begin{Soutput} - Chisq Pr(>Chisq) -EL 1.144 0.5643 -Wald 1.011 0.6032 -\end{Soutput} -\end{Schunk} +\fct{chisq} and \fct{pVal}. The results are presented below. 
+% +<>= +matrix( + c( + chisq(elt_glm), pVal(elt_glm), + lht_glm$Chisq[2], lht_glm$`Pr(>Chisq)`[2] + ), + nrow = 2, byrow = TRUE, + dimnames = list(c("EL", "Wald"), c("Chisq", "Pr(>Chisq)")) +) +matrix( + c( + chisq(elt2_glm), pVal(elt2_glm), + lht2_glm$Chisq[2], lht2_glm$`Pr(>Chisq)`[2] + ), + nrow = 2, byrow = TRUE, + dimnames = list(c("EL", "Wald"), c("Chisq", "Pr(>Chisq)")) +) +@ % The two tests provide similar results with a sample size of 753, which is not surprising given the asymptotic equivalence between these tests \citep[see][and @@ -1436,90 +1298,100 @@ references therein]{qin1995estimating}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\pagebreak \section{Case study}\label{sec:example} This section presents a more in-depth data analysis using EL with an internal dataset of \pkg{melt}, \code{thiamethoxam}, from \citet{obregon2022pest}. Thiamethoxam is a widely used neonicotinoid pesticide that translocates through -plants, leaving residues in crops. -Since pesticides can also affect non-target organisms such as pollinators, it is -important to maintain a balance between pest management and pollinator -protection to maximize crop yield. +plants, leaving residues in crops. Since pesticides can also affect non-target +organisms such as pollinators, it is important to maintain a balance between +pest management and pollinator protection to maximize crop yield. \citet{obregon2022pest} aimed to test how different application methods of thiamethoxam and plant variety impact pest control, bee visits, yield, and -pesticide residues in flowers of squash crops. -Squash crops rely on bee pollination to yield fruits \citep{knapp2019cucurbits}, -and the striped cucumber beetle is the major pest for squash crops -\citep{haber2021striped}. +pesticide residues in flowers of squash crops. Squash crops rely on bee +pollination to yield fruits \citep{knapp2019cucurbits}, and the striped cucumber +beetle is the major pest for squash crops \citep{haber2021striped}. 
\citet{obregon2022pest} conducted a field experiment with two varieties that differ in their attractiveness to striped cucumber beetles: (1) Golden Zucchini (preferred by the beetle) and (2) Success PM straightneck summer squash (not -preferred by the beetle). -Also, the following four thiamethoxam application methods were used: (1) -In-furrow application after sowing, (2) foliar spray application three weeks -after sowing, (3) seed treatment, and (4) no insecticides. -Specifically, a quasi-Poisson regression model with a log link function was fit -to examine the effects of plant variety and thiamethoxam application methods on -the number of bee visits. -The statistical significance of each variable was also tested, followed by -Tukey's honest significant difference post hoc tests with the \pkg{agricolae} -package \citep{agricolae} for pairwise comparisons among the plant varieties and -the application methods. +preferred by the beetle). Also, the following four thiamethoxam application +methods were used: (1) In-furrow application after sowing, (2) foliar spray +application three weeks after sowing, (3) seed treatment, and (4) no +insecticides. Specifically, a quasi-Poisson regression model with a log link +function was fit to examine the effects of plant variety and thiamethoxam +application methods on the number of bee visits. The statistical significance of +each variable was also tested, followed by Tukey's honest significant difference +post hoc tests with the \pkg{agricolae} package \citep{agricolae} for pairwise +comparisons among the plant varieties and the application methods. Following the original approach of \citet{obregon2022pest}, our goal is to conduct relevant tests with EL, focusing on performing multiple comparisons and -constructing simultaneous confidence intervals. -First, \code{thiamethoxam} is a \class{data.frame} with 165 observations and 11 -variables. -A summary of \code{thiamethoxam} is provided below. 
-% -\begin{Schunk} -\begin{Sinput} -R> data("thiamethoxam") -R> summary(thiamethoxam) -\end{Sinput} -\begin{Soutput} - trt var rep fruit avg_mass - None :41 SPM:82 1:24 Min. : 1.00 Min. :102 - Spray :40 GZ :83 2:24 1st Qu.: 4.00 1st Qu.:236 - Furrow:42 3:21 Median : 5.75 Median :310 - Seed :42 4:24 Mean : 6.11 Mean :330 - 5:24 3rd Qu.: 7.25 3rd Qu.:401 - 6:24 Max. :13.00 Max. :724 - 7:24 - mass yield visit foliage - Min. : 987 Min. : 2778 Min. : 0.75 Min. :0.00014 - 1st Qu.: 2751 1st Qu.: 7191 1st Qu.: 5.75 1st Qu.:0.00106 - Median : 5729 Median :11998 Median : 8.25 Median :0.00274 - Mean : 6638 Mean :13465 Mean : 8.70 Mean :0.00980 - 3rd Qu.: 9673 3rd Qu.:17418 3rd Qu.:11.00 3rd Qu.:0.01199 - Max. :16016 Max. :34790 Max. :23.50 Max. :0.07131 - - scb defoliation - Min. : 0.125 Min. : 0.00 - 1st Qu.: 1.250 1st Qu.: 0.75 - Median : 1.938 Median : 2.13 - Mean : 2.772 Mean : 7.72 - 3rd Qu.: 3.125 3rd Qu.:12.50 - Max. :22.875 Max. :48.75 - NA's :3 -\end{Soutput} -\end{Schunk} +constructing simultaneous confidence intervals. First, \code{thiamethoxam} is a +\class{data.frame} with 165 observations and 11 variables. A summary of +\code{thiamethoxam} is provided below. +% +<>= +data("thiamethoxam") +summary(thiamethoxam) +@ % The variables \code{trt} and \code{var} are \class{factor} variables for the -application methods and the plant varieties, respectively. -The \code{visit} variable denotes the number of bee visits per plot. -The ridgeline plot in Figure~\ref{fig:ridgeline_plot} created by the -\pkg{ggridges} package \citep{ggridges} shows distinct distributions of -\code{visit} by \code{trt} and \code{var}. -Note that the ranges of \code{visit} differ by \code{trt}. -The seed treatment (\code{Seed}) records the largest number of visits among the -methods compared to no treatment (\code{None}). -As for the variety, Success PM (\code{SPM}) tends to have a larger number of -visits than Golden Zucchini (\code{GZ}). 
+application methods and the plant varieties, respectively. The \code{visit} +variable denotes the number of bee visits per plot. The ridgeline plot in +Figure~\ref{fig:ridgeline_plot} created by the \pkg{ggridges} package +\citep{ggridges} shows distinct distributions of \code{visit} by \code{trt} and +\code{var}. Note that the ranges of \code{visit} differ by \code{trt}. The seed +treatment (\code{Seed}) records the largest number of visits among the methods +compared to no treatment (\code{None}). As for the variety, Success PM +(\code{SPM}) tends to have a larger number of visits than Golden Zucchini +(\code{GZ}). Considering \code{visit} as our response variable, we also include +\code{fruit} (average number of fruits per plant) and \code{defoliation} +(percentage defoliation) in our model as numeric variables. Particularly, +\citet{obregon2022pest} conducted a path analysis with the \pkg{piecewiseSEM} +package \citep{lefcheck2016piecewisesem}, showing that the percentage +defoliation significantly reduces the number of visits. % \begin{figure}[t!] 
 \centering -% \includegraphics{jss4834-ridgeline_plot} +<>= +library(ggridges) +ggplot(thiamethoxam, aes( + x = visit, y = trt, fill = var, linetype = var, color = var +)) + + geom_density_ridges2( + alpha = 0.5, scale = 0.9, bandwidth = 1.5, rel_min_height = 0.01, + jittered_points = TRUE, point_shape = "|", point_size = 3, + position = position_points_jitter(width = 0.05, height = 0) + ) + + theme( + axis.text = element_text(size = 12, color = "black"), + axis.title = element_text(size = 12), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, linewidth = 1), + panel.grid = element_blank(), + legend.position = "bottom", + legend.text = element_text(size = 10, color = "black"), + legend.background = element_rect(fill = alpha("white", 0)), + legend.margin = margin(t = 0), + legend.key = element_rect(fill = alpha("white", 1)), + ) + + labs( + x = "Number of bee visits", y = "Treatment", + colour = "", linetype = "", fill = "" + ) + + scale_linetype_manual( + breaks = c("GZ", "SPM"), + values = c("solid", "dashed") + ) + + scale_colour_manual( + breaks = c("GZ", "SPM"), + values = c("red", "blue") + ) + + scale_fill_manual( + breaks = c("GZ", "SPM"), + values = c("red", "blue") + ) +@ \caption{\label{fig:ridgeline_plot} Ridgeline plot showing the densities of the number of bee visits (\code{visit}), grouped by the application methods (\code{trt}) and plant varieties (\code{var}). @@ -1527,268 +1399,126 @@ Solid red and dashed blue lines correspond to Golden Zucchini (\code{GZ}) and Success PM (\code{SPM}), respectively. Rugs show jittered data points.} \end{figure} -Considering \code{visit} as our response variable, we also include \code{fruit} -(average number of fruits per plant) and \code{defoliation} (percentage -defoliation) in our model as numeric variables. 
-Particularly, \citet{obregon2022pest} conducted a path analysis with the -\pkg{piecewiseSEM} package \citep{lefcheck2016piecewisesem}, showing that the -percentage defoliation significantly reduces the number of visits. Next, we fit a quasi-Poisson regression model with a log link function using \fct{el\_glm} to obtain a \class{QGLM} model object. % -\begin{Schunk} -\begin{Sinput} -R> fit3_glm <- el_glm(visit ~ trt + var + fruit + defoliation, -+ family = quasipoisson(link = "log"), data = thiamethoxam, -+ control = ctrl) -R> print(summary(fit3_glm), width.cutoff = 50) -\end{Sinput} -\begin{Soutput} - Empirical Likelihood - -Model: glm (quasipoisson family with log link) - -Call: -el_glm(formula = visit ~ trt + var + fruit + defoliation, - family = quasipoisson(link = "log"), data = thiamethoxam, - control = ctrl) - -Number of observations: 165 -Number of parameters: 7 - -Parameter values under the null hypothesis: -(Intercept) trtSpray trtFurrow trtSeed varGZ - 1.97 0.00 0.00 0.00 0.00 - fruit defoliation phi - 0.00 0.00 1.73 - -Lagrange multipliers: -[1] -0.2032 -0.1863 0.0183 0.1450 -0.1746 0.1096 -0.0487 -0.0877 - -Maximum EL estimates: -(Intercept) trtSpray trtFurrow trtSeed varGZ - 1.9723 -0.1128 0.0800 0.3179 -0.2109 - fruit defoliation - 0.0514 -0.0204 - -logL: -910 , logLR: -67.2 -Chisq: 134, df: 6, Pr(>Chisq): <2e-16 -Constrained EL: converged - -Coefficients: - Estimate Chisq Pr(>Chisq) -(Intercept) 1.9723 421.87 < 2e-16 *** -trtSpray -0.1128 1.68 0.19489 -trtFurrow 0.0800 1.01 0.31404 -trtSeed 0.3179 11.95 0.00055 *** -varGZ -0.2109 9.50 0.00206 ** -fruit 0.0514 14.47 0.00014 *** -defoliation -0.0204 27.15 1.9e-07 *** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1 - -Dispersion for quasipoisson family: 1.726 -\end{Soutput} -\end{Schunk} +<>= +fit3_glm <- el_glm(visit ~ trt + var + fruit + defoliation, + family = quasipoisson(link = "log"), data = thiamethoxam, + control = ctrl +) +print(summary(fit3_glm), width.cutoff = 50) +@ % The dispersion estimate corresponds to \({\hat{\phi}}\) in -Equation~\ref{eq:phi}. -This estimate is smaller than the one obtained from \fct{summary} when applied -to a \class{glm} object because the denominator in Equation~\ref{eq:phi} is -\({n}\) instead of \({n - p}\). -The solution to the constrained EL problem also includes \code{phi}, which is -not part of the overall model constraint. -Both \code{fruit} and \code{defoliation} are significant, although the estimates -are smaller than other variables. -With only the level \code{Seed} being significant in \code{trt}, we assess the +Equation~\ref{eq:phi}. This estimate is smaller than the one obtained from +\fct{summary} when applied to a \class{glm} object because the denominator in +Equation~\ref{eq:phi} is \({n}\) instead of \({n - p}\). The solution to the +constrained EL problem also includes \code{phi}, which is not part of the +overall model constraint. Both \code{fruit} and \code{defoliation} are +significant, although the estimates are smaller than other variables. With only +the level \code{Seed} being significant in \code{trt}, we assess the significance of \code{trt} by testing whether the coefficients are all zero. The output of \fct{summary} reports a small \({p}\)~value with a different solution from the overall model test. 
% -\begin{Schunk} -\begin{Sinput} -R> elt2_glm <- elt(fit3_glm, lhs = c("trtSpray", "trtFurrow", "trtSeed")) -R> summary(elt2_glm) -\end{Sinput} -\begin{Soutput} - Empirical Likelihood Test -\end{Soutput} - -\begin{Soutput} -Hypothesis: -trtSpray = 0 -trtFurrow = 0 -trtSeed = 0 - -Significance level: 0.05, Calibration: Chi-square - -Parameter values under the null hypothesis: -(Intercept) trtSpray trtFurrow trtSeed varGZ - 1.9732 0.0000 0.0000 0.0000 -0.2102 - fruit defoliation phi - 0.0596 -0.0254 1.7270 - -Lagrange multipliers: -[1] -0.09787 -0.15872 0.12336 0.25170 0.00985 -0.00207 0.00769 -[8] 0.02068 +<>= +elt2_glm <- elt(fit3_glm, lhs = c("trtSpray", "trtFurrow", "trtSeed")) +summary(elt2_glm) +@ -logL: -850, logLR: -7.34 -Statistic: 14.7, Critical value: 7.81 -p-value: 0.00211 -Constrained EL: converged -\end{Soutput} -\end{Schunk} -% Finally, we extend the hypothesis testing framework of Section~\ref{sec:4.3} to multiple testing with \fct{elmt}, which can be directly applied to the fitted -model object. -Its syntax is similar to \fct{elt}, where \code{rhs} and \code{lhs} now specify -multiple hypotheses. +model object. Its syntax is similar to \fct{elt}, where \code{rhs} and +\code{lhs} now specify multiple hypotheses. \begin{Code} elmt(object, rhs = NULL, lhs = NULL, alpha = 0.05, control = NULL) \end{Code} For general hypotheses involving separate matrices, \fct{elmt} accepts -\class{list} objects for \code{rhs} and \code{lhs}. -The corresponding elements of \code{rhs} and \code{lhs} together form a -hypothesis, as in Equation~\ref{eq:linear hypothesis}. -The \fct{elmt} function employs a multivariate chi-square calibration technique -based on -Monte Carlo simulations to determine the common critical value. -Details of multiple testing procedures are given in \citet{kim2023empirical}. -Continuing on the previous test result, we perform comparisons with the control, -which is our primary interest. -We set the overall significance level at \({0.05}\). 
-% -\begin{Schunk} -\begin{Sinput} -R> elmt_glm <- elmt(fit3_glm, lhs = list("trtSpray", "trtFurrow", "trtSeed")) -R> summary(elmt_glm) -\end{Sinput} -\begin{Soutput} - Empirical Likelihood Multiple Tests - -Overall significance level: 0.05 - -Calibration: Multivariate chi-square - -Hypotheses: - Estimate Chisq Df p.adj -trtSpray = 0 -0.113 1.68 1 0.4635 -trtFurrow = 0 0.080 1.01 1 0.6625 -trtSeed = 0 0.318 11.95 1 0.0016 ** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - -Common critical value: 5.66 -\end{Soutput} -\end{Schunk} +\class{list} objects for \code{rhs} and \code{lhs}. The corresponding elements +of \code{rhs} and \code{lhs} together form a hypothesis, as in +Equation~\ref{eq:linear hypothesis}. The \fct{elmt} function employs a +multivariate chi-square calibration technique based on Monte Carlo simulations +to determine the common critical value. Details of multiple testing procedures +are given in \citet{kim2023empirical}. Continuing on the previous test result, +we perform comparisons with the control, which is our primary interest. We set +the overall significance level at \({0.05}\). +% +<>= +elmt_glm <- elmt(fit3_glm, lhs = list("trtSpray", "trtFurrow", "trtSeed")) +summary(elmt_glm) +@ % Note the use of a \class{list} for \code{lhs} by \fct{elmt}. While a character vector \code{lhs} acts as a single hypothesis for \fct{elt}, elements of -\code{lhs} in \fct{elmt} define distinct hypotheses for convenience. -The \code{Df} column shows the marginal chi-square degrees of freedom for each +\code{lhs} in \fct{elmt} define distinct hypotheses for convenience. The +\code{Df} column shows the marginal chi-square degrees of freedom for each hypothesis. -Further, we compare the result with the output of \fct{glht} in -\pkg{multcomp}, which relies on (asymptotic) multivariate normal and -\({t}\)~distributions for simultaneous tests. 
-% -\begin{Schunk} -\begin{Sinput} -R> library("multcomp") -R> fit4_glm <- glm(visit ~ trt + var + fruit + defoliation, -+ family = quasipoisson(link = "log"), data = thiamethoxam) -R> fit4_glm$call <- NULL -R> glht_glm <- glht(fit4_glm, -+ linfct = mcp(trt = c("Spray = 0", "Furrow = 0", "Seed = 0"))) -R> summary(glht_glm) -\end{Sinput} -\begin{Soutput} - Simultaneous Tests for General Linear Hypotheses - -Multiple Comparisons of Means: User-defined Contrasts - -Linear Hypotheses: - Estimate Std. Error z value Pr(>|z|) -Spray == 0 -0.113 0.124 -0.91 0.6995 -Furrow == 0 0.080 0.111 0.72 0.8224 -Seed == 0 0.318 0.103 3.10 0.0057 ** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -(Adjusted p values reported -- single-step method) -\end{Soutput} -\end{Schunk} +Further, we compare the result with the output of \fct{glht} in \pkg{multcomp}, +which relies on (asymptotic) multivariate normal and \({t}\)~distributions for +simultaneous tests. +% +<>= +library("multcomp") +fit4_glm <- glm(visit ~ trt + var + fruit + defoliation, + family = quasipoisson(link = "log"), data = thiamethoxam, +) +fit4_glm$call <- NULL +glht_glm <- glht(fit4_glm, + linfct = mcp(trt = c("Spray = 0", "Furrow = 0", "Seed = 0")) +) +summary(glht_glm) +@ +\vspace*{-0.1cm} % For the hypothesis \code{Seed} vs. \code{None}, the adjusted \({p}\)~values are -\({0.00243}\) for \fct{elmt} and \({0.00563}\) for \fct{glht}. -Both procedures reject this hypothesis at the overall level of \({0.05}\) and -conclude that only the seed treatment is significantly different from the -control. -Since each hypothesis conforms to a linear combination of the parameters, -\fct{confint} can be applied to produce asymptotic \({95\%}\) simultaneous -confidence intervals. +\({0.00243}\) for \fct{elmt} and \({0.00563}\) for \fct{glht}. Both procedures +reject this hypothesis at the overall level of \({0.05}\) and conclude that only +the seed treatment is significantly different from the control. 
Since each +hypothesis conforms to a linear combination of the parameters, \fct{confint} can +be applied to produce asymptotic \({95\%}\) simultaneous confidence intervals. For an object of class \class{ELMT}, \fct{confint} uses the common critical -value computed by \fct{elmt}. -Below we give the intervals from the two procedures. -% -\begin{Schunk} -\begin{Sinput} -R> confint(elmt_glm) -\end{Sinput} -\begin{Soutput} - lower upper -trtSpray = 0 -0.3689 0.08084 -trtFurrow = 0 -0.1141 0.26524 -trtSeed = 0 0.1041 0.51379 -\end{Soutput} -\begin{Sinput} -R> glht_sci <- confint(glht_glm)$confint -R> attributes(glht_sci)[c("calpha", "conf.level")] <- NULL -R> glht_sci -\end{Sinput} -\begin{Soutput} - Estimate lwr upr -Spray -0.11281 -0.40521 0.1796 -Furrow 0.08001 -0.18256 0.3426 -Seed 0.31794 0.07583 0.5601 -\end{Soutput} -\end{Schunk} +value computed by \fct{elmt}. Below we give the intervals from the two +procedures. +\vspace*{-0.1cm} +% +<>= +confint(elmt_glm) +glht_sci <- confint(glht_glm)$confint +attributes(glht_sci)[c("calpha", "conf.level")] <- NULL +glht_sci +@ \section{Conclusion}\label{sec:conclusion} Empirical likelihood enables a likelihood-driven style of inference without the -restrictive distributional assumptions of parametric models. -Perhaps more importantly, while being nonparametric, empirical likelihood -retains some desirable properties of parametric likelihood. -In many ways, it is an attractive and natural approach to estimation and -hypothesis testing, but its use has been limited due to computational -difficulties compared to other methods. -The \proglang{R} package \pkg{melt} aims to bridge the gap and provide a unified -framework for data analysis with empirical likelihood methods. -The package is developed to conduct statistical inference routinely made in -\proglang{R} with empirical likelihood. -Mainly, hypothesis testing is available for various models with smooth -estimating functions. 
-Examples in this paper demonstrate the functionality of \pkg{melt}. -We provide more examples and details on the package website -\url{https://docs.ropensci.org/melt/}. -Future work will focus on expanding the scope to additional estimating functions -and models. -The package structure and its adoption of \proglang{S}4 classes and methods are -designed for extensibility. -Optimization algorithms tailored to specific models can also be added in the -process. +restrictive distributional assumptions of parametric models. Perhaps more +importantly, while being nonparametric, empirical likelihood retains some +desirable properties of parametric likelihood. In many ways, it is an attractive +and natural approach to estimation and hypothesis testing, but its use has been +limited due to computational difficulties compared to other methods. The +\proglang{R} package \pkg{melt} aims to bridge the gap and provide a unified +framework for data analysis with empirical likelihood methods. The package is +developed to conduct statistical inference routinely made in \proglang{R} with +empirical likelihood. Mainly, hypothesis testing is available for various models +with smooth estimating functions. Examples in this paper demonstrate the +functionality of \pkg{melt}. We provide more examples and details on the package +website \url{https://docs.ropensci.org/melt/}. Future work will focus on +expanding the scope to additional estimating functions and models. The package +structure and its adoption of \proglang{S}4 classes and methods are designed for +extensibility. Optimization algorithms tailored to specific models can also be +added in the process. \section*{Acknowledgments} - We thank Pierre Chausse and Alex Stringer for their comments and suggestions on the package during the rOpenSci review process. This work was supported by the U.S.~National Science Foundation under Grants No.~SES-1921523 and DMS-2015552. 
-\bibliography{references2.bib} +\bibliography{references.bib} \end{document} diff --git a/vignettes/references.bib b/vignettes/references.bib index 8b870be..91b7b0f 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -1,5 +1,284 @@ -@article{kim2024melt, - title = {{melt}: Multiple Empirical Likelihood Tests in {R}}, +@Article{adimari2010note, + title = {A Note on the Asymptotic Behaviour of Empirical Likelihood Statistics}, + author = {Gianfranco Adimari and Annamaria Guolo}, + journal = {Statistical Methods \& Applications}, + volume = {19}, + number = {4}, + pages = {463--476}, + year = {2010}, + publisher = {Springer-Verlag}, + doi = {10.1007/s10260-010-0137-9}, +} + +@Article{benjamini1995controlling, + author = {Yoav Benjamini and Yosef Hochberg}, + title = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}, + journal = {Journal of the Royal Statistical Society B}, + volume = {57}, + number = {1}, + pages = {289--300}, + doi = {10.1111/j.2517-6161.1995.tb02031.x}, + year = {1995}, +} + +@Article{blackman2021scrambled, + author = {David Blackman and Sebastiano Vigna}, + title = {Scrambled Linear Pseudorandom Number Generators}, + year = {2021}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + volume = {47}, + number = {4}, + issn = {0098-3500}, + doi = {10.1145/3460772}, + journal = {ACM Transactions on Mathematical Software}, + articleno = {36}, + numpages = {32}, +} + +@Article{chambers2014object, + author = {John M. 
Chambers}, + title = {Object-Oriented Programming, Functional Programming and \proglang{R}}, + volume = {29}, + journal = {Statistical Science}, + number = {2}, + publisher = {Institute of Mathematical Statistics}, + pages = {167--180}, + year = {2014}, + doi = {10.1214/13-sts452}, +} + +@Article{chaudhuri2017hamiltonian, + author = {Sanjay Chaudhuri and Debashis Mondal and Teng Yin}, + title = {Hamiltonian Monte Carlo Sampling in Bayesian Empirical Likelihood Computation}, + journal = {Journal of the Royal Statistical Society B}, + volume = {79}, + number = {1}, + pages = {293--320}, + doi = {10.1111/rssb.12164}, + year = {2017}, +} + +@Article{chen2009effects, + author = {Song Xi Chen and Liang Peng and Ying-Li Qin}, + title = {Effects of Data Dimension on Empirical Likelihood}, + journal = {Biometrika}, + volume = {96}, + number = {3}, + pages = {711--722}, + year = {2009}, + doi = {10.1093/biomet/asp037}, +} + +@Article{chen2009review, + title = {A Review on Empirical Likelihood Methods for Regression}, + author = {Song Xi Chen and Van Keilegom, Ingrid}, + journal = {Test}, + volume = {18}, + number = {3}, + pages = {415--447}, + year = {2009}, + publisher = {Springer-Verlag}, + doi = {10.1007/s11749-009-0159-5}, +} + +@Article{cook1986assessment, + author = {R. Dennis Cook}, + title = {Assessment of Local Influence}, + journal = {Journal of the Royal Statistical Society B}, + volume = {48}, + number = {2}, + pages = {133--155}, + doi = {10.1111/j.2517-6161.1986.tb01398.x}, + year = {1986}, +} + +@Article{dagum1998openmp, + author = {L. Dagum and R. Menon}, + journal = {IEEE Computational Science and Engineering}, + number = {1}, + pages = {46--55}, + title = {OpenMP: An Industry Standard API for Shared-Memory Programming}, + volume = {5}, + year = {1998}, + doi = {10.1109/99.660313}, +} + +@Article{diciccio1990nonparametric, + author = {Thomas J. DiCiccio and Joseph P. 
Romano}, + journal = {International Statistical Review}, + number = {1}, + pages = {59--76}, + publisher = {John Wiley \& Sons}, + title = {Nonparametric Confidence Limits by Resampling Methods and Least Favorable Families}, + volume = {58}, + year = {1990}, + doi = {10.2307/1403474}, +} + +@Article{diciccio1991empirical, + author = {Thomas DiCiccio and Peter Hall and Joseph Romano}, + title = {Empirical Likelihood Is Bartlett-Correctable}, + volume = {19}, + journal = {The Annals of Statistics}, + number = {2}, + publisher = {Institute of Mathematical Statistics}, + pages = {1053--1061}, + year = {1991}, + doi = {10.1214/aos/1176348137}, +} + +@Article{dickhaus2015survey, + author = {Thorsten Dickhaus and Thomas Royen}, + title = {A Survey on Multivariate Chi-Square Distributions and Their Applications in Testing Multiple Hypotheses}, + journal = {Statistics}, + volume = {49}, + number = {2}, + pages = {427--454}, + year = {2015}, + publisher = {Taylor \& Francis}, + doi = {10.1080/02331888.2014.993639}, +} + +@Article{dickhaus2019simultaneous, + title = {Simultaneous Statistical Inference in Dynamic Factor Models: Chi-Square Approximation and Model-Based Bootstrap}, + journal = {Computational Statistics \& Data Analysis}, + volume = {129}, + pages = {30--46}, + year = {2019}, + doi = {10.1016/j.csda.2018.08.012}, + author = {Thorsten Dickhaus and Natalia Sirotko-Sibirskaya}, +} + +@Article{efron1981nonparametric, + author = {Bradley Efron}, + title = {Nonparametric Standard Errors and Confidence Intervals}, + journal = {Canadian Journal of Statistics}, + volume = {9}, + number = {2}, + pages = {139--158}, + doi = {10.2307/3314608}, + year = {1981}, +} + +@Article{glenn2007weighted, + title = {Weighted Empirical Likelihood Estimates and Their Robustness Properties}, + journal = {Computational Statistics \& Data Analysis}, + volume = {51}, + number = {10}, + pages = {5130--5141}, + year = {2007}, + doi = {10.1016/j.csda.2006.07.032}, + author = {N. L. 
Glenn and Yichuan Zhao}, +} + +@Article{gmm, + title = {Computing Generalized Method of Moments and Generalized Empirical + Likelihood with \proglang{R}}, + author = {Pierre Chauss\'e}, + journal = {Journal of Statistical Software}, + year = {2010}, + volume = {34}, + number = {11}, + pages = {1--35}, + doi = {10.18637/jss.v034.i11}, +} + +@Article{haber2021striped, + author = {Ariela I Haber and Anna K Wallingford and Ian M Grettenberger and Jasmin P {Ramirez Bonilla} and Amber C Vinchesi-Vahl and Donald C Weber}, + title = {Striped Cucumber Beetle and Western Striped Cucumber Beetle (Coleoptera: Chrysomelidae)}, + journal = {Journal of Integrated Pest Management}, + volume = {12}, + number = {1}, + year = {2021}, + pages = {1--10}, + doi = {10.1093/jipm/pmaa026}, +} + +@Article{hall1990methodology, + author = {Peter Hall and Barbara La Scala}, + journal = {International Statistical Review}, + number = {2}, + pages = {109--127}, + publisher = {John Wiley \& Sons}, + title = {Methodology and Algorithms of Empirical Likelihood}, + volume = {58}, + year = {1990}, + doi = {10.2307/1403462}, +} + +@Article{hansen1982large, + author = {Lars Peter Hansen}, + journal = {Econometrica}, + number = {4}, + pages = {1029--1054}, + publisher = {John Wiley \& Sons}, + title = {Large Sample Properties of Generalized Method of Moments Estimators}, + volume = {50}, + year = {1982}, + doi = {10.2307/1912775}, +} + +@Article{hansen1996finite, + author = {Lars Peter Hansen and John Heaton and Amir Yaron}, + journal = {Journal of Business \& Economic Statistics}, + number = {3}, + pages = {262--280}, + title = {Finite-Sample Properties of Some Alternative GMM Estimators}, + volume = {14}, + year = {1996}, + doi = {10.2307/1392442}, +} + +@Article{hjort2009extending, + author = {Nils Lid Hjort and Ian W. 
McKeague and Van Keilegom, Ingrid}, + title = {Extending the Scope of Empirical Likelihood}, + volume = {37}, + journal = {The Annals of Statistics}, + number = {3}, + publisher = {Institute of Mathematical Statistics}, + pages = {1079--1111}, + year = {2009}, + doi = {10.1214/07-aos555}, +} + +@Article{imbens1997one, + author = {Guido W. Imbens}, + title = {One-Step Estimators for Over-Identified Generalized Method of Moments Models}, + journal = {The Review of Economic Studies}, + volume = {64}, + number = {3}, + pages = {359--383}, + year = {1997}, + doi = {10.2307/2971718}, +} + +@Article{jacod2018review, + title = {A Review of Asymptotic Theory of Estimating Functions}, + author = {Jean Jacod and Michael S{\o}rensen}, + journal = {Statistical Inference for Stochastic Processes}, + volume = {21}, + number = {2}, + pages = {415--434}, + year = {2018}, + publisher = {Springer-Verlag}, + doi = {10.1007/s11203-018-9178-8}, +} + +@Article{kim2023empirical, + author = {Eunseop Kim and Steven N. MacEachern and Mario Peruggia}, + title = {Empirical Likelihood for the Analysis of Experimental Designs}, + journal = {Journal of Nonparametric Statistics}, + volume = {35}, + number = {4}, + pages = {709--732}, + year = {2023}, + publisher = {Taylor \& Francis}, + doi = {10.1080/10485252.2023.2206919}, +} + +@Article{kim2024melt, + title = {\pkg{melt}: Multiple Empirical Likelihood Tests in \proglang{R}}, author = {Eunseop Kim and Steven N. MacEachern and Mario Peruggia}, journal = {Journal of Statistical Software}, year = {2024}, @@ -9,16 +288,305 @@ @article{kim2024melt doi = {10.18637/jss.v108.i05} } -@article{kim2023empirical, - title = {Empirical Likelihood for the Analysis of Experimental Designs}, - author = {Eunseop Kim and Steven N.
MacEachern and Mario Peruggia}, - journal = {Journal of Nonparametric Statistics}, - volume = {35}, - number = {4}, - pages = {709--732}, - year = {2023}, +@Article{kitamura1997empirical, + author = {Yuichi Kitamura}, + title = {Empirical Likelihood Methods with Weakly Dependent Processes}, + volume = {25}, + journal = {The Annals of Statistics}, + number = {5}, + publisher = {Institute of Mathematical Statistics}, + pages = {2084--2102}, + doi = {10.1214/aos/1069362388}, + year = {1997}, +} + +@Article{kitamura1997information, + author = {Yuichi Kitamura and Michael Stutzer}, + journal = {Econometrica}, + number = {4}, + pages = {861--874}, + publisher = {John Wiley \& Sons}, + title = {An Information-Theoretic Alternative to Generalized Method of Moments Estimation}, + volume = {65}, + year = {1997}, + doi = {10.2307/2171942}, +} + +@Article{kitamura2004empirical, + author = {Yuichi Kitamura and Gautam Tripathi and Hyungtaik Ahn}, + title = {Empirical Likelihood-Based Inference in Conditional Moment Restriction Models}, + journal = {Econometrica}, + volume = {72}, + number = {6}, + pages = {1667--1714}, + doi = {10.1111/j.1468-0262.2004.00550.x}, + year = {2004}, +} + +@Article{knapp2019cucurbits, + title = {Cucurbits as a Model System for Crop Pollination Management}, + volume = {25}, + doi = {10.26786/1920-7603(2019)535}, + journal = {Journal of Pollination Ecology}, + author = {Jessica Louise Knapp and Juliet Laura Osborne}, + pages = {88--102}, + year = {2019}, +} + +@Article{kolaczyk1994empirical, + author = {Eric D. 
Kolaczyk}, + journal = {Statistica Sinica}, + number = {1}, + pages = {199--218}, + publisher = {Institute of Statistical Science, Academia Sinica}, + title = {Empirical Likelihood for Generalized Linear Models}, + volume = {4}, + year = {1994} +} + +@Article{lazar2005assessing, + author = {Nicole A Lazar}, + title = {Assessing the Effect of Individual Data Points on Inference From Empirical Likelihood}, + journal = {Journal of Computational and Graphical Statistics}, + volume = {14}, + number = {3}, + pages = {626--642}, + year = {2005}, + publisher = {Taylor \& Francis}, + doi = {10.1198/106186005x59568}, +} + +@Article{lefcheck2016piecewisesem, + title = {\pkg{piecewiseSEM}: Piecewise Structural Equation Modeling in \proglang{R} for Ecology, Evolution, and Systematics}, + journal = {Methods in Ecology and Evolution}, + volume = {7}, + number = {5}, + pages = {573--579}, + year = {2016}, + author = {Jonathan S. Lefcheck}, + doi = {10.1111/2041-210x.12512}, +} + +@Article{multcomp, + title = {Simultaneous Inference in General Parametric Models}, + author = {Torsten Hothorn and Frank Bretz and Peter Westfall}, + journal = {Biometrical Journal}, + year = {2008}, + volume = {50}, + number = {3}, + pages = {346--363}, + doi = {10.1002/bimj.200810425}, +} + +@Article{newey2004higher, + author = {Whitney K. Newey and Richard J. Smith}, + title = {Higher Order Properties of GMM and Generalized Empirical Likelihood Estimators}, + journal = {Econometrica}, + volume = {72}, + number = {1}, + pages = {219--255}, + doi = {10.1111/j.1468-0262.2004.00482.x}, + year = {2004}, +} + +@Article{nordman2014review, + title = {A Review of Empirical Likelihood Methods for Time Series}, + journal = {Journal of Statistical Planning and Inference}, + volume = {155}, + pages = {1--18}, + year = {2014}, + doi = {10.1016/j.jspi.2013.10.001}, + author = {Daniel J. Nordman and Soumendra N.
Lahiri}, +} + +@Article{obregon2022pest, + doi = {10.1371/journal.pone.0267984}, + author = {Diana Obregon and Grace Pederson and Alan Taylor and Katja Poveda}, + journal = {PLOS One}, + publisher = {Public Library of Science}, + title = {The Pest Control and Pollinator Protection Dilemma: The Case of Thiamethoxam Prophylactic Applications in Squash Crops}, + year = {2022}, + volume = {17}, + pages = {1--18}, + number = {5}, +} + +@Article{owen1988empirical, + author = {Art Owen}, + title = {Empirical Likelihood Ratio Confidence Intervals for a Single Functional}, + journal = {Biometrika}, + volume = {75}, + number = {2}, + pages = {237--249}, + year = {1988}, + doi = {10.1093/biomet/75.2.237}, +} + +@Article{owen1990empirical, + author = {Art Owen}, + title = {Empirical Likelihood Ratio Confidence Regions}, + volume = {18}, + journal = {The Annals of Statistics}, + number = {1}, + publisher = {Institute of Mathematical Statistics}, + pages = {90--120}, + year = {1990}, + doi = {10.1214/aos/1176347494}, +} + +@Article{owen1991empirical, + author = {Art Owen}, + title = {Empirical Likelihood for Linear Models}, + volume = {19}, + journal = {The Annals of Statistics}, + number = {4}, + publisher = {Institute of Mathematical Statistics}, + pages = {1725--1747}, + year = {1991}, + doi = {10.1214/aos/1176348368}, +} + +@Article{qin1994empirical, + author = {Jin Qin and Jerry Lawless}, + title = {Empirical Likelihood and General Estimating Equations}, + volume = {22}, + journal = {The Annals of Statistics}, + number = {1}, + publisher = {Institute of Mathematical Statistics}, + pages = {300--325}, + year = {1994}, + doi = {10.1214/aos/1176325370}, +} + +@Article{qin1995estimating, + author = {Jing Qin and Jerry Lawless}, + title = {Estimating Equations, Empirical Likelihood and Constraints on Parameters}, + journal = {Canadian Journal of Statistics}, + volume = {23}, + number = {2}, + pages = {145--159}, + doi = {10.2307/3315441}, + year = {1995}, +} + +@Article{Rcpp, + 
title = {Extending \proglang{R} with \proglang{C++}: A Brief Introduction to \pkg{Rcpp}}, + author = {Dirk Eddelbuettel and James Joseph Balamuta}, + journal = {The American Statistician}, + year = {2018}, + volume = {72}, + number = {1}, + pages = {28--36}, + doi = {10.1080/00031305.2017.1375990}, +} + +@Article{RcppEigen, + title = {Fast and Elegant Numerical Linear Algebra Using the \pkg{RcppEigen} Package}, + author = {Douglas Bates and Dirk Eddelbuettel}, + journal = {Journal of Statistical Software}, + year = {2013}, + volume = {52}, + number = {5}, + pages = {1--24}, + doi = {10.18637/jss.v052.i05}, +} + +@Article{smith1997alternative, + author = {Richard J. Smith}, + title = {Alternative Semi-Parametric Likelihood Approaches to Generalised Method of Moments Estimation}, + journal = {The Economic Journal}, + volume = {107}, + number = {441}, + pages = {503--519}, + year = {1997}, + doi = {10.1111/j.0013-0133.1997.174.x}, +} + +@Article{tang2014nested, + author = {Cheng Yong Tang and Tong Tong Wu}, + title = {Nested Coordinate Descent Algorithms for Empirical Likelihood}, + journal = {Journal of Statistical Computation and Simulation}, + volume = {84}, + number = {9}, + pages = {1917--1930}, + year = {2014}, + publisher = {Taylor \& Francis}, + doi = {10.1080/00949655.2013.770514}, +} + +@Article{wang2018f, + author = {Lei Wang and Dan Yang}, + title = {$F$-Distribution Calibrated Empirical Likelihood Ratio Tests for Multiple Hypothesis Testing}, + journal = {Journal of Nonparametric Statistics}, + volume = {30}, + number = {3}, + pages = {662--679}, + year = {2018}, publisher = {Taylor & Francis}, - doi = {10.1080/10485252.2023.2206919} + doi = {10.1080/10485252.2018.1461867}, +} + +@Article{wedderburn1974quasi, + author = {R. W. M. 
Wedderburn}, + title = {Quasi-Likelihood Functions, Generalized Linear Models, and the Gauss-Newton Method}, + journal = {Biometrika}, + volume = {61}, + number = {3}, + pages = {439--447}, + year = {1974}, + doi = {10.1093/biomet/61.3.439}, +} + +@Article{xi2003extended, + author = {Song Xi Chen and Hengjian Cui}, + journal = {Statistica Sinica}, + number = {1}, + pages = {69--81}, + publisher = {Institute of Statistical Science, Academia Sinica}, + title = {An Extended Empirical Likelihood for Generalized Linear Models}, + volume = {13}, + year = {2003} +} + +@Article{yuan1998asymptotics, + title = {Asymptotics of Estimating Equations under Natural Conditions}, + journal = {Journal of Multivariate Analysis}, + volume = {65}, + number = {2}, + pages = {245--260}, + year = {1998}, + doi = {10.1006/jmva.1997.1731}, + author = {Ke-Hai Yuan and Robert I. Jennrich}, +} + +@Article{zhu2008diagnostic, + author = {Hongtu Zhu and Joseph G. Ibrahim and Niansheng Tang and Heping Zhang}, + title = {Diagnostic Measures for Empirical Likelihood of General Estimating Equations}, + journal = {Biometrika}, + volume = {95}, + number = {2}, + pages = {489--507}, + year = {2008}, + doi = {10.1093/biomet/asm094}, +} + +@Book{car, + title = {An \proglang{R} Companion to Applied Regression}, + edition = {3rd}, + author = {John Fox and Sanford Weisberg}, + year = {2019}, + publisher = {Sage}, + address = {Thousand Oaks}, + url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/}, +} + +@Book{ggplot2, + author = {Hadley Wickham}, + title = {\pkg{ggplot2}: Elegant Graphics for Data Analysis}, + publisher = {Springer-Verlag}, + year = {2016}, + isbn = {978-3-319-24277-4}, + doi = {10.1007/978-0-387-98141-3}, } @Book{MASS, @@ -32,11 +600,154 @@ @Book{MASS url = {https://www.stats.ox.ac.uk/pub/MASS4/}, } -@manual{melt, - title = {\pkg{melt}: Multiple Empirical Likelihood Tests}, +@Book{owen2001empirical, + title = {Empirical Likelihood}, + author = {Art Owen}, + year = {2001}, + 
address = {New York}, + publisher = {Chapman \& Hall/CRC}, + doi = {10.1201/9781420036152}, +} + +@Book{zhou2015empirical, + title = {Empirical Likelihood Method in Survival Analysis}, + author = {Mai Zhou}, + year = {2015}, + publisher = {Chapman \& Hall/CRC}, + doi = {10.1201/b18598}, +} + +@InBook{li2005empirical, + author = {Gang Li and Runze Li and Mai Zhou}, + title = {Empirical Likelihood in Survival Analysis}, + chapter = {Empirical Likelihood in Survival Analysis}, + booktitle = {Contemporary Multivariate Analysis And Design Of Experiments}, + pages = {337--349}, + doi = {10.1142/9789812567765_0020}, + year = {2005}, + publisher = {World Scientific}, +} + +@InProceedings{stein1956efficient, + title = {Efficient Nonparametric Testing and Estimation}, + author = {Charles Stein}, + booktitle = {Proceedings of the Third Berkeley Symposium on Mathematical Statistics and Probability}, + volume = {1}, + pages = {187--195}, + year = {1956}, + doi = {10.1525/9780520313880}, +} + +@Manual{agricolae, + title = {\pkg{agricolae}: Statistical Procedures for Agricultural Research}, + author = {Felipe {de Mendiburu}}, + year = {2023}, + note = {\proglang{R} package version 1.3-7}, + url = {https://CRAN.R-project.org/package=agricolae}, +} + +@Manual{carData, + title = {\pkg{carData}: Companion to Applied Regression Data Sets}, + author = {John Fox and Sanford Weisberg and Brad Price}, + year = {2022}, + note = {\proglang{R} package version 3.0-5}, + url = {https://CRAN.R-project.org/package=carData}, +} + +@Manual{dplyr, + title = {\pkg{dplyr}: A Grammar of Data Manipulation}, + author = {Hadley Wickham and Romain Fran\c{c}ois and Lionel Henry and Kirill + M\"uller}, + year = {2023}, + note = {\proglang{R} package version 1.1.3}, + url = {https://CRAN.R-project.org/package=dplyr}, +} + +@Manual{dqrng, + title = {\pkg{dqrng}: Fast Pseudo Random Number Generators}, + author = {Ralf Stubner}, + year = {2023}, + note = {\proglang{R} package version 0.3.1}, + url = 
{https://CRAN.R-project.org/package=dqrng}, +} + +@Manual{eel, + title = {\pkg{eel}: Extended Empirical Likelihood}, + author = {Fan Wu and Yu Zhang}, + year = {2015}, + note = {\proglang{R} package version 1.1}, + url = {https://CRAN.R-project.org/package=eel}, +} + +@Manual{EL, + title = {\pkg{EL}: Two-Sample Empirical Likelihood}, + author = {Janis Valeinis and Edmunds Cers}, + year = {2022}, + note = {\proglang{R} package version 1.2}, + url = {https://CRAN.R-project.org/package=EL}, +} + +@Manual{ELCIC, + title = {\pkg{ELCIC}: The Empirical Likelihood-Based Consistent Information Criterion}, + author = {Biyi Shen and Ming Wang}, + year = {2023}, + note = {\proglang{R} package version 0.2.1}, + url = {https://CRAN.R-project.org/package=ELCIC}, +} + +@Manual{elhmc, + title = {\pkg{elhmc}: Sampling from a Empirical Likelihood Bayesian Posterior of Parameters Using Hamiltonian Monte Carlo}, + author = {Dang Trung Kien and Sanjay Chaudhuri and Neo Han Wei}, + year = {2017}, + note = {\proglang{R} package version 1.1.0}, + url = {https://CRAN.R-project.org/package=elhmc}, +} + +@Manual{emplik, + title = {\pkg{emplik}: Empirical Likelihood Ratio for Censored/Truncated Data}, + author = {Mai Zhou}, + year = {2023}, + note = {\proglang{R} package version 1.3.1}, + url = {https://CRAN.R-project.org/package=emplik}, +} + +@Manual{emplik2, + title = {\pkg{emplik2}: Empirical Likelihood Ratio Test for Two Samples with Censored Data}, + author = {William H. Barton}, + year = {2022}, + note = {\proglang{R} package version 1.32}, + url = {https://CRAN.R-project.org/package=emplik2}, +} + +@Manual{ggridges, + title = {\pkg{ggridges}: Ridgeline Plots in \pkg{ggplot2}}, + author = {Claus O. 
Wilke}, + year = {2022}, + note = {\proglang{R} package version 0.5.4}, + url = {https://CRAN.R-project.org/package=ggridges}, +} + +@Manual{melt, + title = {\pkg{melt}: Multiple Empirical Likelihood Tests}, author = {Eunseop Kim}, - year = {2023}, - note = {\proglang{R} package version 1.10.0}, - url = {https://CRAN.R-project.org/package=melt} + year = {2023}, + note = {\proglang{R} package version 1.10.0}, + url = {https://CRAN.R-project.org/package=melt}, +} + +@Manual{momentfit, + title = {\pkg{momentfit}: Methods of Moments}, + author = {Pierre Chauss\'e}, + year = {2023}, + note = {\proglang{R} package version 0.5}, + url = {https://CRAN.R-project.org/package=momentfit}, } +@Manual{R, + title = {\proglang{R}: A Language and Environment for Statistical Computing}, + author = {{\proglang{R} Core Team}}, + organization = {\proglang{R} Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2023}, + url = {https://www.R-project.org/}, +} diff --git a/vignettes/references2.bib b/vignettes/references2.bib deleted file mode 100644 index 4192c63..0000000 --- a/vignettes/references2.bib +++ /dev/null @@ -1,753 +0,0 @@ -@Article{adimari2010note, - title = {A Note on the Asymptotic Behaviour of Empirical Likelihood Statistics}, - author = {Gianfranco Adimari and Annamaria Guolo}, - journal = {Statistical Methods \& Applications}, - volume = {19}, - number = {4}, - pages = {463--476}, - year = {2010}, - publisher = {Springer-Verlag}, - doi = {10.1007/s10260-010-0137-9}, -} - -@Article{benjamini1995controlling, - author = {Yoav Benjamini and Yosef Hochberg}, - title = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}, - journal = {Journal of the Royal Statistical Society B}, - volume = {57}, - number = {1}, - pages = {289--300}, - doi = {10.1111/j.2517-6161.1995.tb02031.x}, - year = {1995}, -} - -@Article{blackman2021scrambled, - author = {David Blackman and Sebastiano Vigna}, - title = {Scrambled Linear 
Pseudorandom Number Generators}, - year = {2021}, - publisher = {Association for Computing Machinery}, - address = {New York, NY, USA}, - volume = {47}, - number = {4}, - issn = {0098-3500}, - doi = {10.1145/3460772}, - journal = {ACM Transactions on Mathematical Software}, - articleno = {36}, - numpages = {32}, -} - -@Article{chambers2014object, - author = {John M. Chambers}, - title = {Object-Oriented Programming, Functional Programming and \proglang{R}}, - volume = {29}, - journal = {Statistical Science}, - number = {2}, - publisher = {Institute of Mathematical Statistics}, - pages = {167--180}, - year = {2014}, - doi = {10.1214/13-sts452}, -} - -@Article{chaudhuri2017hamiltonian, - author = {Sanjay Chaudhuri and Debashis Mondal and Teng Yin}, - title = {Hamiltonian Monte Carlo Sampling in Bayesian Empirical Likelihood Computation}, - journal = {Journal of the Royal Statistical Society B}, - volume = {79}, - number = {1}, - pages = {293--320}, - doi = {10.1111/rssb.12164}, - year = {2017}, -} - -@Article{chen2009effects, - author = {Song Xi Chen and Liang Peng and Ying-Li Qin}, - title = {Effects of Data Dimension on Empirical Likelihood}, - journal = {Biometrika}, - volume = {96}, - number = {3}, - pages = {711--722}, - year = {2009}, - doi = {10.1093/biomet/asp037}, -} - -@Article{chen2009review, - title = {A Review on Empirical Likelihood Methods for Regression}, - author = {Song Xi Chen and Van Keilegom, Ingrid}, - journal = {Test}, - volume = {18}, - number = {3}, - pages = {415--447}, - year = {2009}, - publisher = {Springer-Verlag}, - doi = {10.1007/s11749-009-0159-5}, -} - -@Article{cook1986assessment, - author = {R. Dennis Cook}, - title = {Assessment of Local Influence}, - journal = {Journal of the Royal Statistical Society B}, - volume = {48}, - number = {2}, - pages = {133--155}, - doi = {10.1111/j.2517-6161.1986.tb01398.x}, - year = {1986}, -} - -@Article{dagum1998openmp, - author = {L. Dagum and R. 
Menon}, - journal = {IEEE Computational Science and Engineering}, - number = {1}, - pages = {46--55}, - title = {OpenMP: An Industry Standard API for Shared-Memory Programming}, - volume = {5}, - year = {1998}, - doi = {10.1109/99.660313}, -} - -@Article{diciccio1990nonparametric, - author = {Thomas J. DiCiccio and Joseph P. Romano}, - journal = {International Statistical Review}, - number = {1}, - pages = {59--76}, - publisher = {John Wiley \& Sons}, - title = {Nonparametric Confidence Limits by Resampling Methods and Least Favorable Families}, - volume = {58}, - year = {1990}, - doi = {10.2307/1403474}, -} - -@Article{diciccio1991empirical, - author = {Thomas DiCiccio and Peter Hall and Joseph Romano}, - title = {Empirical Likelihood Is Bartlett-Correctable}, - volume = {19}, - journal = {The Annals of Statistics}, - number = {2}, - publisher = {Institute of Mathematical Statistics}, - pages = {1053--1061}, - year = {1991}, - doi = {10.1214/aos/1176348137}, -} - -@Article{dickhaus2015survey, - author = {Thorsten Dickhaus and Thomas Royen}, - title = {A Survey on Multivariate Chi-Square Distributions and Their Applications in Testing Multiple Hypotheses}, - journal = {Statistics}, - volume = {49}, - number = {2}, - pages = {427--454}, - year = {2015}, - publisher = {Taylor \& Francis}, - doi = {10.1080/02331888.2014.993639}, -} - -@Article{dickhaus2019simultaneous, - title = {Simultaneous Statistical Inference in Dynamic Factor Models: Chi-Square Approximation and Model-Based Bootstrap}, - journal = {Computational Statistics \& Data Analysis}, - volume = {129}, - pages = {30--46}, - year = {2019}, - doi = {10.1016/j.csda.2018.08.012}, - author = {Thorsten Dickhaus and Natalia Sirotko-Sibirskaya}, -} - -@Article{efron1981nonparametric, - author = {Bradley Efron}, - title = {Nonparametric Standard Errors and Confidence Intervals}, - journal = {Canadian Journal of Statistics}, - volume = {9}, - number = {2}, - pages = {139--158}, - doi = {10.2307/3314608}, - year = 
{1981}, -} - -@Article{glenn2007weighted, - title = {Weighted Empirical Likelihood Estimates and Their Robustness Properties}, - journal = {Computational Statistics \& Data Analysis}, - volume = {51}, - number = {10}, - pages = {5130--5141}, - year = {2007}, - doi = {10.1016/j.csda.2006.07.032}, - author = {N. L. Glenn and Yichuan Zhao}, -} - -@Article{gmm, - title = {Computing Generalized Method of Moments and Generalized Empirical - Likelihood with \proglang{R}}, - author = {Pierre Chauss\'e}, - journal = {Journal of Statistical Software}, - year = {2010}, - volume = {34}, - number = {11}, - pages = {1--35}, - doi = {10.18637/jss.v034.i11}, -} - -@Article{haber2021striped, - author = {Ariela I Haber and Anna K Wallingford and Ian M Grettenberger and Jasmin P {Ramirez Bonilla} and Amber C Vinchesi-Vahl and Donald C Weber}, - title = {Striped Cucumber Beetle and Western Striped Cucumber Beetle (Coleoptera: Chrysomelidae)}, - journal = {Journal of Integrated Pest Management}, - volume = {12}, - number = {1}, - year = {2021}, - pages = {1--10}, - doi = {10.1093/jipm/pmaa026}, -} - -@Article{hall1990methodology, - author = {Peter Hall and Barbara La Scala}, - journal = {International Statistical Review}, - number = {2}, - pages = {109--127}, - publisher = {John Wiley \& Sons}, - title = {Methodology and Algorithms of Empirical Likelihood}, - volume = {58}, - year = {1990}, - doi = {10.2307/1403462}, -} - -@Article{hansen1982large, - author = {Lars Peter Hansen}, - journal = {Econometrica}, - number = {4}, - pages = {1029--1054}, - publisher = {John Wiley \& Sons}, - title = {Large Sample Properties of Generalized Method of Moments Estimators}, - volume = {50}, - year = {1982}, - doi = {10.2307/1912775}, -} - -@Article{hansen1996finite, - author = {Lars Peter Hansen and John Heaton and Amir Yaron}, - journal = {Journal of Business \& Economic Statistics}, - number = {3}, - pages = {262--280}, - title = {Finite-Sample Properties of Some Alternative GMM Estimators}, - 
volume = {14}, - year = {1996}, - doi = {10.2307/1392442}, -} - -@Article{hjort2009extending, - author = {Nils Lid Hjort and Ian W. McKeague and Van Keilegom, Ingrid}, - title = {Extending the Scope of Empirical Likelihood}, - volume = {37}, - journal = {The Annals of Statistics}, - number = {3}, - publisher = {Institute of Mathematical Statistics}, - pages = {1079--1111}, - year = {2009}, - doi = {10.1214/07-aos555}, -} - -@Article{imbens1997one, - author = {Guido W. Imbens}, - title = {One-Step Estimators for Over-Identified Generalized Method of Moments Models}, - journal = {The Review of Economic Studies}, - volume = {64}, - number = {3}, - pages = {359--383}, - year = {1997}, - doi = {10.2307/2971718}, -} - -@Article{jacod2018review, - title = {A Review of Asymptotic Theory of Estimating Functions}, - author = {Jean Jacod and Michael S{\o}rensen}, - journal = {Statistical Inference for Stochastic Processes}, - volume = {21}, - number = {2}, - pages = {415--434}, - year = {2018}, - publisher = {Springer-Verlag}, - doi = {10.1007/s11203-018-9178-8}, -} - -@Article{kim2023empirical, - author = {Eunseop Kim and Steven N. 
MacEachern and Mario Peruggia}, - title = {Empirical Likelihood for the Analysis of Experimental Designs}, - journal = {Journal of Nonparametric Statistics}, - volume = {35}, - number = {4}, - pages = {709--732}, - year = {2023}, - publisher = {Taylor & Francis}, - doi = {10.1080/10485252.2023.2206919}, -} - -@Article{kitamura1997empirical, - author = {Yuichi Kitamura}, - title = {Empirical Likelihood Methods with Weakly Dependent Processes}, - volume = {25}, - journal = {The Annals of Statistics}, - number = {5}, - publisher = {Institute of Mathematical Statistics}, - pages = {2084--2102}, - doi = {10.1214/aos/1069362388}, - year = {1997}, -} - -@Article{kitamura1997information, - author = {Yuichi Kitamura and Michael Stutzer}, - journal = {Econometrica}, - number = {4}, - pages = {861--874}, - publisher = {John Wiley \& Sons}, - title = {An Information-Theoretic Alternative to Generalized Method of Moments Estimation}, - volume = {65}, - year = {1997}, - doi = {10.2307/2171942}, -} - -@Article{kitamura2004empirical, - author = {Yuichi Kitamura and Gautam Tripathi and Hyungtaik Ahn}, - title = {Empirical Likelihood-Based Inference in Conditional Moment Restriction Models}, - journal = {Econometrica}, - volume = {72}, - number = {6}, - pages = {1667--1714}, - doi = {10.1111/j.1468-0262.2004.00550.x}, - year = {2004}, -} - -@Article{knapp2019cucurbits, - title = {Cucurbits as a Model System for Crop Pollination Management}, - volume = {25}, - doi = {10.26786/1920-7603(2019)535}, - journal = {Journal of Pollination Ecology}, - author = {Jessica Louise Knapp and Juliet Laura Osborne}, - pages = {88--102}, - year = {2019}, -} - -@Article{kolaczyk1994empirical, - author = {Eric D. 
Kolaczyk}, - journal = {Statistica Sinica}, - number = {1}, - pages = {199--218}, - publisher = {Institute of Statistical Science, Academia Sinica}, - title = {Empirical Likelihood for Generalized Linear Models}, - volume = {4}, - year = {1994} -} - -@Article{lazar2005assessing, - author = {Nicole A Lazar}, - title = {Assessing the Effect of Individual Data Points on Inference From Empirical Likelihood}, - journal = {Journal of Computational and Graphical Statistics}, - volume = {14}, - number = {3}, - pages = {626--642}, - year = {2005}, - publisher = {Taylor \& Francis}, - doi = {10.1198/106186005x59568}, -} - -@Article{lefcheck2016piecewisesem, - title = {\pkg{piecewiseSEM}: Piecewise Structural Equation Modeling in \proglang{R} for Ecology, Evolution, and Systematics}, - journal = {Methods in Ecology and Evolution}, - volume = {7}, - number = {5}, - pages = {573-579}, - year = {2016}, - author = {Jonathan S. Lefcheck}, - doi = {10.1111/2041-210x.12512}, -} - -@Article{multcomp, - title = {Simultaneous Inference in General Parametric Models}, - author = {Torsten Hothorn and Frank Bretz and Peter Westfall}, - journal = {Biometrical Journal}, - year = {2008}, - volume = {50}, - number = {3}, - pages = {346--363}, - doi = {10.1002/bimj.200810425}, -} - -@Article{newey2004higher, - author = {Whitney K. Newey and Richard J. Smith}, - title = {Higher Order Properties of GMM and Generalized Empirical Likelihood Estimators}, - journal = {Econometrica}, - volume = {72}, - number = {1}, - pages = {219--255}, - doi = {10.1111/j.1468-0262.2004.00482.x}, - year = {2004}, -} - -@Article{nordman2014review, - title = {A Review of Empirical Likelihood Methods for Time Series}, - journal = {Journal of Statistical Planning and Inference}, - volume = {155}, - pages = {1--18}, - year = {2014}, - doi = {10.1016/j.jspi.2013.10.001}, - author = {Daniel J. Nordman and Soumendra N. 
Lahiri}, -} - -@Article{obregon2022pest, - doi = {10.1371/journal.pone.0267984}, - author = {Diana Obregon and Grace Pederson and Alan Taylor and Katja Poveda}, - journal = {PLOS One}, - publisher = {Public Library of Science}, - title = {The Pest Control and Pollinator Protection Dilemma: The Case of Thiamethoxam Prophylactic Applications in Squash Crops}, - year = {2022}, - volume = {17}, - pages = {1--18}, - number = {5}, -} - -@Article{owen1988empirical, - author = {Art Owen}, - title = {Empirical Likelihood Ratio Confidence Intervals for a Single Functional}, - journal = {Biometrika}, - volume = {75}, - number = {2}, - pages = {237--249}, - year = {1988}, - doi = {10.1093/biomet/75.2.237}, -} - -@Article{owen1990empirical, - author = {Art Owen}, - title = {Empirical Likelihood Ratio Confidence Regions}, - volume = {18}, - journal = {The Annals of Statistics}, - number = {1}, - publisher = {Institute of Mathematical Statistics}, - pages = {90--120}, - year = {1990}, - doi = {10.1214/aos/1176347494}, -} - -@Article{owen1991empirical, - author = {Art Owen}, - title = {Empirical Likelihood for Linear Models}, - volume = {19}, - journal = {The Annals of Statistics}, - number = {4}, - publisher = {Institute of Mathematical Statistics}, - pages = {1725--1747}, - year = {1991}, - doi = {10.1214/aos/1176348368}, -} - -@Article{qin1994empirical, - author = {Jin Qin and Jerry Lawless}, - title = {Empirical Likelihood and General Estimating Equations}, - volume = {22}, - journal = {The Annals of Statistics}, - number = {1}, - publisher = {Institute of Mathematical Statistics}, - pages = {300--325}, - year = {1994}, - doi = {10.1214/aos/1176325370}, -} - -@Article{qin1995estimating, - author = {Jing Qin and Jerry Lawless}, - title = {Estimating Equations, Empirical Likelihood and Constraints on Parameters}, - journal = {Canadian Journal of Statistics}, - volume = {23}, - number = {2}, - pages = {145--159}, - doi = {10.2307/3315441}, - year = {1995}, -} - -@Article{Rcpp, - 
title = {Extending \proglang{R} with \proglang{C++}: A Brief Introduction to \pkg{Rcpp}}, - author = {Dirk Eddelbuettel and James Joseph Balamuta}, - journal = {The American Statistician}, - year = {2018}, - volume = {72}, - number = {1}, - pages = {28--36}, - doi = {10.1080/00031305.2017.1375990}, -} - -@Article{RcppEigen, - title = {Fast and Elegant Numerical Linear Algebra Using the \pkg{RcppEigen} Package}, - author = {Douglas Bates and Dirk Eddelbuettel}, - journal = {Journal of Statistical Software}, - year = {2013}, - volume = {52}, - number = {5}, - pages = {1--24}, - doi = {10.18637/jss.v052.i05}, -} - -@Article{smith1997alternative, - author = {Richard J. Smith}, - title = {Alternative Semi-Parametric Likelihood Approaches to Generalised Method of Moments Estimation}, - journal = {The Economic Journal}, - volume = {107}, - number = {441}, - pages = {503--519}, - year = {1997}, - doi = {10.1111/j.0013-0133.1997.174.x}, -} - -@Article{tang2014nested, - author = {Cheng Yong Tang and Tong Tong Wu}, - title = {Nested Coordinate Descent Algorithms for Empirical Likelihood}, - journal = {Journal of Statistical Computation and Simulation}, - volume = {84}, - number = {9}, - pages = {1917--1930}, - year = {2014}, - publisher = {Taylor \& Francis}, - doi = {10.1080/00949655.2013.770514}, -} - -@Article{wang2018f, - author = {Lei Wang and Dan Yang}, - title = {$F$-Distribution Calibrated Empirical Likelihood Ratio Tests for Multiple Hypothesis Testing}, - journal = {Journal of Nonparametric Statistics}, - volume = {30}, - number = {3}, - pages = {662--679}, - year = {2018}, - publisher = {Taylor & Francis}, - doi = {10.1080/10485252.2018.1461867}, -} - -@Article{wedderburn1974quasi, - author = {R. W. M. 
Wedderburn}, - title = {Quasi-Likelihood Functions, Generalized Linear Models, and the Gauss-Newton Method}, - journal = {Biometrika}, - volume = {61}, - number = {3}, - pages = {439--447}, - year = {1974}, - doi = {10.1093/biomet/61.3.439}, -} - -@Article{xi2003extended, - author = {Song Xi Chen and Hengjian Cui}, - journal = {Statistica Sinica}, - number = {1}, - pages = {69--81}, - publisher = {Institute of Statistical Science, Academia Sinica}, - title = {An Extended Empirical Likelihood for Generalized Linear Models}, - volume = {13}, - year = {2003} -} - -@Article{yuan1998asymptotics, - title = {Asymptotics of Estimating Equations under Natural Conditions}, - journal = {Journal of Multivariate Analysis}, - volume = {65}, - number = {2}, - pages = {245--260}, - year = {1998}, - doi = {10.1006/jmva.1997.1731}, - author = {Ke-Hai Yuan and Robert I. Jennrich}, -} - -@Article{zhu2008diagnostic, - author = {Hongtu Zhu and Joseph G. Ibrahim and Niansheng Tang and Heping Zhang}, - title = {Diagnostic Measures for Empirical Likelihood of General Estimating Equations}, - journal = {Biometrika}, - volume = {95}, - number = {2}, - pages = {489--507}, - year = {2008}, - doi = {10.1093/biomet/asm094}, -} - -@Book{car, - title = {An \proglang{R} Companion to Applied Regression}, - edition = {3rd}, - author = {John Fox and Sanford Weisberg}, - year = {2019}, - publisher = {Sage}, - address = {Thousand Oaks}, - url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/}, -} - -@Book{ggplot2, - author = {Hadley Wickham}, - title = {\pkg{ggplot2}: Elegant Graphics for Data Analysis}, - publisher = {Springer-Verlag}, - year = {2016}, - isbn = {978-3-319-24277-4}, - doi = {10.1007/978-0-387-98141-3}, -} - -@Book{MASS, - title = {Modern Applied Statistics with \proglang{S}}, - author = {W. N. Venables and B. D. 
Ripley}, - publisher = {Springer-Verlag}, - edition = {4th}, - address = {New York}, - year = {2002}, - note = {ISBN 0-387-95457-0}, - url = {https://www.stats.ox.ac.uk/pub/MASS4/}, -} - -@Book{owen2001empirical, - title = {Empirical Likelihood}, - author = {Art Owen}, - year = {2001}, - address = {New York}, - publisher = {Chapman \& Hall/CRC}, - doi = {10.1201/9781420036152}, -} - -@Book{zhou2015empirical, - title = {Empirical Likelihood Method in Survival Analysis}, - author = {Mai Zhou}, - year = {2015}, - publisher = {Chapman \& Hall/CRC}, - doi = {10.1201/b18598}, -} - -@InBook{li2005empirical, - author = {Gang Li and Runze Li and Mai Zhou}, - title = {Empirical Likelihood in Survival Analysis}, - chapter = {Empirical Likelihood in Survival Analysis}, - booktitle = {Contemporary Multivariate Analysis And Design Of Experiments}, - pages = {337--349}, - doi = {10.1142/9789812567765_0020}, - year = {2005}, - publisher = {World Scientific}, -} - -@InProceedings{stein1956efficient, - title = {Efficient Nonparametric Testing and Estimation}, - author = {Charles Stein}, - booktitle = {Proceedings of the Third Berkeley Symposium on Mathematical Statistics and Probability}, - volume = {1}, - pages = {187--195}, - year = {1956}, - doi = {10.1525/9780520313880}, -} - -@Manual{agricolae, - title = {\pkg{agricolae}: Statistical Procedures for Agricultural Research}, - author = {Felipe {de Mendiburu}}, - year = {2023}, - note = {\proglang{R} package version 1.3-7}, - url = {https://CRAN.R-project.org/package=agricolae}, -} - -@Manual{carData, - title = {\pkg{carData}: Companion to Applied Regression Data Sets}, - author = {John Fox and Sanford Weisberg and Brad Price}, - year = {2022}, - note = {\proglang{R} package version 3.0-5}, - url = {https://CRAN.R-project.org/package=carData}, -} - -@Manual{dplyr, - title = {\pkg{dplyr}: A Grammar of Data Manipulation}, - author = {Hadley Wickham and Romain Fran\c{c}ois and Lionel Henry and Kirill - M\"uller}, - year = {2023}, - 
note = {\proglang{R} package version 1.1.3}, - url = {https://CRAN.R-project.org/package=dplyr}, -} - -@Manual{dqrng, - title = {\pkg{dqrng}: Fast Pseudo Random Number Generators}, - author = {Ralf Stubner}, - year = {2023}, - note = {\proglang{R} package version 0.3.1}, - url = {https://CRAN.R-project.org/package=dqrng}, -} - -@Manual{eel, - title = {\pkg{eel}: Extended Empirical Likelihood}, - author = {Fan Wu and Yu Zhang}, - year = {2015}, - note = {\proglang{R} package version 1.1}, - url = {https://CRAN.R-project.org/package=eel}, -} - -@Manual{EL, - title = {\pkg{EL}: Two-Sample Empirical Likelihood}, - author = {Janis Valeinis and Edmunds Cers}, - year = {2022}, - note = {\proglang{R} package version 1.2}, - url = {https://CRAN.R-project.org/package=EL}, -} - -@Manual{ELCIC, - title = {\pkg{ELCIC}: The Empirical Likelihood-Based Consistent Information Criterion}, - author = {Biyi Shen and Ming Wang}, - year = {2023}, - note = {\proglang{R} package version 0.2.1}, - url = {https://CRAN.R-project.org/package=ELCIC}, -} - -@Manual{elhmc, - title = {\pkg{elhmc}: Sampling from a Empirical Likelihood Bayesian Posterior of Parameters Using Hamiltonian Monte Carlo}, - author = {Dang Trung Kien and Sanjay Chaudhuri and Neo Han Wei}, - year = {2017}, - note = {\proglang{R} package version 1.1.0}, - url = {https://CRAN.R-project.org/package=elhmc}, -} - -@Manual{emplik, - title = {\pkg{emplik}: Empirical Likelihood Ratio for Censored/Truncated Data}, - author = {Mai Zhou}, - year = {2023}, - note = {\proglang{R} package version 1.3.1}, - url = {https://CRAN.R-project.org/package=emplik}, -} - -@Manual{emplik2, - title = {\pkg{emplik2}: Empirical Likelihood Ratio Test for Two Samples with Censored Data}, - author = {William H. Barton}, - year = {2022}, - note = {\proglang{R} package version 1.32}, - url = {https://CRAN.R-project.org/package=emplik2}, -} - -@Manual{ggridges, - title = {\pkg{ggridges}: Ridgeline Plots in \pkg{ggplot2}}, - author = {Claus O. 
Wilke}, - year = {2022}, - note = {\proglang{R} package version 0.5.4}, - url = {https://CRAN.R-project.org/package=ggridges}, -} - -@Manual{melt, - title = {\pkg{melt}: Multiple Empirical Likelihood Tests}, - author = {Eunseop Kim}, - year = {2023}, - note = {\proglang{R} package version 1.10.0}, - url = {https://CRAN.R-project.org/package=melt}, -} - -@Manual{momentfit, - title = {\pkg{momentfit}: Methods of Moments}, - author = {Pierre Chauss\'e}, - year = {2023}, - note = {\proglang{R} package version 0.5}, - url = {https://CRAN.R-project.org/package=momentfit}, -} - -@Manual{R, - title = {\proglang{R}: A Language and Environment for Statistical Computing}, - author = {{\proglang{R} Core Team}}, - organization = {\proglang{R} Foundation for Statistical Computing}, - address = {Vienna, Austria}, - year = {2023}, - url = {https://www.R-project.org/}, -} - -@Article{kim2024melt, - title = {\pkg{melt}: Multiple Empirical Likelihood Tests in \proglang{R}}, - author = {Eunseop Kim and Steven N. MacEachern and Mario Peruggia}, - journal = {Journal of Statistical Software}, - year = {2024}, - volume = {108}, - number = {5}, - pages = {1--33}, - doi = {10.18637/jss.v108.i05} -}