Ho avuto la stessa domanda. Di seguito ho modificato una funzione del pacchetto mfx per calcolare gli effetti marginali utilizzando i dati organizzati come oggetto sondaggio. Non ho fatto molto, principalmente sostituendo "mean()" e comandi simili progettati per essere eseguiti su dati non di rilievo con gli equivalenti del pacchetto di rilevamento. Dopo il codice mfx modificato, c'è un codice che esegue un esempio.
Sfondo
dettagli sulla confezione mfx da Alan Fernihough: https://cran.r-project.org/web/packages/mfx/mfx.pdf
codice per il pacchetto mfx su github (i file che ho modificato sono probitmfxest.r e probitmfx.r): https://github.com/cran/mfx/tree/master/R
Nel calcolatore mfx, ho commentato un sacco della flessibilità incorporata nelle funzioni originali che gestivano diverse ipotesi su cluster e SE robusti. Potrei sbagliarmi, ma penso che questi problemi siano già spiegati utilizzando il comando di stima della regressione dal pacchetto sondaggio, svyglm().
effetti marginali calcolatrice
library(survey)
probitMfxEstSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in the original mfx package
){
if(is.null(formula)){
stop("model formula is missing")
}
for(i in 1:length(class(design))){
if(!((class(design)[i] %in% "survey.design2") | (class(design)[i] %in% "survey.design"))){
stop("design arguement must contain survey object")
}
}
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# # cluster sort part
# if(is.null(clustervar1) & !is.null(clustervar2)){
# stop("use clustervar1 arguement before clustervar2 arguement")
# }
# if(!is.null(clustervar1)){
# if(is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),data[,clustervar1])
# names(data)[dim(data)[2]] = clustervar1
# data=na.omit(data)
# }
# if(!is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# if(!(clustervar2 %in% names(data))){
# stop("clustervar2 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),
# data[,c(clustervar1,clustervar2)])
# names(data)[c(dim(data)[2]-1):dim(data)[2]] = c(clustervar1,clustervar2)
# data=na.omit(data)
# }
# }
# fit the probit regression
fit = svyglm(formula,
design=design,
family = quasibinomial(link = "probit"),
x=T
)
# TS: summary(fit)
# terms needed
x1 = model.matrix(fit)
if (any(alias <- is.na(coef(fit)))) { # this conditional removes any vars with a NA coefficient
x1 <- x1[, !alias, drop = FALSE]
}
xm = as.matrix(svymean(x1,design)) # calculate means of x variables
be = as.matrix(na.omit(coef(fit))) # collect coefficients: be as in beta
k1 = length(na.omit(coef(fit))) # collect number of coefficients or x variables
xb = t(xm) %*% be # get the matrix product of xMean and beta, which is the model prediction at the mean
fxb = ifelse(atmean==TRUE, dnorm(xb), mean(dnorm(x1 %*% be))) # collect either the overall predicted mean, or the average of every observation's predictions
# get variances
vcv = vcov(fit)
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# if(robust){
# if(is.null(clustervar1)){
# # white correction
# vcv = vcovHC(fit, type = "HC0")
# } else {
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# }
#
# if(robust==FALSE & is.null(clustervar1)==FALSE){
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# set mfx equal to predicted mean (or other value) multiplied by beta
mfx = data.frame(mfx=fxb*be, se=NA)
# get standard errors
if(atmean){# fxb * id matrix - avg model prediction * (beta X xmean)
gr = as.numeric(fxb)*(diag(k1) - as.numeric(xb) *(be %*% t(xm)))
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
} else {
gr = apply(x1, 1, function(x){
as.numeric(as.numeric(dnorm(x %*% be))*(diag(k1) - as.numeric(x %*% be)*(be %*% t(x))))
})
gr = matrix(apply(gr,1,mean),nrow=k1)
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
}
# pick out constant and remove from mfx table
temp1 = apply(x1,2,function(x)length(table(x))==1)
const = names(temp1[temp1==TRUE])
mfx = mfx[row.names(mfx)!=const,]
# pick out discrete change variables
temp1 = apply(x1,2,function(x)length(table(x))==2)
disch = names(temp1[temp1==TRUE])
# calculate the disctrete change marginal effects and standard errors
if(length(disch)!=0){
for(i in 1:length(disch)){
if(atmean){
disx0 = disx1 = xm
disx1[disch[i],] = max(x1[,disch[i]])
disx0[disch[i],] = min(x1[,disch[i]])
# mfx equal to prediction @ x=1 minus prediction @ x=0
mfx[disch[i],1] = pnorm(t(be) %*% disx1) - pnorm(t(be) %*% disx0)
# standard errors
gr = dnorm(t(be) %*% disx1) %*% t(disx1) - dnorm(t(be) %*% disx0) %*% t(disx0)
mfx[disch[i],2] = sqrt(gr %*% vcv %*% t(gr))
} else {
disx0 = disx1 = x1
disx1[,disch[i]] = max(x1[,disch[i]])
disx0[,disch[i]] = min(x1[,disch[i]])
mfx[disch[i],1] = mean(pnorm(disx1 %*% be) - pnorm(disx0 %*% be))
# standard errors
gr = as.numeric(dnorm(disx1 %*% be)) * disx1 - as.numeric(dnorm(disx0 %*% be)) * disx0
avegr = as.matrix(colMeans(gr))
mfx[disch[i],2] = sqrt(t(avegr) %*% vcv %*% avegr)
}
}
}
mfx$discretechgvar = ifelse(rownames(mfx) %in% disch, 1, 0)
output = list(fit=fit, mfx=mfx)
return(output)
}
probitMfxSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in original mfx package
)
{
# res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start, control)
res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start)
est = NULL
est$mfxest = cbind(dFdx = res$mfx$mfx,
StdErr = res$mfx$se,
z.value = res$mfx$mfx/res$mfx$se,
p.value = 2*pt(-abs(res$mfx$mfx/res$mfx$se), df = Inf))
colnames(est$mfxest) = c("dF/dx","Std. Err.","z","P>|z|")
rownames(est$mfxest) = rownames(res$mfx)
est$fit = res$fit
est$dcvar = rownames(res$mfx[res$mfx$discretechgvar==1,])
est$call = match.call()
class(est) = "probitmfx"
est
}
Esempio
# initialize sample data
nObs = 100
x1 = rbinom(nObs,1,.5)
x2 = rbinom(nObs,1,.3)
#x3 = rbinom(100,1,.9)
x3 = runif(nObs,0,.9)
id = 1:nObs
w1 = sample(c(10,50,100),nObs,replace=TRUE)
# dependnt variables
ystar = x1 + x2 - x3 + rnorm(nObs)
y = ifelse(ystar>0,1,0)
# set up data frame
data = data.frame(id, w1, x1, x2, x3, ystar, y)
# initialize survey
survey.design <- svydesign(ids = data$id,
weights = data$w1,
data = data)
mean(data$x2)
sd(data$x2)/(length(data$x2))^0.5
svymean(x=x2,design=survey.design)
probit = svyglm(y~x1 + x2 + x3, design=survey.design, family=quasibinomial(link='probit'))
summary(probit)
probitMfxSurv(formula = y~x1 + x2 + x3, design = survey.design)
http://r-survey.r-forge.r-project.org/survey/html/marginpred.html –