/* ** MBAYES - Bayesian Inference from Weighted Likelihood Bootstrap ** ** written by Ronald Schoenberg April 10, 1995 ** RJS Software ** rons@u.washington.edu ** ** This program is in the public domain. While the author disclaims ** any responsibility for the performance of this software, he ** would appreciate receiving any comments. ** *************************************************************************** ** ** PROC MBAYES ** ** This program requires the GAUSS Maximum Likelihood (MAXLIK 4.0) ** Application module. It uses the weighted maximum likelihood ** feature together with an SIR, a Sample-Importance-Resample ** method, to generate a simulation of the Bayesian posterior ** distribution of the parameters. This method is described in ** M.A. Newton and A. Raftery's paper, "Approximate Bayesian ** Inference with the Weighted Likelihood Bootstrap", J.R.Statist. ** Soc.B. (56):3-48, 1994. ** ** MBAYES generates a GAUSS data set containing a simulated ** posterior disribution of the parameters. It first produces ** a bootstrap sample, calling MAXLIK "nsamp" times using Dirichlet ** weights. Then this sample is re-sampled using weights ** computed from the log-likelihoods times the prior divided by ** a multivariate kernel density estimate of the parameters. ** This re-sampled data set is stored in the GAUSS data set. ** ** In this example "nsamp" is set to 300. This is probably fine for ** simple models like the one in the example. However, the more ** nonlinear, the more re-samplings required. For a nonlinear model ** "nsamp" may need to be set as high as 1000. ** ** There is an "alpha" parameter that is set near the top of MBAYES. ** See Newton and Raftery (op.cit. page 44) for a discussion of setting ** this parameter. The goal is to disperse the Dirichlet random ** variates enough to cover the posterior, but not too much. Values ** less than 1 are under-dispersed, and values greater than 1 are ** over-dispersed. ** ** For the analysis of the posterior distribution of the parameters, ** the MAXLIK functions, MAXDensity, MAXBLimits, and MAXHist, may be ** called to generate univariate kernel density plots, confidence ** limits, univariate histograms, and bivariate surface plots of ** the parameters from the GAUSS data set of posterior distribution ** of the parameters. ** ** A prior density may be specified by the user using a proc that ** returns the prior probability given a vector of parameters. ** ** For information about MAXLIK, contact Aptech Systems at info@aptech.com, ** or call +1-206-432-7855, or fax +1-206-432-7832. ** ** To execute the example, put this file into a directory, then from ** DOS type ** ** gauss mbayes.src ** ** or from the GAUSS command line while in that directory: ** ** run mbayes.src ** ** For general use, strip the example from the file, put into the ** (GAUSSHOME)/src subdirectory, and put the entry ** ** mbayes.src ** mbayes : proc ** ** into the cml.lcg library file. */ library maxlik; #include maxlik.ext; maxset; rndseed 4577889; /* ** First, simulate some data */ nobs = 100; x = rndn(nobs,2) + 2; cr = { 1 -.7, -.7 1 }; /* let's correlate the independent variables */ x = ones(nobs,1)~(x*chol(cr)); b = { 1, 1, 1 }; /* coefficients */ y = rndp(nobs,1,x*b+rndn(nobs,1)); /* RNDP generates Poisson variates */ proc lpsn(b0,z); /* a Poisson regression likelihood function */ local m; m = z[.,2:cols(z)]*b0; retp(z[.,1].*m-exp(m)); endp; z = y~x; /* input data set */ vars = 0; /* select all variables from input data set */ start = b; /* start values */ _max_ParNames = { B0, B1, B2 }; /* ** This call to MAXLIK is just to generate confidence limits ** from the standard errors for comparision. */ { b1,f1,g1,cov1,ret1 } = MAXLIK(z,vars,&lpsn,start); output file = psn.out reset; cl1 = MAXTlimits(b1,cov1); print; print "Confidence Limits from Standard Errors"; print; call printfmt(_max_ParNames~cl1,0~1~1); /* ** This call to MBAYES generates a GAUSS dataset with the filename ** in containing the simulated posterior distribution of ** the parameters. */ nsamp = 300; /* size of weighted bootstrap */ fname = "pbayes"; /* data set name for posterior data set */ prior = 0; /* no prior */ _max_MaxIters = 20; /* skips any run that gets bogged down */ call MBAYES(z,vars,&lpsn,start,nsamp,fname,prior); /* ** The call to MAXBlimits produces confidence limits from ** the posterior data set stored in . Calling MAXDensity ** with as an argument would produce kernel density plots. */ cl2 = MAXBlimits(fname); print; print; print "Bayesian Confidence Limits"; print; call printfmt(_max_ParNames~cl2,0~1~1); output off; end; /* ** Here is the output from this run ** ** Confidence Limits from Standard Errors ** ** B0 0.17239183 0.93513768 ** B1 0.060472679 0.40203863 ** B2 0.079390976 0.40084091 ** ** ** Bayesian Confidence Limits ** ** B0 0.15302745 0.94491687 ** B1 0.052377254 0.40671607 ** B2 0.12806375 0.41115856 ** */ /**************************************************************************** ** ** PROC MBAYES ** ** FORMAT ** MBAYES(dataset,vars,&fct,start,numsamp,ofname,&prior) ** ** INPUT ** ** dataset - string containing name of GAUSS data set, or ** name of data matrix stored in memory ** ** vars - character vector of labels selected for analysis, or ** numeric vector of column numbers in data set ** of variables selected for analysis ** ** fct - the name of the log-likelihood procedure. It has ** two input arguments, first, the vector of parameters, ** second, a matrix of observations, and one output argument, ** a vector of log-likelihoods computed for each of the ** observations given the parameters ** ** start - a Kx1 vector of start values ** ** numsamp - scalar, the number of re-samples for the weighted ** likelihood bootstrap ** ** ofname - string containing name of a GAUSS data set which ** will contain the simulated posterior distribution ** of the parameters. ** ** prior - the name of a procedure that returns the prior ** probability given parameters. It has one input argument, ** the vector parameters, and one output argument, a scalar ** probability. ** ** ** GLOBALS ** ** MAXLIK globals are relevant. See MAXLIK.SRC for their description. */ #include maxlik.ext; proc (0) = mbayes(dataset,var,lfct,start,nsamp,ofname,prior); local x,f,g,h,retcode,isPrior,alpha,oldt,covpar,lg,ii,jj, nobs,tt0,fhandle,fout,mm,mn,iter,title0,i,j, np,fhat,terrell,num,ch,datx,datf; alpha = .7; /* alpha can be any number greater than zero. */ /* Greater than one is platykurtic and less than */ /* is leptokurtic */ if prior /= 0; local prior:proc; isPrior = 1; else; isPrior = 0; endif; if type(dataset) == 13; if dataset $/= ""; open fhandle = ^dataset; if fhandle == -1; errorlog dataset $+ " could not be opened"; retp(start,error(0),error(0),error(0),error(34)); endif; nobs = rowsf(fhandle); fhandle = close(fhandle); else; errorlog "data set not specified"; retp(start,error(0),error(0),error(0),error(34)); endif; else; nobs = rows(dataset); endif; iter = 1; datx = zeros(nsamp,rows(start)); datf = zeros(nsamp,1); tt0 = ftos(nsamp,"%0*.*lf",1,0); title0 = __title; covpar = _max_CovPar; _max_CovPar = 0; clear mn; do until iter > nsamp; __weight = (-ln(rndu(nobs,1)))^alpha; __title = title0 $+ " - " $+ ftos(iter,"%0*.*lf",1,0) $+ " of " $+ tt0 $+ " -"; { x,f,g,h,retcode } = maxlik(dataset,var,lfct,start); if retcode == 0; datx[iter,.] = x'; datf[iter] = f; mn = mn + x; start = mn / iter; else; datx[iter,.] = miss(zeros(1,cols(datx)),0); datf[iter] = error(0); endif; iter = iter + 1; endo; datx = packr(datx); datf = packr(datf); _max_CovPar = covpar; mm = vcx(datx); np = rows(start); terrell = ( ((np+8)^((np+6)/2)) / (16*rows(datx)*(np+2)*gamma((np+8)/2)) )^(2/(np+4)); oldt = trapchk(1); trap 1,1; ch = inv(chol(mm*terrell)); trap oldt,1; if scalmiss(ch); errorlog "estimated covariance matrix not positive definite"; retp; endif; cls; locate 1,1; ii = 19; jj = 1; printdos "Computing weights "; fhat = zeros(rows(datx),1); i = 1; do until i > rows(datx); if ii > 78; ii = 1; jj = jj + 1; else; ii = ii + 1; endif; locate jj,ii; printdos "."; if isPrior; fhat[i] = 1 / sumc(exp(sumc((-.5 * ((datx[i,.] - datx)*ch)^2)')) / (rows(datx)*datf[i])*prior(datx[i,.])); else; fhat[i] = 1 / sumc(exp(sumc((-.5 * ((datx[i,.] - datx)*ch)^2)')) / (rows(datx)*datf[i])); endif; i = i + 1; endo; fhat = fhat / sumc(fhat); num = sumc(fhat); create fout = ^ofname with PAR_,rows(start),8; cls; locate 1,1; ii = 18; jj = 1; printdos "Writing data set "; i = 1; do until i > rows(fhat); if ii > 78; ii = 1; jj = jj + 1; else; ii = ii + 1; endif; locate jj,ii; printdos "."; num = _rndp(fhat[i]*rows(datx)); j = 1; do until j > num; call writer(fout,datx[i,.]); j = j + 1; endo; i = i + 1; endo; fout = close(fout); cls; endp; /* ** _rndp - Poisson random number generator ** ** r - desired rows of result ** c - desired cols of result ** l - expected value(s) of Poisson variates ** if 1x1 scalar, used for all variates ** if rx1 vector, row of l used for each row of result ** if cx1 vector, col of l used for each col of result ** if rxc matrix, element of l used for each element of result */ proc rndp(r,c,l); local i,j,z,l0; z = zeros(r,c); i = 1; do until i > r; j = 1; do until j > c; if cols(l) == c and rows(l) == r; l0 = l[i,j]; elseif rows(l) == r; l0 = l[i,.]; elseif cols(l) == c; l0 = l[.,j]; else; l0 = l[1,1]; endif; z[i,j] = _rndp(l0); j = j + 1; endo; i = i + 1; endo; retp(z); endp; proc _rndp(l); local x,r,r0; x = 0; r0 = rndu(1,1); r = exp(-l); do while r < R0; r0 = r0 - r; x = x + 1; r = r*l/x; endo; retp(x); endp;