/* ** Confirmatory Factor Analysis (Schoenberg Oct 95) ** ** The source code below estimates the parameters of a confirmatory ** factor analysis model. Let ** ** Y = Lambda * eta + E ** ** where Y is an NxK matrix of observations on K variables, eta ** is a NxL matrix of unobserved latent variables, Lambda a KxL ** coefficient (or "loadings") matrix, and E an NxK matrix of ** unobserved residuals (or "measurement error"). Then ** ** Sigma = Lambda * Phi * Lambda' + Theta ** ** where Sigma is the moment matrix of the variables in Y, Phi is ** moment matrix of the latent variables, and Theta is the moment ** matrix of the residuals or measurement errors. If we assume ** the variables are measured as deviations from their means (an ** assumption without consequence to the estimation), the moment ** matrices are all covariance matrices. ** ** The elements of Lambda, Phi, and Theta may be estimated by ** assuming multinormality of the variables in Y, eta, and E. ** When this rather strong assumption seems untenable, the ** PML or pseudo-maximum likelihood standard errors are recommended ** (see Arminger and Schoenberg, 1989, "Pseudo maximum likelihood ** estimation and a test for misspecification in mean- and covariance- ** structure models", _Psychometrika_, 54: 409-425). ** ** For particular applications, you must supply three procs, putPars, ** putPhi, and putTheta which construct Lambda, Phi, and Theta given, ** the vector of parameters, and a vector, numPars, with three elements ** indicating the number of parameters to be estimated in each matrix. ** CML does the rest. ** ** The example below illustrates constraining Phi and Theta to be ** positive definite, and the loadings as well to be positive. ** Positive definite Phi and Theta are necessary constraints. ** Positivity constraints on loadings are generally applicable. */ library cml; #include cml.ext; cmlset; output file = cfa.out reset; /* ** Simulation of the data */ rndseed 54897; pars = { .7, .7, 1.5, .3, 1.5, .5, .5, .5, .5 }; lambda = putLambda(pars,1); phi = putPhi(pars); theta = putTheta(pars); sigma = lambda * phi * lambda' + theta; dat = rndn(100,4) * chol(sigma); /* ** PutLambda, PutPhi, PutTheta - enters the parameters in the ** parameter vector into appropriate locations in the model matrices */ proc PutLambda(b,n); local lambda; lambda = zeros(4,2); if n; /* Inserts a 1 for normalization for "reference indicators" */ /* in function calculation. For derivatives n == 0, i.e., */ /* the ones are not inserted for derivatives. */ lambda[1,1] = 1; lambda[3,2] = 1; endif; lambda[2,1] = b[1]; lambda[4,2] = b[2]; retp(lambda); endp; proc PutPhi(b); local phi; phi = xpnd(b[3:5]); retp(phi); endp; proc PutTheta(b); local theta; theta = eye(4).*b[6:9]; retp(theta); endp; /* ** numPars - vector indicating the number of parameters to be ** estimated in each model matrix */ numPars = { 2, 3, 4 }; /* ** start values and labels for parameters */ start = { .6, .6, 1, .7, 1, .6, .6, .6, .6 }; _cml_ParNames = { LA_21, LA_42, PH_11, PH_12, PH_22, TH_11, TH_22, TH_33, TH_44 }; /* ** constrain Phi and Theta to be positive definite. Since Theta ** is a diagonal matrix in this example, the constraint is ** equivalent to constraining the diagonal elements to be positive, ** and thus a simpler implementation would be to deal with it in ** _cml_Bounds. However, because Theta is not always diagonal, ** the constraint is left in here for illustrative purposes. */ proc ineqp(b); local phi, theta, c; phi = putPhi(b); theta = putTheta(b); c = eigh(phi)|eigh(theta) - .0001; retp(c); endp; _cml_IneqProc = &ineqp; /* ** constrain loadings to be positive */ _cml_Bounds = { .0001 100, .0001 100, .0001 100, -100 100, .0001 100, .0001 100, .0001 100, .0001 100, .0001 100 }; { b1,f1,g1,cov1,ret1 } = cml(dat,0,&lpr,start); __title = "Unconstrained Wald C.L.'s"; cl1 = cmltlimits(b1,cov1); call cmlclprt(b1,f1,g1,cl1,ret1); print; print "A nonzero Lagrangean below means that the constraints"; print "on the positive definiteness of Phi and Theta are active"; print; print vread(_cml_Lagrange,"nlinineq"); _cml_FeasibleTest = 0; /* turns off feasibility test for line search */ __tol = 1e-3; /* sets tolerance for 3 significant places */ __title = "Constrained Wald C.L.'s"; __output = 10; _cml_GradProc = &grd; _cml_HessProc = &hss; cl2 = cmlclimits(b1,cov1); call cmlclprt(b1,f1,g1,cl2,ret1); output off; /* ** PROCEDURES */ proc lpr(b,x); /* FUNCTION */ local lambda,phi,theta,sigma,oldt,si,l0; lambda = putLambda(b,1); phi = putPhi(b); theta = putTheta(b); retp(lpdfn2(x,lambda*phi*lambda'+theta)); endp; proc grd(b,x); /* GRADIENT */ local lambda,phi,theta,sigma,oldt,si; local dsigma,dlambda, n, i, k, d, x1, g; n = rows(b); g = zeros(rows(x),n); lambda = putLambda(b,1); phi = putPhi(b); theta = putTheta(b); oldt = trapchk(1); trap 1,1; si = invpd(lambda*phi*lambda' + theta); trap oldt,1; if scalmiss(si); retp(error(0)); endif; k = 1; i = 1; do until i > numPars[1]; dlambda = putLambda(mu(k,n),0); dsigma = si*(dlambda*phi*lambda' + lambda*phi*dlambda'); g[.,k] = sumc( ((x*dsigma*si).*x)' ) - sumc(diag(dsigma)); i = i + 1; k = k + 1; endo; i = 1; do until i > numPars[2]; dsigma = si*(lambda*putPhi(mu(k,n))*lambda'); g[.,k] = sumc( ((x*dsigma*si).*x)' ) - sumc(diag(dsigma)); i = i + 1; k = k + 1; endo; i = 1; do until i > numPars[3]; dsigma = si*putTheta(mu(k,n)); g[.,k] = sumc( ((x*dsigma*si).*x)' ) - sumc(diag(dsigma)); i = i + 1; k = k + 1; endo; retp(0.5*g); endp; proc hss(b,x); /* HESSIAN */ local lambda,phi,theta,sigma,dsigma,oldt,si,h,i,j,n,cnum; local dsigmai, dsigmaj, W; n = rows(b); h = zeros(n,n); lambda = putLambda(b,1); phi = putPhi(b); theta = putTheta(b); oldt = trapchk(1); trap 1,1; si = invpd(lambda*phi*lambda' + theta); trap oldt,1; if scalmiss(si); retp(error(0)); endif; cnum = cumsumc(numPars); /* lambda, lambda */ i = 1; do until i > cnum[1]; W = putLambda(mu(i,n),0)*phi*lambda'; dsigmai = si*(W + W'); j = 1; do until j > i; W = putLambda(mu(j,n),0)*phi*lambda'; dsigmaj = si*(W + W'); h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; /* phi, lambda */ i = cnum[1] + 1; do until i > cnum[2]; dsigmai = si*(lambda*putPhi(mu(i,n))*lambda'); j = 1; do until j > cnum[1]; W = putLambda(mu(j,n),0)*phi*lambda'; dsigmaj = si*(W + W'); h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; /* phi, phi */ i = cnum[1] + 1; do until i > cnum[2]; dsigmai = si*lambda*putPhi(mu(i,n))*lambda'; j = cnum[1] + 1; do until j > i; dsigmaj = si*lambda*putPhi(mu(j,n))*lambda'; h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; /* psi, lambda */ i = cnum[2] + 1; do until i > cnum[3]; dsigmai = si*putTheta(mu(i,n)); j = 1; do until j > cnum[1]; W = putLambda(mu(j,n),0)*phi*lambda'; dsigmaj = si*(W + W'); h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; /* psi, phi */ i = cnum[2] + 1; do until i > cnum[3]; dsigmai = si*putTheta(mu(i,n)); j = cnum[1] + 1; do until j > cnum[2]; dsigmaj = si*lambda*putPhi(mu(j,n))*lambda'; h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; /* psi, psi */ i = cnum[2] + 1; do until i > cnum[3]; dsigmai = si*putTheta(mu(i,n)); j = cnum[2] + 1; do until j > i; dsigmaj = si*putTheta(mu(j,n)); h[i,j] = sumc(sumc(dsigmai.*dsigmaj')); if i /= j; h[j,i] = h[i,j]; endif; j = j + 1; endo; i = i + 1; endo; retp(-0.5*rows(x)*h); endp; proc mu(i,k); retp(seqa(1,1,k).==i); endp; proc lpdfn2(x,s); local si, oldt, lndet; if cols(x) /= cols(s); if not trapchk(1); errorlog "ERROR: covariance matrix not conformable"; end; else; retp(error(0)); endif; endif; if cols(x) == 1; retp(-.918938533204672741 - 0.5 * ( ln(s) + x.*x / s ) ); endif; if rows(s) /= cols(s); if not trapchk(1); errorlog "ERROR: covariance matrix not square"; end; else; retp(error(0)); endif; endif; oldt = trapchk(1); trap 1,1; si = invpd(s); trap oldt,1; if scalmiss(si); if not trapchk(1); errorlog "ERROR: covariance matrix not positive definite"; end; else; retp(error(0)); endif; endif; lndet = ln(detl); retp(-0.5*(rows(s)*1.83787706640934548 + lndet + sumc(((x*si).*x)'))); endp;