function [OUT,cov,DAT] = sar(y,X,W,vnames,eigvals)

%SAR computes Simultaneous AutoRegression of y on X given weights W 
%    using the multivariate FMINS procedure in MATLAB.

%Written by: TONY E. SMITH, 4/10/98 (Modified: 3/25/06)

%NOTE: P-values are only reported if StatToolbox is running.


%INPUTS: (i)   y = n-vector of yi values
%        (ii)  X = (nxk)-matrix of explanatory variables
%                  (Xi1,..,Xik) [NOT including unit vector]
%        (iii) W = (nxn)-matrix of spatial weights
%        (iv)  vnames = (optional) names of variables in X. If the variables 
%              are say "altitude" and "temperature", then set:
%              vnames = strvcat('altitude','temperature');For the 
%              case of a single variable, say "altitude", just 
%              write 'altitude'.
%        (v)   eigvals (optional) = vector if eigenvalues of W.
%              [NOTE: for large W-matrices it pays to compute
%               eigenvalues ONCE and then use these as inputs]

%OUTPUT: OUT = nX6 matrix with columns:
%        OUT(:,1) = SAR predicted yHats .
%        OUT(:,2) = Observed y values .
%        OUT(:,3) = SAR residual vector, r .
%        OUT(:,4) = Weighted residuals, W*r .
%        OUT(:,5) = OLS residuals, r0 .
%        OUT(:,6) = Weighted OLS residuals, W*r0 .
%        OUT(:,7) = Transformed Residual, B*r.
%
%        cov = Asymptotic covariance matrix
%
%        DAT = [B*y,B*X] = transformed data
%               
%        NOTE: The residual vector, r, is for the reduced
%              OLS model, B*y = B*X*b + e. Hence, a comparison
%              of the regressions, (r0 on W*r0) and (r on W*r),
%              indicates the degree to which the SAR model has
%              reduced (W-type) spatial autocorrelation.
%
%
%SCREEN OUTPUT:
%
%         TABLE1: (REGRESSION RESULTS)
%                 beta coeffs plus z-values for significance tests
%                 (If MATLAB Statistics Toolbox is present, the P-values
%                  are also displayed as 'PROB').
%
%         TABLE2: (GOODNESS OF FIT)
%                 Ps_R2 = Pseudo R-Square (in reduced model)
%                 Sq-Corr = squared correlation coefficient
%                 Lik = value of log likelihood
%
%         TABLE3: (AUTOCORRELATION PARAMETER)
%                 Rho estimate plus z-value for significance tests
%                 (If MATLAB Statistics Toolbox is present, the P-values
%                  are also displayed as 'PROB').
%
%         TABLE4: (TESTS OF SPATIAL AUTOCORRELATION MODEL)
%                 LR  = likelihood ratio for spatial autocorrelation
%                 Com-LR = likelihood ratio for common-factor hyp.

%PLOT OUTPUT: See the program SAR_PLOT to plot output.

%dbstop if error;

t0 = clock;  %start timer

[n,k] = size(X) ;

%Augment X with unit vector

X0 = X ;

clear('X') ;

X(:,1) = ones(n,1) ;

X(:,2:k+1) = X0 ;

%Initial OLS estimation of beta

b = X\y ;

r0 = y - X * b ; %OLS residuals

var0 = (1/n)*r0'*r0 ; %OLS error variance

%Eigen values

if nargin == 5
   
   val = real(eigvals);
   
   v0 = min(val);
   
   v1 = max(val);
   
elseif nargin < 5
   
   val = real(eig(full(W)));

	[II,JJ,V] = find(val); %find nonzero values

	v0 = min(V);

	v1 = max(V) ;
 
end

if (exist('V'))&(isempty(V))
    
    low = -.99999;
    
    high = .99999;
    
else

	if v0 ~= 0
        
        low = 1/(-abs(v0)) ;
        
    else 
        
        low = 0;
        
    end

    high = 1/v1 ;
    
end


%Estimate rho and beta

criterion = .0001; %internal convergence critirion
itermax = 100;     %internal iteration limit
converge = 1;

econverge = r0;
iter = 1;
e = r0;

while (converge > criterion & iter < itermax) 
    
    %The strategy here is to fix beta and solve for rho, then fix rho
    %and solve for beta. So fminbnd is used many times.
   
   rho = fminbnd('sar_lik',low,high,[],low,high,val,W,e,X) ;
   
	B = speye(n) - rho * sparse(W) ;

	b = inv(X'*B'*B*X)*X'*B'*B*y;

	e = y - X * b ;  
   
   converge = sum(abs(e - econverge));

	econverge = e ;

	iter = iter + 1;

end %end estimation loop

if (iter == itermax)
   
   error('No convergence within 100 iterations');
   
end

%Compute Log Likelihood 

var = (1/n) * e'*B'*B*e ; %Final residual variance

u = ones(length(val),1) ;

LogDet = u'*log(1 - rho*val) ;

L = -(n/2)*(1 + log(2*pi) + log(var)) + LogDet ;

LR = n * (log(var0) - log(var)) + 2 * LogDet ;


%COMMON FACTOR HYPOTHESIS

XX(:,1:k+1) = X ;

XX(:,(k+2):(2*k)+1) = W*X0 ; %Note that X0 is used to avoid (1,W*1) singularity

if rank(XX'*XX) < (2*k)+1
    
    S = inv(diag(max(abs(XX))));
    
    XX = XX*S;
    
end
         
   M = eye(n) - XX*inv(XX'*XX)*XX' ;
   
   e0 = M*y ;
   
   e1 = M*(W*y) ;
   
   z = fminbnd('sp_lag_lik',low,high,[],low,high,val,e0,e1,n) ; 
   
   
   %Compute Log Likelihood
   
   lam = min([z ,high - .001]) ;
   
   lam = max([lam,low + .001]) ;
   
   B0 = eye(n) - lam * W;
   
   b0 = inv(XX'*XX)*XX'*B0*y ;
   
   e0 = y - lam*W*y - XX*b0 ;
   
   var0 = (1/n) * e0'*e0 ;
   
   u = ones(length(val),1);
   
   LogDet = u'*log(1 - lam*val) ;
   
   L0 = -(n/2)*(1 + log(2*pi) + log(var0)) + LogDet ;
   
   %Need to compute L1 (for SAR) as a special case of L0
   
   b1(1,1) = (1 - rho)*b(1);
   b1(2:k+1,1) = b(2:k+1);
   b1(k+2:2*k+1,1) = - rho*b(2:k+1);
   
   e1 = y - rho*W*y - XX*b1 ; %(I-rho*W)*(y - X*b) = B*(y - X*b)
   
   var1 = (1/n) * e1'*e1 ;
        
   LogDet = u'*log(1 - rho*val) ;
   
   L1 = -(n/2)*(1 + log(2*pi) + log(var1)) + LogDet ;
     
   LR0 = 2*(L0 - L1) ;
   


%Calculate Asymptotic Covariance Matrix for the parameter
%vector, (b,var,rho)

G = W*inv(sparse(B)) ;

I(1:k+1,1:k+1) = var*X'*B'*B*X ;

I(1:k+1,k+2) = zeros(k+1,1) ;

I(1:k+1,k+3) = zeros(k+1,1) ;

I(k+2,1:k+1) = I(1:k+1,k+2)' ;

I(k+2,k+2) = n/2 ;

I(k+2,k+3) = var*trace(G) ;

I(k+3,1:k+1) = I(1:k+1,k+3)' ;

I(k+3,k+2) = I(k+2,k+3) ;

I(k+3,k+3) = (var^2)*trace(G*(G' + G)) ;

cov = (var^2)*inv(I) ;

se = sqrt(diag(cov)) ;


%SCREEN OUTPUT

info.fmt = '%12.6f'; %print format

%Make Coefficient Display

z = b./se(1:k+1);

%Construct Variable Names

if ~exist('vnames')

    nvar = k;
    
    vnames = [];
    
    for i=1:nvar
        
        tmp = ['Variable ',num2str(i)];
    
        vnames = strvcat(vnames,tmp);
        
    end;
    
 end;


if exist('normcdf') == 2  %check to see if statistics toolbox exists
   
   prob = 2*( 1 - normcdf(abs(z)) );   
    
   
   info.cnames = strvcat('COEFF','Z-VAL','PROB');
   info.rnames = strvcat('VAR','const',vnames);
  
   mat(:,1) = b;
   mat(:,2) = z;
   mat(:,3) = prob;
	
     
else %otherwise, just report the z-values
   
   
   info.cnames = strvcat('COEFF','Z-VAL');
   info.rnames = strvcat('VAR','const',vnames);
  
   mat(:,1) = b;
   mat(:,2) = z;
   
     
end

disp(' ');
disp('FINAL REGRESSION RESULTS:');
disp(' ');

mprint_new(mat,info) %Display Regression Results


%Make Autocorrelation Display

clear( 'mat','info');

info.fmt = '%12.6f';

rho_z = rho/se(k+3);

mat(1,1) = rho;
mat(1,2) = rho_z;
   
info.rnames = strvcat(' ','rho');  

if exist('normcdf') == 2 %Check for Stat Toolbox  
   
   info.cnames = strvcat('VAL','Z-VAL','PROB');

	mat(1,3) = 2*(1 - normcdf(abs(rho_z))); 

else
   
   info.cnames = strvcat('VAL','Z-VAL');
   
end

disp(['Variance = ',num2str(var)]);

disp(' ');
disp('AUTOCORRELATION RESULTS:');
disp(' ');

mprint_new(mat,info);

   
%Goodness-of-Fit Display

yy = X*b ;

y1 = y - mean(y) ;

y2 = yy - mean(yy) ;

corr = (y1'*y2)/(norm(y1)*norm(y2)) ;

U = ones(n,1) ;

D = eye(n) - (1/n)*U*U' ; %Deviation Matrix


b = inv(X'*B'*B*X)*X'*B'*B*y;

%Compute PSEUDO R-SQUARE (Uses Transformed Model)

%      NOTE: It is important to observe here that if W is row 
%      normalized then B*1 = (1-rho)*1, so the model still
%      behaves like a standard regression WITH INTERCEPT. 
%      Hence R-square makes perfect sense.

e = B*y - B*X*b;

Ps_R2 = 1 - (e'*e)/(y'*B'*D*B*y);  % Note that D*e = e since the 
                                   % identity 1'*e = 0 still holds. 

Ps_R2_Adj = 1 - ((n-1)/(n-(k+1)))*(e'*e)/(y'*B'*D*B*y);


%Compute Squared Correlation

yhat = X*b;

corr = (yhat'*D*y)/((n-1)*std(yhat)*std(y));

%Compute Kullback_Leibler (KL) R-SQUARE

% uu = ones(n,1);
% 
% mu = (uu'*B'*B*y)/(uu'*B'*B*uu);
% 
% e = y - X*b;
% 
% RKL =  1 - (e'*B'*B*e)/((y - mu*uu)'*B'*B*(y - mu*uu));
% 
% RKL_Adj =  1 - ((n-1)/(n-(k+1)))*(e'*B'*B*e)/((y - mu*uu)'*B'*B*(y - mu*uu));

%Compute corrected AIC (AIC_c) [Burnham and Anderson, MODEL SELECTION...,p.66]

K = k + 3; %Number of parameters estimated (b + var +rho)

AIC = 2*(-L + K);

AIC_c = AIC + 2*K*(K+1)/(n-K-1);

   
disp(' ');
disp('GOODNESS-OF-FIT RESULTS:');
disp(' ');

disp(strvcat(['Pseudo R-Square      = ',num2str(Ps_R2)])); 
disp(strvcat(['Pseudo R-Square Adj  = ',num2str(Ps_R2_Adj)]));
disp(strvcat(['Squared_Correlation  = ',num2str(corr^2)]));
%disp(strvcat(['KL R-Square          = ',num2str(RKL)])); 
%disp(strvcat(['KL R-Square Adj      = ',num2str(RKL_Adj)])); 
disp(strvcat(['Log Likelihood Value = ',num2str(L)])); 
disp(strvcat(['AIC = ',num2str(AIC)])); 
disp(strvcat(['AIC_corrected = ',num2str(AIC_c)])); 





%Test Display

clear('mat','info');

info.fmt = '%12.6f';

mat(1,1) = LR;
mat(2,1) = LR0;

info.rnames = strvcat('TEST','LR','Com-LR');

if exist('chi2cdf') == 2 %Check for Stat Toolbox     
        
   mat(1,2) = 1 - chi2cdf(LR,1);
   mat(2,2) = 1 - chi2cdf(LR0,k);
   
   info.cnames = strvcat('VAL','PROB');
   
else
   
   info.cnames = strvcat('VAL');
      
end

disp(' ');
disp('TESTS OF SAR MODEL:');
disp(' ');

mprint_new(mat,info);


%Prepare OUTPUT

OUT(:,1) = yy ; %SAR predicted y's

OUT(:,2) = y ; %Observed y's

r = B*y - B*X*b ; %SAR Residuals

OUT(:,3) = r ;

OUT(:,4) = W*r ; %Weighted residuals

OUT(:,5) = r0;

OUT(:,6) = W*r0;

OUT(:,7) = B*r;

DAT = [B*y,B*X];


ELAPSED_TIME = etime(clock,t0)


