Cross Validation Stata Example
[Stata Program]
/* Step 1: build the prediction equation on the first sample (cross1).      */
/* ", clear" discards whatever data are in memory. Without it -use- stops   */
/* with r(4) when the data in memory have unsaved changes, which is what    */
/* happens when this script is re-run after Step 2 has added variables.     */
use http://www.gseis.ucla.edu/courses/data/cross1, clear
/* Pairwise correlations between the outcome and the candidate predictors.  */
corr api99 pctmeal pctel pctcred avged
/* Forward stepwise selection: starting from the empty model, a predictor   */
/* is added only when its p-value is below the pe() threshold of .05.       */
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
/* Step 2: apply the Step 1 equation to the second sample (cross2).         */
/* The estimation results from Step 1 stay in memory after the new data     */
/* are loaded, so -predict- scores cross2 with the Step 1 coefficients.     */
use http://www.gseis.ucla.edu/courses/data/cross2, clear
predict api99la1
/* Same prediction written out by hand; the constants are the coefficients  */
/* reported by the Step 1 regression (_cons, avged, pctcred, pctmeal).      */
generate api99la2 = 62.5067 + 130.5665*avged + 2.574684*pctcred - .6885639*pctmeal
label variable api99la1 "OC api99 score using LA schools using predict"
label variable api99la2 "OC api99 score using equation for LA schools"
/* Cross-validation check: correlate the observed api99 in the second       */
/* sample with the two (equivalent) predictions from the Step 1 model.      */
corr api99 api99la1 api99la2
/* For comparison, repeat the correlations and the stepwise regression on   */
/* the second sample itself.                                                */
corr api99 pctmeal pctel pctcred avged
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
[Stata Output]
/* Step 1 */
use http://www.gseis.ucla.edu/courses/data/cross1
corr api99 pctmeal pctel pctcred avged
| api99 pctmeal pctel pctcred avged
-------------+---------------------------------------------
api99 | 1.0000
pctmeal | -0.7559 1.0000
pctel | -0.7556 0.6551 1.0000
pctcred | 0.5909 -0.4211 -0.4041 1.0000
avged | 0.8863 -0.7410 -0.7833 0.4114 1.0000
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
begin with empty model
p = 0.0000 < 0.0500 adding avged
p = 0.0000 < 0.0500 adding pctcred
p = 0.0003 < 0.0500 adding pctmeal
Source | SS df MS Number of obs = 188
-------------+------------------------------ F( 3, 184) = 369.37
Model | 2347454.49 3 782484.831 Prob > F = 0.0000
Residual | 389795.423 184 2118.45338 R-squared = 0.8576
-------------+------------------------------ Adj R-squared = 0.8553
Total | 2737249.91 187 14637.7001 Root MSE = 46.027
------------------------------------------------------------------------------
api99 | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
avged | 130.5665 8.206563 15.91 0.000 114.3755 146.7576
pctcred | 2.574684 .3193867 8.06 0.000 1.944553 3.204815
pctmeal | -.6885639 .1870544 -3.68 0.000 -1.057611 -.3195166
_cons | 62.5067 37.88756 1.65 0.101 -12.2432 137.2566
------------------------------------------------------------------------------
/* Step 2 */
use http://www.gseis.ucla.edu/courses/data/cross2
predict api99la1
generate api99la2 = 62.5067 + 130.5665*avged + 2.574684*pctcred - .6885639*pctmeal
label variable api99la1 "OC api99 score using LA schools using predict"
label variable api99la2 "OC api99 score using equation for LA schools"
corr api99 api99la1 api99la2
(obs=52)
| api99 api99la1 api99la2
-------------+---------------------------
api99 | 1.0000
api99la1 | 0.9312 1.0000
api99la2 | 0.9312 1.0000 1.0000
corr api99 pctmeal pctel pctcred avged
(obs=52)
| api99 pctmeal pctel pctcred avged
-------------+---------------------------------------------
api99 | 1.0000
pctmeal | -0.9103 1.0000
pctel | -0.9293 0.9431 1.0000
pctcred | 0.5772 -0.5138 -0.4906 1.0000
avged | 0.9191 -0.9308 -0.9092 0.5342 1.0000
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
begin with empty model
p = 0.0000 < 0.0500 adding pctel
p = 0.0003 < 0.0500 adding avged
p = 0.0310 < 0.0500 adding pctcred
Source | SS df MS Number of obs = 52
-------------+------------------------------ F( 3, 48) = 152.62
Model | 690783.164 3 230261.055 Prob > F = 0.0000
Residual | 72420.2779 48 1508.75579 R-squared = 0.9051
-------------+------------------------------ Adj R-squared = 0.8992
Total | 763203.442 51 14964.7734 Root MSE = 38.843
------------------------------------------------------------------------------
api99 | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
pctel | -2.835101 .5641433 -5.03 0.000 -3.969387 -1.700815
avged | 59.32612 17.71861 3.35 0.002 23.70046 94.95178
pctcred | 2.645574 1.190507 2.22 0.031 .251899 5.039249
_cons | 336.6347 118.8344 2.83 0.007 97.7021 575.5672
------------------------------------------------------------------------------