/*======================================================================
  SUMMARY OF REGRESSION PLOTTING IN CASE-STUDY WITH PIMA INDIAN DATA
  ON DIABETES AND DIASTOLIC BLOOD PRESSURE
  ================================================= 11/01/08

  Maintenance note: all narrative text in this file is written as
  block comments.  Comment statements of the form "* text ;" are
  tokenized by SAS, so an apostrophe inside one (e.g. "sftp'd") opens
  a quoted string and can swallow the code that follows it.
======================================================================*/

libname SAS "SASdat";
options nocenter nodate linesize=75;

proc contents data=SAS.pima;
run;

/* FOLLOWING ILLUSTRATION IS AS IN MULTREG, but more detailed and
   elaborate */

/* Subsetting IF: remove missing values (recorded as 0 in these
   columns): keep only 392 out of 768 obs */
data pima2;
   set SAS.pima;
   if diast ne 0 and gluc ne 0 and triceps ne 0
      and insulin ne 0 and bmi ne 0;
run;

/*----------------------------------------------------------------------
  1. Multiple regression of diast on bmi, age, gluc and Diab.
     Predicted values (Yprd) and residuals (Yrsd) are saved in pim2rsd.
----------------------------------------------------------------------*/
proc reg data=pima2;
   model diast = bmi age gluc Diab;
   plot residual. * predicted.;
   output out = pim2rsd
          p = Yprd
          r = Yrsd;
run;
quit;   /* PROC REG is interactive; end it explicitly */

/* symbol1: raw data points.
   symbol2: fitted line with 95% confidence limits for the mean
            (i=rlclm95).
   symbol3: fitted line with 95% limits for individual predictions
            (i=rlcli95). */
symbol1 v=circle color=black;
symbol2 v=none i=rlclm95 color=red;
symbol3 v=none i=rlcli95 color=blue;

proc gplot data=pim2rsd;
   plot diast*bmi = 1
        diast*bmi = 2
        diast*bmi = 3 / overlay;
run;
quit;

/* Overplotted graph for simple linear regression plots in graphical
   window.  Here we have red confidence-interval bands for the
   regression line itself, and blue for prediction intervals.
   In LOG window you can find:
      NOTE: Regression equation : diast = 52.75473 + 0.541269*bmi.
   But this simple shortcut applies only to simple linear
   regressions !!  If we want similar plots for multiple regression
   we must make more elaborate use of the regression output-file. */

/*----------------------------------------------------------------------
  2. Same multiple regression, now also saving the confidence limits
     for the mean (loreg, hireg) and for individual predictions
     (lopred, hipred).
----------------------------------------------------------------------*/
proc reg data=pima2;
   model diast = bmi age gluc Diab;
   plot residual. * predicted.;
   output out = pim3rsd
          p    = Yprd
          r    = Yrsd
          lclm = loreg
          uclm = hireg
          lcl  = lopred
          ucl  = hipred;
run;
quit;

/* Subtract the predicted value from each limit so that the bands are
   centered at zero and can be drawn on the residual scale. */
data pim3rsd;
   set pim3rsd;
   loregC = loreg  - Yprd;
   hiregC = hireg  - Yprd;
   loregR = lopred - Yprd;
   hiregR = hipred - Yprd;
run;

goptions reset=all;
symbol1 v = circle color = black;
symbol2 v = NONE i=JOIN line=3 color=red;
symbol3 v=NONE i=Join line =1 color=blue;

/* i=JOIN connects points in data-set order, so sort on the
   horizontal-axis variable before plotting. */
proc sort data=pim3rsd;
   by Yprd;
run;

proc gplot data=pim3rsd;
   title "Overplotting Mult Reg Residuals & Bands";
   plot Yrsd   * Yprd = 1
        loregC * Yprd = 2
        hiregC * Yprd = 2
        loregR * Yprd = 3
        hiregR * Yprd = 3 / overlay;
run;
quit;

/* Observed picture in graph window, then printed to file ResPlot1.ps
   in my home directory, then sftp'd it to home computer and saved it */

/*----------------------------------------------------------------------
  3. Now try to modify by changing alpha, also plotting horizontal
     black line at 0
----------------------------------------------------------------------*/
proc reg data=pima2 alpha = .01;
   model diast = bmi age gluc Diab;
   plot residual. * predicted.;
   output out = pim3rsd
          p    = Yprd
          r    = Yrsd
          lclm = loreg
          uclm = hireg
          lcl  = lopred
          ucl  = hipred;
run;
quit;

data pim3rsd;
   set pim3rsd;
   loregC = loreg  - Yprd;
   hiregC = hireg  - Yprd;
   loregR = lopred - Yprd;
   hiregR = hipred - Yprd;
   ZER = 0;               /* constant used to draw the zero line */
run;

proc sort data=pim3rsd;
   by Yprd;
run;

symbol4 v=NONE i=Join color=Black;

proc gplot data=pim3rsd;
   title "Overplotting Mult Reg Residuals & Bands";
   plot Yrsd   * Yprd = 1
        loregC * Yprd = 2
        hiregC * Yprd = 2
        loregR * Yprd = 3
        hiregR * Yprd = 3
        ZER    * Yprd = 4 / overlay;
run;
quit;

/* again printed to file ResPlot2.ps then ftp'd to home computer and
   converted to pdf */

/*----------------------------------------------------------------------
  4. Finally, can also do the plotting against some other potential
     predictor variable, but must first make sure to sort on it !!
----------------------------------------------------------------------*/
proc sort data=pim3rsd out=pim4rsd;
   by insulin;
run;

proc gplot data=pim4rsd;
   title "Overplotting Mult Reg Residuals & Bands";
   plot Yrsd   * insulin = 1
        loregC * insulin = 2
        hiregC * insulin = 2
        loregR * insulin = 3
        hiregR * insulin = 3
        ZER    * insulin = 4 / overlay;
run;
quit;

/* Similar print-to-file and sftp operation.
   You can see the resulting pdf pictures in the Scripts directory as
   ResPlot1.pdf, ResPlot2.pdf, and ResPlot3.pdf */