/* OpenIntro LAB 3 */
/*
 * Examines whether female height (hgt) in the OpenIntro bdims data is
 * approximately normally distributed.
 *
 * Steps:
 *   1. Download bdims.csv and import it as WORK.BDIMS.
 *   2. Split the rows into MDIMS (sex=1) and FDIMS (sex=0).
 *   3. Draw a histogram with a fitted normal curve and a normal Q-Q plot
 *      of hgt for FDIMS, saving n, mean and std as macro variables.
 *   4. Simulate normal samples of the same size, mean and standard
 *      deviation and draw their Q-Q plots for visual comparison.
 *   5. Compare the theoretical normal probability of hgt > 182 with the
 *      observed proportion in FDIMS.
 *
 * Macro variables created: seed, n, mean, std.
 * Data sets created in WORK: bdims, mdims, fdims, estimates, sim_norm,
 * simulated, temp.
 */

/*
 * Seed for the RAND stream used by both simulation steps.
 * 0 (the default) leaves the stream seeded from the system clock, which is
 * what the program did before this setting existed. Set a positive integer
 * to make the simulated Q-Q plots reproducible from run to run.
 */
%let seed = 0;

*Read in the data from the CSV file located on the OpenIntro website;
filename bdims url 'http://www.openintro.org/stat/data/bdims.csv';

proc import datafile=bdims
            out=bdims
            dbms=csv
            replace;
    getnames=yes;
run;

*Print the first six observations of the data;
proc print data=bdims (obs=6);
run;

*Create separate male (sex=1) and female (sex=0) data sets;
data mdims fdims;
    set bdims;
    if sex=1 then output mdims;
    else if sex=0 then output fdims;
run;

*Create a histogram of heights for females with a fitted normal curve and
 store the sample size, mean and standard deviation in data set ESTIMATES;
title 'Histogram of hgt for females';
ods select histogram;
proc univariate data=fdims;
    var hgt;
    histogram / normal;
    output out=estimates n=n mean=mean std=std;
run;

*Save the sample size, mean, and standard deviation as macro variables;
data _null_;
    set estimates;
    call symputx('n', n);
    call symputx('mean', mean);
    call symputx('std', std);
run;

*Create a Q-Q plot of height for females against a normal distribution
 whose parameters are estimated from the data;
title 'Q-Q plot of hgt for females';
ods select qqplot;
proc univariate data=fdims;
    var hgt;
    qqplot / normal(mu=est sigma=est);
run;

/*
 * Simulate one normal sample with the same size, mean and standard
 * deviation as the female heights. STREAMINIT must run before the first
 * RAND call in the step for the seed to take effect.
 */
data sim_norm;
    call streaminit(&seed);
    do i=1 to &n;
        x1=rand('NORMAL', &mean, &std);
        output;
    end;
run;

*Create a Q-Q plot for the simulated data;
title 'Q-Q plot for simulate x1';
ods select qqplot;
proc univariate data=sim_norm;
    var x1;
    qqplot x1 / normal(mu=&mean sigma=&std);
run;

/*
 * Create nine simulated variables, x1-x9, each drawn from the same normal
 * distribution. One output row is written per value of i, after all nine
 * variables have been filled.
 */
data simulated;
    array x {9} x1-x9;
    call streaminit(&seed);
    do i=1 to &n;
        do j=1 to 9;
            x[j]=rand('NORMAL', &mean, &std);
        end;
        output;
    end;
run;

*Generate Q-Q plots for the nine simulated variables;
title 'Q-Q plots for simulate x1-x9';
ods select qqplot;
proc univariate data=simulated;
    var x1-x9;
    qqplot x1-x9 / normal(mu=&mean sigma=&std);
run;

/*
 * Calculate the theoretical probability of a female being taller than
 * 182cm under a normal model with the sample mean and standard deviation
 * (upper tail = 1 - CDF), and print the result.
 */
data temp;
    result=1 - cdf('NORMAL', 182, &mean, &std);
run;

title 'Probability of a female being taller than 182cm';
proc print data=temp;
    var result;
run;

/*
 * Calculate the proportion of females that are taller than 182cm and print
 * the result. COUNT(hgt) counts only non-missing heights, so rows with a
 * missing hgt are excluded from both the numerator and the denominator.
 * The title literal is kept on a single source line so its text does not
 * depend on how SAS joins a quoted string across input records.
 */
title 'Proportion of females taller than 182cm';
proc sql;
    select b.n_tall / a.n as proportion
    from (select count(hgt) as n
          from fdims) as a,
         (select count(hgt) as n_tall
          from fdims
          where hgt>182) as b;
quit;

*Clear the title so it does not carry over to later output;
title;