Solution to Homework 8.

> unix.time({ inds = sample(c(T,F), 51e5, replace=T)
+   xval = numeric(51e5)
+   xval[inds] = abs(rnorm(sum(inds)))
+   xval[!inds] = -rexp(sum(!inds))
+   tmpsum = c(array(xval, c(1e5,51)) %*% rep(1,51)) })
> quantile(tmpsum, probs=c(.99,.999,.9999))
      99%    99.9%   99.99%
 14.40716 20.15382 24.08738
> unix.time({
+   cat("proportion > 15 = ", mean(tmpsum > 15), "\n") })
proportion > 15 =  0.00813
   user  system elapsed
  0.004   0.000   0.009

### 99% CI half-width based on R = 1e5
> qnorm(.995)*sqrt(.00813*(1-.00813)/1e5)
[1] 0.0007314586

### So the problem could be done by a naive method,
### with a required sample size something like
> .00813*(1-.00813)*qnorm(.995)^2/(.0002)^2
[1] 1337579
### Generating the one-million-plus samples of
### size 51 is certainly possible in this case,
### but I asked for importance sampling.

NOW FOR THE IMPORTANCE SAMPLING METHOD !!

PART (A):  Begin with calculus: we need to evaluate the integral of
f(x) * x^k * exp(lambda*x) as a convenient function of lambda,
for both k=0 and k=1.

> c0 = function(lam) 1/(2*(1+lam)) + exp(lam^2/2)*pnorm(lam)
> c1 = function(lam)
+      exp(lam^2/2)*(1/sqrt(2*pi)+lam*pnorm(lam)) - 1/(2*(1+lam)^2)

## Then the density g is a mixture, with weights
##    0.5/((1+lam)*c0(lam))  and  exp(lam^2/2)*pnorm(lam)/c0(lam) ,
## of a r.v. equal to the negative of an Expon(1+lam) r.v. and
## a r.v. with density dnorm(x-lam)/pnorm(lam) for x > 0.
## The second of these r.v.'s can be simulated as
##    lam + qnorm(1+(U-1)*pnorm(lam))   for fixed lam and U ~ Unif(0,1).

## To choose lam, we set the expectation of a single X ~ g
## equal to 15/51, i.e. (using the functions c0, c1 above)
## we solve the equation  c1(lam)/c0(lam) = 15/51 :

> unlist(uniroot(function(lam) c1(lam)/c0(lam) - 15/51, c(0,1))[c(1,4)])
        root   estim.prec
2.907810e-01 6.103516e-05

### so we will use lam = .291 to start
> wt = c(0.5/(1.291*c0(.291)), exp(.291^2/2)*pnorm(.291)/c0(.291))
> wt
[1] 0.3766207 0.6233793    ### compare with (1/2, 1/2) for the original f

> unix.time({
+   inds = sample(c(T,F), 51*4e4, rep=T, prob=wt)
+   xval = numeric(51*4e4)
+   xval[inds] = -rexp(sum(inds))/1.291
+   xval[!inds] = .291 + qnorm(1+(runif(sum(!inds))-1)*pnorm(.291))
+   tmpsum = c(array(xval, c(4e4,51)) %*% rep(1,51))
+   est = sum( exp(51*log(c0(.291))-0.291*tmpsum[tmpsum>15])/4e4 )
+   frac = mean(tmpsum>15)
+   cat("est=", est, ", frac=", frac, "\n") })
est= 0.007918638 , frac= 0.4615
   user  system elapsed
  1.384   0.195   1.603
## only about 1.6 seconds this way, with R = 4e4

## Estimated precision (99% CI half-width):
> sd(exp(51*log(c0(.291))-0.291*tmpsum)*(tmpsum>15)) * qnorm(.995)/sqrt(4e4)
[1] 0.0001685945     ### < .0002, as desired !!

### This shows that in place of a needed naive-method sample size
### of about 1.3e6, importance sampling required only 4e4 ---
### more than a 30-fold improvement.

## We might do still better by using control variates within the
## importance sampling.  Treat the variable whose mean we want as
##    I[X1+...+X51 > 15] * c0(lam)^51 * exp(-lam*(X1+...+X51))
## and the regressor variable as
##    (X1+...+X51 - 51*c1(lam)/c0(lam)) * c0(lam)^51 * exp(-lam*(X1+...+X51)) ,
## where we know that the mean 51*c1(lam)/c0(lam) is 15 when lam = .2907810 .

> cor(exp(51*log(c0(.2907810))-0.2907810*tmpsum)*(tmpsum>15),
+     (tmpsum-15)*exp(51*log(c0(.2907810))-0.2907810*tmpsum))
### But here the correlation is so small that there will be
### no improvement over straight importance sampling.
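### [Editor's sketch, not in the original transcript.]  As a sanity
### check on the calculus, the closed forms c0 and c1 can be compared
### against brute-force numerical integrals of f(x)*x^k*exp(lam*x),
### where f is the density simulated above (0.5*exp(x) for x < 0 and
### dnorm(x) for x > 0):

## numeric version of c_k(lam), splitting the integral at 0,
## where the density f changes form
ck.num = function(lam, k)
   integrate(function(x) 0.5*exp(x) * x^k * exp(lam*x), -Inf, 0)$value +
   integrate(function(x) dnorm(x) * x^k * exp(lam*x), 0, Inf)$value
> c(ck.num(.291, 0), c0(.291))    ## numeric vs. closed-form c0
> c(ck.num(.291, 1), c1(.291))    ## numeric vs. closed-form c1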
####### NOW FOR PART (B)
## We have found that lam = .291 is a good choice, but an optimal
## choice would minimize the true variance of the importance-sampling
## estimator.  Estimate that variance by simulation, as a function of
## lambda:

Vest = function(lambda, R=1e3) {
   w = 0.5/((1+lambda)*c0(lambda))
   inds = sample(c(T,F), 51*R, rep=T, prob=c(w,1-w))
   xval = numeric(51*R)
   xval[inds] = -rexp(sum(inds))/(1+lambda)
   xval[!inds] = lambda + qnorm(1+(runif(sum(!inds))-1)*pnorm(lambda))
   tmpsum = c(array(xval, c(R,51)) %*% rep(1,51))
   var( exp(51*log(c0(lambda))-lambda*tmpsum)*(tmpsum>15) )
 }

Vdat = numeric(41)
for(i in 1:41) Vdat[i] = Vest(.291 + (i-21)*.005)
> plot(.291+((1:41)-21)*.005, Vdat)
#### shows that there is likely a minimum between .29 and .35,
#### but the function is very noisy !!

Vdat2 = numeric(41)
for(i in 1:41) Vdat2[i] = Vest(.291 + (i-21)*.005, 1e4)
> plot(.291+((1:41)-21)*.005, Vdat2)
### Much more stable, but there seems to be a very
### flat minimum near .33 !!?

> xt = .291+((1:41)-21)*.005
tmp = lm(Vdat2 ~ xt + I(xt^2))
lines(xt, tmp$fit, lty=3)
tmp2 = lm(Vdat2 ~ xt + I(xt^2) + I(xt^3))
lines(xt, tmp2$fit, lty=5)      ### somewhat better, but not perfect
tmp3 = lm(Vdat2 ~ xt + I(xt^2) + I(xt^3) + I(xt^4))
lines(xt, tmp3$fit, lty=8)

> optimize(function(x) c(cbind(rep(1,length(x)),x,x^2,x^3) %*% tmp2$coef),
+          c(.25,.38))
$minimum
[1] 0.3152671

$objective
[1] 3.141898e-05
### could probably improve the fit with more
### points further to the right ...

> tmpspl = smooth.spline(xt, Vdat2, spar=.5)
> lines(xt, predict(tmpspl,xt)$y)
> optimize(function(x) predict(tmpspl,x)$y, c(.25,.38))
$minimum
[1] 0.3412287

$objective
[1] 3.076827e-05
#### The answer here is a little better, not because of the
#### objective-function value but because the spline fit is much better.
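### [Editor's sketch, not in the original transcript.]  One could close
### the loop by rerunning the Part (A) importance-sampling estimator at
### the smoothed optimum lam = .341 and comparing the estimated 99% CI
### half-width with the value .000169 obtained at lam = .291.
### Reuses c0() from Part (A):

lam = .341
w = 0.5/((1+lam)*c0(lam))        ## weight on the negative-exponential piece
inds = sample(c(T,F), 51*4e4, rep=T, prob=c(w,1-w))
xval = numeric(51*4e4)
xval[inds]  = -rexp(sum(inds))/(1+lam)
xval[!inds] = lam + qnorm(1+(runif(sum(!inds))-1)*pnorm(lam))
tmpsum = c(array(xval, c(4e4,51)) %*% rep(1,51))
lr = exp(51*log(c0(lam)) - lam*tmpsum)    ## importance weights f/g
cat("est =", mean(lr*(tmpsum>15)),
    ", 99% CI half-width =", sd(lr*(tmpsum>15))*qnorm(.995)/sqrt(4e4), "\n")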