<<echo=FALSE, results=hide>>=
## DATA GENERATION
## Group sizes: four distinct values between 10 and 15.
n <- 9 + sample(1:6, 4)

## Group-specific means (a random window of consecutive tenths) and
## standard deviations (between 0.70 and 0.80). The random draws happen in
## the same sequence as in a single nested call, so a fixed seed reproduces
## the same data.
group_mean <- sample(seq(from = sample(25:55, 1), by = 1, length.out = sample(15:20, 1)), 4) / 10
group_sd <- sample(70:80, 4) / 100

dat <- data.frame(
  Evaluation = rnorm(sum(n), mean = rep(group_mean, times = n), sd = rep(group_sd, times = n)),
  Occupation = factor(rep(1:4, times = n),
    labels = c("Student", "Employee", "Self-Employed", "Retired"))
)

## Censor the simulated evaluations to the admissible scale from 0 to 10.
dat$Evaluation <- pmin(pmax(dat$Evaluation, 0), 10)

## QUESTION/ANSWER GENERATION
## Containers for the five statements, their truth values, and explanations.
questions <- character(5)
solutions <- logical(5)
explanations <- character(5)

## Compare the intercept-only model with the model that allows a separate
## mean for each occupation.
fm0 <- lm(Evaluation ~ 1, data = dat)
fm1 <- lm(Evaluation ~ Occupation, data = dat)
myanova <- anova(fm0, fm1)

## Number of decimals for the RSS column: the more digits before the
## decimal point, the fewer after it (at most 3, at least 0). Computed once
## and reused for rounding and formatting.
rss_digits <- max(0, min(3, 5 - max(nchar(round(myanova[, 2], digits = 0)))))
rss <- round(myanova[, 2], digits = rss_digits)

## Character version of the ANOVA table as it is printed in the question.
myanova_print <- matrix(nrow = 2, ncol = 6)
dimnames(myanova_print) <- dimnames(myanova)
myanova_print[, 1] <- round(myanova[, 1])
myanova_print[, 3] <- fmt(myanova[, 3], 0)
myanova_print[, 2] <- format(rss, nsmall = rss_digits)
## The sum of squares is derived from the rounded RSS values so that it
## agrees with what can be computed from the printed table.
myanova_print[2, 4] <- format(-diff(rss), nsmall = rss_digits)
## The first row (null model) has no test-related entries.
myanova_print[1, 3:6] <- ""
## Always three decimals (trailing zeros kept), matching the way the test
## statistic is reported in the explanations.
myanova_print[2, 5] <- fmt(myanova[2, 5], 3)
myanova_print[2, 6] <- format.pval(myanova[2, 6])

## Fraction of explained variance, based on the rounded RSS values.
r2 <- 1 - rss[2] / rss[1]

## Statement 1: compare the F statistic with a random threshold in [1, 25].
f2 <- sample(10:250, 1) / 10
## Decide at random whether the statement claims "smaller" or "larger".
ask_smaller <- runif(1) > 0.5
direction <- if (ask_smaller) "smaller" else "larger"

questions[1] <- paste0("The test statistic is ", direction, " than $", f2, "$.")
solutions[1] <- if (ask_smaller) myanova[2, 5] < f2 else myanova[2, 5] > f2
explanations[1] <- paste0(
  "The test statistic is $F = ", fmt(myanova[2, 5], 3), "$ and hence ",
  if (solutions[1]) "" else "\\textit{not}",
  " ", direction, " than $", f2, "$."
)

## Statement 2: always false, the ANOVA alternative is not one-sided.
solutions[2] <- FALSE
questions[2] <- "A one-sided alternative was tested for the mean values."
explanations[2] <- paste0(
  "An ANOVA always tests the null hypothesis, ",
  "that all mean values are equal against the alternative hypothesis ",
  "that they are different."
)

## Statement 3: explained variance exceeds a random threshold (10% to 60%).
r2a <- sample(10:60, 1) / 100
questions[3] <- paste0(
  "The fraction of explained variance is larger than $", 100 * r2a, "$\\%."
)
solutions[3] <- r2 > r2a
negation3 <- if (solutions[3]) "" else "\\textit{not}"
explanations[3] <- paste0(
  "The fraction of explained variance is $", fmt(r2, 3), "$ and hence ",
  negation3, " larger than ", r2a, "."
)

## Statement 4: is the occupation effect significant at the 5% level?
questions[4] <- paste0(
  "It can be shown that the evaluation of the respondents depends on their occupation. ",
  "(Significance level $5\\%$)"
)
solutions[4] <- myanova[2, 6] < 0.05
## Inserted twice below; empty when the result is significant.
negation4 <- if (solutions[4]) "" else "\\textit{not}"
## paste() with its default separator is kept on purpose so that the
## generated text (including its spacing) stays exactly the same.
explanations[4] <- paste(
  "The $p$~value is $", format.pval(myanova[2, 6], digits = 3), "$ and hence",
  negation4, "significant. It can ", negation4,
  "be shown that the evaluations differ with respect to the occupation of the respondents."
)
    
## Statement 5: explained variance is below a threshold.
## If none of the other statements is true, the threshold is placed 1 to 10
## percentage points above the rounded-up explained variance (capped at
## 100%), which makes this statement true and guarantees a correct answer.
if (any(solutions)) {
  r2b <- sample(10:60, 1) / 100
} else {
  above_r2 <- ceiling(100 * r2) + 1:10
  r2b <- min(sample(above_r2, 1), 100) / 100
}
questions[5] <- paste0(
  "The fraction of explained variance is smaller than $", 100 * r2b, "$\\%."
)
solutions[5] <- r2 < r2b
negation5 <- if (solutions[5]) "" else "\\textit{not}"
explanations[5] <- paste0(
  "The fraction of explained variance is $", fmt(r2, 3), "$ and hence ",
  negation5, " smaller than ", r2b, "."
)

## Shuffle the statements; the same random order is applied to the
## statements, their truth values, and their explanations.
o <- sample(seq_along(questions))
explanations <- explanations[o]
solutions <- solutions[o]
questions <- questions[o]
@

\begin{question}
A survey with \Sexpr{sum(n)} persons was conducted to analyze the
design of an advertising campaign. Each respondent was asked to
evaluate the overall impression of the advertisement on an
eleven-point scale from 0 (bad) to 10 (good). The evaluations are
summarized separately with respect to type of occupation of the
respondents in the following figure.

\setkeys{Gin}{width=0.8\textwidth}
<<fig=TRUE, height = 4, width = 7, echo=FALSE, eps=FALSE, results=hide>>=
## Margins (bottom, left, top, right) in lines of text; the top and right
## margins are kept small.
par(mar = c(4, 4, 1, 1))
## Occupation is a factor, so this is expected to draw one boxplot of the
## evaluations per occupation group.
plot(Evaluation ~ Occupation, data = dat)
@

To analyze the influence of occupation on the evaluation of the
advertisement, an analysis of variance was performed:

<<echo=FALSE, results=verbatim>>=
## Turn off significance stars in printed output (global option).
options(show.signif.stars = FALSE)
## Print the pre-formatted character table without quotes and right-aligned.
print(myanova_print, quote = FALSE, right = TRUE)
@

Which of the following statements are correct?

<<echo=FALSE, results=tex>>=
## Emit the five statements as the answer list of the question.
## NOTE(review): answerlist() is not defined in this file; presumably it
## comes from the exams package -- confirm.
answerlist(questions)
@

\end{question}

\begin{solution}
In order to be able to answer the questions, the fraction of
explained variance has to be determined. The residual sum of squares
when using only a single overall mean value ($\mathit{RSS}_0$) as
well as the residual sum of squares when allowing different mean
values given occupation ($\mathit{RSS}_1$) are required. Both are
given in the \texttt{RSS}~column of the ANOVA~table.  The
fraction of explained variance is given by $1 -
\mathit{RSS}_1/\mathit{RSS}_0 = 1 - \Sexpr{rss[2]}/\Sexpr{rss[1]} =
\Sexpr{round(r2, digits = 3)}$.  

The statements above can now be evaluated as right or wrong.

<<echo=FALSE, results=tex>>=
## Emit "True"/"False" for each statement together with its explanation.
## NOTE(review): answerlist() is not defined in this file; presumably it
## comes from the exams package -- confirm.
answerlist(ifelse(solutions, "True", "False"), explanations)
@

\end{solution}

%% META-INFORMATION
%% \extype{mchoice}
%% \exsolution{\Sexpr{mchoice2string(solutions)}}
%% \exname{Analysis of variance}