<<echo=FALSE, results=hide>>=
## NOTE(review): the chunk header had been reduced to "<>="; the options
## above are reconstructed (silent setup chunk) -- confirm against the
## original exercise.

## DATA GENERATION
## Population correlation: strongly negative, none, moderate, strongly positive.
r <- sample(c(-0.97, 0, 0.5, 0.97), 1)

## With probability 1/3 use standardized parameters (mean 0, sd 1);
## otherwise draw the means and standard deviations at random.
if (runif(1) < 1/3) {
  mx <- my <- 0
  sx <- sy <- 1
} else {
  mx <- sample(10 * -5:5, 1)
  my <- sample(20 * 0:5, 1)
  sx <- sample(c(1, 10, 20), 1)
  sy <- sample(c(1, 10, 20), 1)
}

## Slope and intercept of the regression line of Y on X implied by the
## parameters above.
b <- r * sy / sx
a <- my - b * mx

## 200 observations scattered around that line.
x <- rnorm(200, mx, sx)
y <- b * x + rnorm(200, a, sy * sqrt(1 - r^2))

## QUESTION/ANSWER GENERATION
questions <- character(5)
solutions <- logical(5)
explanations <- character(5)

## Statement 1: standardization or slope (chosen at random).
if (runif(1) < 0.5) {
  questions[1] <- "The scatterplot is standardized."
  solutions[1] <- mx == 0 && my == 0 && sx == 1 && sy == 1
  explanations[1] <- if (solutions[1]) {
    "$X$ and $Y$ have both mean $0$ and variance $1$."
  } else {
    "The scatterplot is not standardized, because $X$ and $Y$ do not both have mean $0$ and variance $1$."
  }
} else {
  questions[1] <- "The slope of the regression line is about $1$."
  solutions[1] <- abs(b - 1) < 0.1
  ## Build the wording from the solution flag so that no empty fragment
  ## (and hence no double space) ends up in the sentence.
  explanations[1] <- paste(
    "The slope of the regression line is given by $r \\cdot s_y/s_x$ and hence",
    if (solutions[1]) "about" else "not about",
    "equal to $1$."
  )
}

## Statement 2: magnitude of the correlation coefficient.
if (runif(1) < 0.5) {
  questions[2] <- "The absolute value of the correlation coefficient is at least $0.8$."
  solutions[2] <- abs(r) >= 0.8
} else {
  questions[2] <- "The absolute value of the correlation coefficient is at most $0.8$."
  solutions[2] <- abs(r) <= 0.8
}
explanations[2] <- if (abs(r) >= 0.9) {
  paste(
    "A strong association between the variables is given in the scatterplot.",
    "Hence the absolute value of the correlation coefficient is close to $1$",
    "and therefore larger than $0.8$."
  )
} else if (r == 0) {
  paste(
    "No association between the variables is observed in the scatterplot.",
    "This implies a correlation coefficient close to $0$."
  )
} else {
  ## Only r = 0.5 reaches this branch, so "positive" is accurate.
  paste(
    "Only a slightly positive association between the variables is observable in the scatterplot.",
    "This implies a correlation coefficient with an absolute value smaller than $0.8$."
  )
}

## Statement 3: standard deviation of X or of Y.
if (runif(1) < 0.5) {
  questions[3] <- "The standard deviation of $X$ is at least $6$."
  solutions[3] <- sx >= 6
  explanations[3] <- paste(
    "The standard deviation of $X$ is about equal to $", sx, "$ and is therefore",
    if (sx < 6) "smaller" else "larger", "than $6$."
  )
} else {
  questions[3] <- "The standard deviation of $Y$ is at least $6$."
  solutions[3] <- sy >= 6
  explanations[3] <- paste(
    "The standard deviation of $Y$ is about equal to $", sy, "$ and is therefore",
    if (sy < 6) "smaller" else "larger", "than $6$."
  )
}

## Statement 4: mean of X or of Y.
if (runif(1) < 0.5) {
  questions[4] <- "The mean of $X$ is at most $5$."
  solutions[4] <- mx <= 5
  explanations[4] <- paste(
    "The mean of $X$ is about equal to $", mx, "$ and hence is",
    if (mx < 5) "smaller" else "larger", "than $5$."
  )
} else {
  questions[4] <- "The mean of $Y$ is at least $30$."
  solutions[4] <- my >= 30
  explanations[4] <- paste(
    "The mean of $Y$ is about equal to $", my, "$ and hence is",
    if (my < 30) "smaller" else "larger", "than $30$."
  )
}

## Statement 5: prediction at a point xh within one standard deviation of mx.
## yhr is the value on the regression line (the correct answer); yhf is that
## value shifted by two standard deviations of Y (the incorrect answer).
xh <- round(runif(1, -1, 1) * sx + mx, 1)
yhr <- round(a + b * xh, 1)
alpha <- if (abs(r) > 0 && abs(mx - xh) > 0) sign(mx - xh) * sign(r) else 1
yhf <- round(yhr + 2 * sy * alpha, 1)

## assure at least one correct and one incorrect solution:
## solutions[5] is still FALSE here, so the sum counts statements 1-4 only.
yh <- switch(as.character(sum(solutions)),
  "0" = yhr,
  "4" = yhf,
  sample(c(yhr, yhf), 1)
)
## paste0() glues the pieces without separators (the former
## paste(..., collapse = "") left stray blanks around the numbers); the Y
## value is set in math mode like in the explanation below.
questions[5] <- paste0(
  "For $X = ", xh, "$, $Y$ can be expected to be about $", yh, "$."
)
solutions[5] <- abs(yh - yhr) < 0.01 * sy
explanations[5] <- paste0(
  "The regression line at $X=", xh, "$ implies a value of about $Y = ", yhr, "$."
)

## permute order of solutions/questions
o <- sample(1:5)
questions <- questions[o]
solutions <- solutions[o]
explanations <- explanations[o]
@

\begin{question}
The following figure shows a scatterplot. Which of the following
statements are correct?
\setkeys{Gin}{width=0.7\textwidth}
<<fig=TRUE, height=5, width=5, echo=FALSE, eps=FALSE, results=hide>>=
## NOTE(review): the chunk headers in this part had been reduced to "<>=";
## the options (figure chunk here, results=tex for the answer lists below)
## are reconstructed -- confirm against the original exercise.
## Scatterplot of the simulated data.
plot(x, y)
@
<<echo=FALSE, results=tex>>=
## Emit the (permuted) statements as the answer list of the question.
answerlist(questions)
@
\end{question}

\begin{solution}
<<echo=FALSE, results=tex>>=
## One entry per statement: True/False followed by its explanation.
answerlist(ifelse(solutions, "True", "False"), explanations)
@
\end{solution}

%% META-INFORMATION
%% \extype{mchoice}
%% \exsolution{\Sexpr{mchoice2string(solutions)}}
%% \exname{Scatterplot}