################################################################################ ## Optimally robust estimation by means of R packages ################################################################################ ################################################################################ ## Example 1: Normal location and scale ################################################################################ ## load ROptEst - also loads: distr, evd, distrEx, RandVar require(ROptEst) require(MASS) #par(mfrow = c(2,1)) plot(chem, ylab = "parts per million") title("Copper in wholemeal flour") #qqnorm(chem) #qqline(chem) options(warn = -1) ## Step 1: ## initial estimate: Kolomogorov (-Smirnov) minimum distance estimator (est0 <- ksEstimator(chem, Norm())) ## ideal model: normal location and scale (N1 <- NormLocationScaleFamily(mean = est0$mean, sd = est0$sd)) ## Step 2: ## infinitesimal robust model (R1 <- InfRobModel(center = N1, neighbor = ContNeighborhood(radius = 2/sqrt(24)))) ## optimally robust IC # IC1 <- optIC(model = R1, risk=asMSE()) ## Step 3: ## optimally robust estimator via one-step method # est1 <- oneStepEstimator(chem, IC1, est0) ## special case: normal location and scale ## use package "RobLox"!!! - computations are clearly faster require(RobLox) ## Step 2 + 3: ## computes radius-minimax estimator via one-step method est2 <- roblox(chem, eps = 1/12, returnIC = TRUE) ## check the IC properties resp. precision of computation # checkIC(est2$optIC) ## let's have a look at the optimally robust IC plot(est2$optIC) ## estimator comparison require(RColorBrewer) mypalette <- brewer.pal(4,"Dark2") palette(mypalette) par(mfrow=c(1,1)) plot(chem[-17]) abline(h = mean(chem), col = 1, lwd = 2) abline(h = mean(chem) - sd(chem), col = 1, lwd = 2) abline(h = mean(chem) + sd(chem), col = 1, lwd = 2) abline(h = median(chem), col = 2, lwd = 2) abline(h = median(chem) - mad(chem), col = 2, lty = 2, lwd = 2) abline(h = median(chem) + mad(chem), col = 2, lty = 2, lwd = 2) abline(h = est0$mean, col = 3, lwd = 2) abline(h = est0$mean - est0$sd, col = 3, lty = 2, lwd = 2) abline(h = est0$mean + est0$sd, col = 3, lty = 2, lwd = 2) abline(h = est2$mean, col = 4, lwd = 2) abline(h = est2$mean - est2$sd, col = 4, lty = 2, lwd = 2) abline(h = est2$mean + est2$sd, col = 4, lty = 2, lwd = 2) legend(1, max(chem[-17])-0.1, legend = c(expression("mean" %+-% "sd"), expression("median" %+-% "mad"), expression("ksMD$mean" %+-% "ksMD$sd"), expression("optRob$mean" %+-% "optRob$sd")), fill = 1:4) title("Optimally Robust Estimation") ## many things to do! ## in particular, a better user interface for the Methods in package ROptEst ################################################################################ ## Example 2: Linear Regression with normal errors ################################################################################ require(ROptRegTS) require(robustbase) data(exAM) plot(exAM) title("Artificial data set by Antille and May") ## regressor distribution: design measure K <- DiscreteMVDistribution(cbind(1,exAM$x)) ## Step 1: ## initial estimate: MM estimator (est0 <- rlm(y ~ x, data=exAM, method = "MM")) ## ideal model: normal location and scale (N1 <- NormLinRegScaleFamily(theta = est0$coeff, scale = est0$s, RegDistr = K)) ## Step 2: ## infinitesimal robust model (1 observation contaminated) (R1 <- InfRobRegTypeModel(center = N1, neighbor = ContNeighborhood(radius = 1/sqrt(12)))) ## optimally robust IC: takes about 80 sec. # IC1 <- optIC(model = R1, risk=asMSE()) ## special case: normal errors ## use package "RobRex"!!! - computations are clearly faster require(RobRex) # takes about 8 sec. (IC2 <- rgsOptIC.AL(r = 1/sqrt(12), K = K, theta = est0$coeff, scale = est0$s)) ## check the IC properties resp. precision of computation # checkIC(IC2) ## Step 3: ## optimally robust estimator via one-step method #est1 <- oneStepEstimator(cbind(1,as.matrix(exAM)), IC1, c(est0$coeff, est0$s)) est2 <- oneStepEstimator(cbind(1,as.matrix(exAM)), IC2, c(est0$coeff, est0$s)) ## estimator comparison require(RColorBrewer) mypalette <- brewer.pal(6, "Dark2") palette(mypalette) ## LS-estimator ls <- lm(y ~ x, data = exAM) ## Huber M-estimator rls <- rlm(y ~ x, data = exAM) ## LTS-estimator lts <- ltsReg(y ~ x, data = exAM) plot(exAM) abline(ls, col = 1, lwd = 2) abline(rls, col = 2, lwd = 2) abline(lts, col = 3, lwd = 2) abline(est0, col = 6, lwd = 2) lines(c(1, exAM$x), est2[1] + est2[2]*c(1,exAM$x), col = 4, lwd = 2) legend("topleft", legend = c("LS", "Huber", "LTS", "MM", "optRob"), fill = c(1:3,6,4), yjust = 0) title("Optimally Robust Estimation") ## many things to do! ## in particular, formula interface, summary and plot methods like summary(lts) plot(lts)