#--------------------------------------------------------------------- #Andreas Christmann #University of Dortmund, HRZ, D-44221 Dortmund, Germany #Email: a.christmann@hrz.uni-dortmund.de #--------------------------------------------------------------------- # hbdp # Modifications of the high breakdown point estimators # LMS, LTS and MM # for logistic regression, Poisson regression and Gamma # regression with large groups. # Written by Andreas Christmann !!! NO WARRANTY !!! # (a.christmann@hrz.uni-dortmund.de), 10.09.1998, # for S-Plus Version 4.0, Windows NT #--------------------------------------------------------------------- #Copyright (C) Andreas Christmann, 1998 #This code is to be understood as Free Software. #Please note, that there is NO WARRANTY. #You can freely use and redistribute this software for non-commercial #purposes only. #You even are allowed to enhance and improve it as much as you like, #as long as you distribute the source code together with the software. #I want you to preserve the copyright of the original author(s), and #encourage you very much to send me any improvements by e-mail. #I'll willing to help with problems and bugs, too. #--------------------------------------------------------------------- #The S-Plus function mmreg developed by Andreas Ruckstuhl #(ETH Zuerich, 1995) is used to compute MM-estimators. #The function mmreg is currently available from StatLib #(http://lib.stat.cmu.edu/S/mmreg) #--------------------------------------------------------------------- # Parametric model Y_i ~ F(m_i, x_i^T theta) # email: a.christmann@hrz.uni-dortmund.de # !!! NO WARRANTY !!! # Christmann, A. (1994). Least median of weighted squares # in logistic regression with large strata. # Biometrika 81, 413-417 # Christmann, A. (1996). High breakdown point estimators # for regression models with large strata. # In: A. Prat, E. Ripoll (Eds.). # COMPSTAT, Proceedings in Computational Statistics, # UPC, Barcelona. # written for: S-Plus 4.0 Windows NT # x design matrix (n,k) # y response vector (k,1) # m vector of individual sample sizes # model "Logistic", "Poisson", or "Gamma" # estim "LMWS", "LTWS", or "WMM" # nsamp number of subsamples # integer value or "all" # randomn random.n used in ltsreg # birthsn births.n used in ltsreg # quan.lms quantile used in lmsreg # quan.lts quantile used in ltsreg # iter =50, number of iterations for computation of # MM estimators # output results #--------------------------------------------------------------------- # Examples: # x <- matrix(c(1,1,1,1,1,1,1,1,1,1, # 0,1,2,3,4,5,6,7,8,9), ncol=2) # y <- matrix(c(0,1,2,5,6,8,9,1,10,10),ncol=1) # m <- matrix(c(10,10,10,12,10,10,11,10,10,10),ncol=1) # tmp <- hbdp(x,y,m,model="Logistic", estim="LMWS") # plot(tmp) # summary(hbdp(x,y,m,model="Poisson", estim="LTWS")) # tmp <- hbdp(x,y,m,model="Gamma", estim="WMM") # hbdp(x,y,m,model="Logistic",estim="WMM",iter=50)$coef #--------------------------------------------------------------------- hbdp <- function( x, y, m, model, estim, d=0.25, lambda=1, nsamp=10000, quan.lms=floor((nrow(x)/2))+floor((ncol(x)+1)/2), quan.lts=floor((nrow(x)+ncol(x)+1)/2), randomn=10000, birthsn=10000, iter=50) { # Step 0: some checks if ( (model != "Logistic") & (model != "Poisson") & (model != "Gamma") & (estim != "LMWS") & (estim != "LTWS") & (estim != "WMM") ) { output <- c("invalid model and estimator definition") print(output) return(output) } if ( (model != "Logistic") & (model != "Poisson") & (model != "Gamma") ) { output <- c("invalid model definition") print(output) return(output) } if ( (estim != "LMWS") & (estim != "LTWS") & (estim != "WMM") ) { output <- c("invalid estimator definition") print(output) return(output) } # Step 1: transformation if ( model == "Logistic" ) { p <- y/m pmod <- (y + 0.5) / (m + 1) p[y == 0 | y == m] <- pmod[y == 0 | y == m] v <- sqrt(m * p * (1 - p)) xt <- x k <- ncol(xt) for(i in 1:k) xt[, i] <- v * x[, i] yt <- v * log( p/(1 - p) ) } if ( model == "Poisson" ) { p <- y/m pmod <- (y + d) / m p[y == 0] <- pmod[y == 0] v <- sqrt( m * p ) xt <- x k <- ncol(xt) for(i in 1:k) xt[, i] <- v * x[, i] yt <- v * log(p) } if ( model == "Gamma" ) { p <- y/m pmod <- d / m p[y == 0] <- pmod[y == 0] v <- sqrt(m * p * lambda) xt <- x k <- ncol(xt) for(i in 1:k) xt[, i] <- v * x[, i] yt <- v * log( lambda*p ) } # Step 2: Compute the estimate # if estim="LMWS" : call function lmsreg if ( estim == "LMWS" ) { output <- lmsreg.formula ( yt ~ xt -1 , x=T, y=T, nsamp=nsamp, wt=T, diagnostic=T, quan=quan.lms, mve=T) } # if estim="LTWS" : call function ltsreg if ( estim == "LTWS" ) { randomn <- max(randomn, 250 * k) birthsn <- max(birthsn, 250 * k + 75 * k * k) output <- ltsreg.formula(yt ~ xt -1, intercept=F, quan=quan.lts, random.n = randomn, births.n = birthsn, x=T, y=T, wt=T, mcd=T) } # if estim="WMM" : call function mmreg developed by # Andreas Ruckstuhl, ETH Zuerich, 1995 if ( estim == "WMM" ) { zt <- matrix(cbind(xt,yt), ncol=(k+1)) output <- mmreg( yt ~ xt -1, iter=iter) } return(output) }