#---------------------------------------------------------------------
#Andreas Christmann <a.christmann@hrz.uni-dortmund.de>
#University of Dortmund, HRZ, D-44221 Dortmund, Germany
#Email: a.christmann@hrz.uni-dortmund.de
#---------------------------------------------------------------------
# hbdp
# Modifications of the high breakdown point estimators
# LMS, LTS and MM
# for logistic regression, Poisson regression and Gamma
# regression with large groups.
# Written by Andreas Christmann !!! NO WARRANTY !!!
# (a.christmann@hrz.uni-dortmund.de), 10.09.1998,
# for S-Plus Version 4.0, Windows NT
#---------------------------------------------------------------------
#Copyright (C) Andreas Christmann, 1998
#This code is to be understood as Free Software.
#Please note, that there is NO WARRANTY.
#You can freely use and redistribute this software for non-commercial
#purposes only.
#You even are allowed to enhance and improve it as much as you like,
#as long as you distribute the source code together with the software.
#I want you to preserve the copyright of the original author(s), and
#encourage you very much to send me any improvements by e-mail.
#I'll willing to help with problems and bugs, too.
#---------------------------------------------------------------------
#The S-Plus function mmreg developed by Andreas Ruckstuhl
#(ETH Zuerich, 1995) is used to compute MM-estimators.
#The function mmreg is currently available from StatLib
#(http://lib.stat.cmu.edu/S/mmreg)
#---------------------------------------------------------------------
# Parametric model Y_i ~ F(m_i, x_i^T theta)
# email: a.christmann@hrz.uni-dortmund.de
# !!! NO WARRANTY !!!
# Christmann, A. (1994). Least median of weighted squares
#   in logistic regression with large strata.
#   Biometrika 81, 413-417
# Christmann, A. (1996). High breakdown point estimators
#   for regression models with large strata.
#   In: A. Prat, E. Ripoll (Eds.).
#   COMPSTAT, Proceedings in Computational Statistics,
#   UPC, Barcelona.
# written for: S-Plus 4.0 Windows NT
#   x               design matrix (n,k)
#   y               response vector (k,1)
#   m               vector of individual sample sizes
#   model           "Logistic", "Poisson", or "Gamma"
#   estim           "LMWS", "LTWS", or "WMM"
#   nsamp           number of subsamples
#                   integer value or "all"
#   randomn         random.n used in ltsreg
#   birthsn         births.n used in ltsreg
#   quan.lms        quantile used in lmsreg
#   quan.lts        quantile used in ltsreg
#   iter            =50, number of iterations for computation of
#                   MM estimators
#   output          results
#---------------------------------------------------------------------
# Examples:
# x <- matrix(c(1,1,1,1,1,1,1,1,1,1,
#                0,1,2,3,4,5,6,7,8,9), ncol=2)
# y <- matrix(c(0,1,2,5,6,8,9,1,10,10),ncol=1)
# m <- matrix(c(10,10,10,12,10,10,11,10,10,10),ncol=1)
# tmp <- hbdp(x,y,m,model="Logistic", estim="LMWS")
# plot(tmp)
# summary(hbdp(x,y,m,model="Poisson", estim="LTWS"))
# tmp <- hbdp(x,y,m,model="Gamma", estim="WMM")
# hbdp(x,y,m,model="Logistic",estim="WMM",iter=50)$coef
#---------------------------------------------------------------------

hbdp <- function( x, y, m, model, estim,
                    d=0.25,
                    lambda=1,
                    nsamp=10000,
                    quan.lms=floor((nrow(x)/2))+floor((ncol(x)+1)/2),
                    quan.lts=floor((nrow(x)+ncol(x)+1)/2),
                    randomn=10000, birthsn=10000, iter=50)
  {
    # Step 0: some checks
      if (   (model != "Logistic") & (model != "Poisson")
           & (model != "Gamma")
           & (estim != "LMWS")     & (estim != "LTWS")
           & (estim != "WMM") )
       { output <- c("invalid model and estimator definition")
         print(output)
         return(output) }
      if (  (model != "Logistic") & (model != "Poisson")
          & (model != "Gamma") )
       { output <- c("invalid model definition")
         print(output)
         return(output) }
      if (  (estim != "LMWS") & (estim != "LTWS")
          & (estim != "WMM") )
       { output <- c("invalid estimator definition")
         print(output)
         return(output) }
    # Step 1: transformation
        if ( model == "Logistic" )
        {
          p <- y/m
          pmod <- (y + 0.5) / (m + 1)
          p[y == 0 | y == m] <- pmod[y == 0 | y == m]
          v <- sqrt(m * p * (1 - p))
          xt <- x
          k <- ncol(xt)
          for(i in 1:k)  xt[, i] <- v * x[, i]
          yt <- v * log( p/(1 - p) )
        }
        if ( model == "Poisson" )
        {
          p <- y/m
          pmod <- (y + d) / m
          p[y == 0] <- pmod[y == 0]
          v <- sqrt( m * p )
          xt <- x
          k <- ncol(xt)
          for(i in 1:k)  xt[, i] <- v * x[, i]
          yt <- v * log(p)
        }
        if ( model == "Gamma" )
        {
          p <- y/m
          pmod <- d / m
          p[y == 0] <- pmod[y == 0]
          v <- sqrt(m * p * lambda)
          xt <- x
          k <- ncol(xt)
          for(i in 1:k)  xt[, i] <- v * x[, i]
          yt <- v * log( lambda*p )
        }
        # Step 2: Compute the estimate
        # if estim="LMWS" : call function lmsreg
        if ( estim == "LMWS" )
          { output <- lmsreg.formula ( yt ~ xt -1 , x=T, y=T,
                         nsamp=nsamp, wt=T, diagnostic=T,
                         quan=quan.lms, mve=T) }
        # if estim="LTWS" : call function ltsreg
        if ( estim == "LTWS" )
          { randomn <- max(randomn, 250 * k)
            birthsn <- max(birthsn, 250 * k + 75 * k * k)
            output <- ltsreg.formula(yt ~ xt -1, intercept=F,
                         quan=quan.lts, random.n = randomn,
                         births.n = birthsn,
                         x=T, y=T, wt=T, mcd=T) }
        # if estim="WMM" : call function mmreg developed by
        # Andreas Ruckstuhl, ETH Zuerich, 1995
        if ( estim == "WMM" )
          { zt <- matrix(cbind(xt,yt), ncol=(k+1))
            output <- mmreg( yt ~ xt -1, iter=iter) }
        return(output)
    }