1.2 R

1.2.1 Pacotes

Para o curso de Estatística Avançada considere o conjunto de pacotes a seguir.

# Additional packages used in the course - MAY TAKE SEVERAL MINUTES!!

# Select the Brazilian CRAN mirror by URL. Selecting it by position
# (chooseCRANmirror(ind = 11)) is fragile because the mirror list changes.
options(repos = c(CRAN = 'https://cran.fiocruz.br'))

# gRain is installed through BiocManager (https://bioconductor.org/install),
# so BiocManager itself must be available first.
if (!requireNamespace('BiocManager', quietly = TRUE)) {
  install.packages('BiocManager')
}
BiocManager::install(c('gRain'))

# CRAN packages, one per line. System requirements (Debian/Ubuntu) are noted
# next to the package that needs them. gRain is not repeated here because it
# is already installed by the BiocManager step above.
cran_packages <- c(
  'adabag',
  'BAS',
  'asbio',      # sudo apt-get install tk-dev
  'biotools',   # sudo apt-get install libudunits2-dev tk-dev
  'bnlearn',    # sudo apt-get install libmpfr-dev
  'bootstrap',
  'C50',
  'caret',
  'catnet',
  'cba',
  'CCA',
  'CCP',
  'ClusterR',
  'coefplot',
  'Compositional',
  'coronavirus',
  'data.tree',
  'deal',
  'DescTools',
  'devtools',
  'DirichletReg',
  'DT',
  'e1071',
  'ellipse',
  'extraDistr',
  'factoextra',
  'flexclust',
  'fpp3',
  'gamlss',
  'ggfortify',
  'ggstats',
  'Gmedian',
  'gRbase',
  'hrbrthemes',
  'ICSNP',
  'ipred',
  'janitor',
  'jocre',
  'JOUSBoost',
  'jtools',
  'klaR',
  'LaplacesDemon',
  'lavaan',
  'lestat',
  'markovchain',
  'matlib',
  'matrixcalc',
  'mctest',
  'mvtnorm',
  'neuralnet',
  'pcalg',
  'performance',
  'philentropy',
  'plotly',
  'PPtreeViz',
  'prinvars',
  'prophet',
  'Pursuit',
  'qqplotr',
  'rattle',
  'RColorBrewer',
  'rayshader',
  'readxl',
  'reticulate',
  # rJava requires a Java installation:
  #   sudo apt-get install default-jre
  #   sudo apt-get install default-jdk
  #   sudo R CMD javareconf
  'rJava',
  'rgl',
  'rmutil',
  'robCompositions',
  'RWeka',
  'semPlot',
  'tidyverse',
  'tree',
  'VGAM',
  'VIM',
  'visreg',
  'voice',
  'XML'
)

# dependencies = TRUE also installs the packages listed under Suggests.
install.packages(cran_packages, dependencies = TRUE)

# Install packages from GitHub (uses devtools, installed from CRAN beforehand)
devtools::install_github('filipezabala/desempateTecnico')
devtools::install_github('filipezabala/jurimetrics')
devtools::install_github('kassambara/ggcorrplot')
devtools::install_github('souravc83/fastAdaboost')
devtools::install_github('dkahle/dirichlet') # https://github.com/dkahle/dirichlet
devtools::install_github('cran/BMhyd')
devtools::install_github('HerveAbdi/PTCA4CATA', dependencies = TRUE)
# devtools::install_github('dustinfife/flexplavaan') # downloads more than 400MB and may take a while!
# install.packages(c('flexplot','semTools'), dep = T)
# install.packages('~/Downloads/dustinfife-flexplavaan-ab0ab48.tar.gz',
#                  repos = NULL, type = 'source')

# Install BART + nonlinvarsel
packs <- c('BART', 'foreach')
install.packages(packs, dependencies = TRUE)

# nonlinvarsel is distributed only as a source tarball. Download it to a
# temporary file (instead of a file named 'temp' in the working directory),
# install from it, and remove the file afterwards.
url <- 'http://www.rob-mcculloch.org/chm/nonlinvarsel_0.0.1.9001.tar.gz'
tarball <- tempfile(fileext = '.tar.gz')
download.file(url, destfile = tarball, mode = 'wb') # binary mode keeps the archive intact
install.packages(tarball, repos = NULL, type = 'source')
unlink(tarball)

# install.packages('drat', repos="https://cran.rstudio.com")
# drat:::addRepo('dmlc')
# install.packages('xgboost', repos='http://dmlc.ml/drat/', type = 'source')
# keras and tensorflow
# devtools::install_github('rstudio/keras')
# keras::install_keras()
# reticulate::py_config()
# reticulate::py_module_available('keras')
# install.packages('tensorflow', dep=T)

# Enable the r-universe repository that hosts ggord.
# Note: this replaces the session-wide `repos` option, so later installs and
# updates in this session use these two repositories.
options(repos = c(fawda123 = 'https://fawda123.r-universe.dev',
                  CRAN = 'https://cloud.r-project.org'))
install.packages('ggord', dependencies = TRUE)

# Update installed packages without prompting; repeat this weekly
update.packages(ask = FALSE)

1.2.2 Funções

Função plot_qda() para gráficos de QDA da Seção 10.9. Baseada em https://stackoverflow.com/questions/63782598/quadratic-discriminant-analysis-qda-plot-in-r.

#' Plot observations and the decision regions of a fitted classifier
#'
#' Draws the first two columns of `data` as points, predicts the class on a
#' regular grid spanning their ranges, and overlays contour lines of the
#' predicted class codes. Optionally the grid itself is drawn as small
#' points coloured by predicted class.
#'
#' @param model A fitted model accepted by `predict()`. It is called as
#'   `predict(model, grid, type = predict_type)`, where `grid` has the same
#'   two column names as the first two columns of `data`.
#' @param data A data frame; its first two columns are used as the x and y
#'   coordinates.
#' @param class Name (or index) of the column in `data` holding the observed
#'   class, or `NULL` to draw all observations with a single class.
#' @param predict_type Value passed as `type` to `predict()`.
#' @param resolution Number of grid points along each axis.
#' @param showgrid If `TRUE`, draw the prediction grid as small points.
#' @param ... Currently unused.
#' @return A ggplot object.
plot_qda <- function(model, data, class = NULL, predict_type = "class",
                     resolution = 100, showgrid = TRUE, ...) {
  if (is.null(class)) {
    cl <- 1
  } else {
    if (is.character(class) && !class %in% names(data)) {
      stop("Column '", class, "' not found in `data`.", call. = FALSE)
    }
    # `[[` returns a plain vector for both data frames and tibbles.
    cl <- data[[class]]
  }

  data <- data[, 1:2]
  cn <- colnames(data)
  x_var <- as.name(cn[1])
  y_var <- as.name(cn[2])

  # Code the observed classes through a factor, exactly like the predictions
  # below, so that character or numeric class columns get valid codes that
  # match the grid colours.
  data$pch <- data$col <- as.integer(as.factor(cl)) + 1L

  gg <- ggplot2::ggplot(data, ggplot2::aes(x = !!x_var, y = !!y_var)) +
    ggplot2::geom_point(
      ggplot2::aes(col = as.factor(col), shape = as.factor(col)),
      size = 3
    )

  # Build a resolution x resolution grid over the observed ranges.
  r <- vapply(data[, 1:2], range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)

  g <- data.frame(rep(xs, each = resolution),
                  rep(ys, times = resolution))
  names(g) <- cn

  # Guess how to get class labels from predict(); models are not consistent:
  # some return the labels directly, others a list with a `class` element.
  p <- predict(model, g, type = predict_type)
  if (is.list(p)) p <- p$class
  g$col <- g$pch <- as.integer(as.factor(p)) + 1L

  if (showgrid) {
    gg <- gg + ggplot2::geom_point(
      ggplot2::aes(x = !!x_var, y = !!y_var, col = as.factor(col)),
      data = g, shape = 20, size = 1
    )
  }

  # Contours of the integer class codes trace the decision boundaries.
  gg + ggplot2::geom_contour(
    ggplot2::aes(x = !!x_var, y = !!y_var, z = col),
    data = g, inherit.aes = FALSE
  )
}
# Usage
# NOTE(review): `model` is not defined in this snippet; presumably a classifier
# fitted on the first two columns of iris (see Section 10.9) - confirm.
plot_qda(model, iris, class = 'Species')

1.2.3 Limites

.Machine é uma variável que contém informações sobre as características numéricas da máquina em que o R está sendo executado, como o maior número de ponto flutuante de precisão dupla (double), o maior inteiro e a precisão da máquina. O algoritmo é baseado na sub-rotina MACHAR de Cody (1988). Veja ?.Machine para mais detalhes.

# Print the numerical characteristics of the machine R is running on
.Machine
## $double.eps
## [1] 2.220446e-16
## 
## $double.neg.eps
## [1] 1.110223e-16
## 
## $double.xmin
## [1] 2.225074e-308
## 
## $double.xmax
## [1] 1.797693e+308
## 
## $double.base
## [1] 2
## 
## $double.digits
## [1] 53
## 
## $double.rounding
## [1] 5
## 
## $double.guard
## [1] 0
## 
## $double.ulp.digits
## [1] -52
## 
## $double.neg.ulp.digits
## [1] -53
## 
## $double.exponent
## [1] 11
## 
## $double.min.exp
## [1] -1022
## 
## $double.max.exp
## [1] 1024
## 
## $integer.max
## [1] 2147483647
## 
## $sizeof.long
## [1] 8
## 
## $sizeof.longlong
## [1] 8
## 
## $sizeof.longdouble
## [1] 8
## 
## $sizeof.pointer
## [1] 8
## 
## $sizeof.time_t
## [1] 8

Referências

Cody, William J. 1988. “Algorithm 665: Machar: A Subroutine to Dynamically Determined Machine Parameters.” ACM Transactions on Mathematical Software (TOMS) 14 (4): 303–11. https://doi.org/10.1145/50063.51907.