use ggpubr to make data visualization more elegant

created at 06-27-2021 views: 5

ggpubr is a visualization package based on ggplot2, created by Alboukadel Kassambara. It is mainly used to draw publication-ready graphics.

Installation and loading

Install from CRAN:

install.packages("ggpubr")

Or install the latest version from GitHub:

# Install devtools only if it is not already available, then install the
# development version of ggpubr from GitHub.
# requireNamespace() tests for the package without attaching it; devtools is
# only ever called through the devtools:: prefix below.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("kassambara/ggpubr")

Load package:

library(ggplot2)
library(ggpubr)

Distribution diagram

# Build the example data set: 200 simulated weights for each sex
set.seed(1234)  # fix the RNG seed so the simulated values are reproducible

wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# Show the first four rows
head(wdata, 4)
  • set.seed() sets the seed of the random number generator so that the result is reproducible. If the seed is not set, the generated random numbers differ from run to run and cannot be reproduced.
> head(wdata, 4)
  sex   weight
1   F 53.79293
2   F 55.27743
3   F 56.08444
4   F 52.65430

Density distribution diagram

# Density plot of weight, colored and filled by sex, with a mean line per
# group and a marginal rug
ggdensity(wdata, x = "weight",
   add = "mean", rug = TRUE,
   color = "sex", fill = "sex",
   palette = c("#00AFBB", "#E7B800"))

detailed parameters:

Usage
# Usage listing for ggdensity() with its default argument values
ggdensity(
  data,
  x,
  y = "..density..",
  combine = FALSE, # Whether to draw separate panels when several variables are given
  merge = FALSE, # Whether to merge several variables into the same plot
  color = "black",# Line color
  fill = NA,# Fill color
  palette = NULL,# Custom color palette
  size = NULL,# Point and line size
  linetype = "solid",# Line type
  alpha = 0.5,# Transparency
  title = NULL,# Plot title
  xlab = NULL,# x-axis label
  ylab = NULL,# y-axis label
  facet.by = NULL,# Grouping variable(s) used to facet the plot
  panel.labs = NULL,# Labels for the facet panels
  short.panel.labs = TRUE,
  add = c("none", "mean", "median"),# Add a mean or median line
  add.params = list(linetype = "dashed"),# Parameters for the added line
  rug = FALSE,# Whether to add a marginal rug
  label = NULL,# Column containing the labels
  font.label = list(size = 11, color = "black"), # Label font
  label.select = NULL,
  repel = FALSE,# Whether to keep labels from overlapping
  label.rectangle = FALSE,# Whether to draw a box around each label
  ggtheme = theme_pubr(),# Plot theme
  ...
)

Histogram

# Histogram of weight, colored and filled by sex
gghistogram(
  wdata,
  x = "weight",
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800"),
  add = "mean",  # add a mean line
  rug = TRUE     # add a marginal rug
)

Box plot and violin plot

# Load the built-in ToothGrowth data set and refer to it as df
data("ToothGrowth")
df <- ToothGrowth
# Show the first four rows
head(df, n = 4)
> head(df, 4)
   len supp dose
1  4.2   VC  0.5
2 11.5   VC  0.5
3  7.3   VC  0.5
4  5.8   VC  0.5

Box plot

# Box plot of len by dose with jittered points; color and point shape both
# follow dose
p <- ggboxplot(
  df,
  x = "dose",
  y = "len",
  add = "jitter",
  shape = "dose",
  color = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07")
)
p

# Add p-values
# Pairs of dose groups to be compared
my_comparisons <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
# First layer: one p-value per listed pair.
# Second layer: a single label placed at y = 50.
p +
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 50)

detailed parameters

# Usage listing for stat_compare_means() with its default argument values
stat_compare_means(
  mapping = NULL,
  data = NULL,
  method = NULL,# Statistical test to use; see the table below
  paired = FALSE,# Whether to perform a paired test
  method.args = list(),# Additional arguments passed to the test, such as method.args = list(alternative = "greater")
  ref.group = NULL,# Reference (control) group
  comparisons = NULL,# List of the group pairs to compare
  hide.ns = FALSE,# Whether to hide the non-significant ("ns") label
  label.sep = ", ",# String separating the terms of the label. The default is ", "
  label = NULL,# Label to display, e.g. "p.signif"
  label.x.npc = "left",# Horizontal position of the label
  label.y.npc = "top",# Vertical position of the label
  # The two positions above take a number in 0-1 or a keyword such as 'right', 'left', 'center', 'centre', 'middle'
  label.x = NULL,
  label.y = NULL,# A value giving the absolute coordinate at which the label is displayed
  vjust = 0,# Move the text up or down
  tip.length = 0.03,
  bracket.size = 0.3,
  step.increase = 0,
  symnum.args = list(),# Default: symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")). It can be customized
  geom = "text",
  position = "identity",
  na.rm = FALSE,# If FALSE (the default), missing values are removed with a warning. If TRUE, they are removed silently.
  show.legend = NA,# Whether to include this layer in the legend
  inherit.aes = TRUE,
  ...
)

Commonly used statistical methods:

Method | R function | Description
T-test | t.test() | Compare two groups (parametric test)
Wilcoxon test | wilcox.test() | Compare two groups (non-parametric test)
ANOVA | aov() | Compare multiple groups (parametric test)
Kruskal-Wallis | kruskal.test() | Compare multiple groups (non-parametric test)

Violin Diagram + Box Plot

# Violin plot of len by dose with a white-filled box plot added to each violin
ggviolin(
  df,
  x = "dose",
  y = "len",
  add = "boxplot",
  add.params = list(fill = "white"),
  fill = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07")
) +
  # Label each listed pair with its significance level
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") +
  # Additional label placed at y = 50
  stat_compare_means(label.y = 50)

Bar graph

# Start from the built-in mtcars data set
data("mtcars")
dfm <- mtcars
# Store cyl as a factor rather than a number
dfm[["cyl"]] <- as.factor(dfm[["cyl"]])
# Copy the row names into a regular column called name
dfm[["name"]] <- rownames(dfm)
# Inspect a few columns
head(dfm[, c("name", "wt", "mpg", "cyl")])
> head(dfm[, c("name", "wt", "mpg", "cyl")])
                               name    wt  mpg cyl
Mazda RX4                 Mazda RX4 2.620 21.0   6
Mazda RX4 Wag         Mazda RX4 Wag 2.875 21.0   6
Datsun 710               Datsun 710 2.320 22.8   4
Hornet 4 Drive       Hornet 4 Drive 3.215 21.4   6
Hornet Sportabout Hornet Sportabout 3.440 18.7   8
Valiant                     Valiant 3.460 18.1   6

Sequence bar chart

# Bars sorted by mpg in descending order, regardless of the cyl group
ggbarplot(
  dfm,
  x = "name",
  y = "mpg",
  sort.val = "desc",       # descending order
  sort.by.groups = FALSE,  # do not sort within groups
  fill = "cyl",            # fill color follows the cyl group
  color = "white",         # white bar borders
  palette = "jco",         # "jco" journal color palette
  x.text.angle = 90        # rotate the x-axis text by 90 degrees
)

# Bars sorted by mpg in ascending order within each cyl group
ggbarplot(
  dfm,
  x = "name",
  y = "mpg",
  sort.val = "asc",        # ascending order
  sort.by.groups = TRUE,   # sort within groups
  fill = "cyl",
  color = "white",
  palette = "jco",
  x.text.angle = 90
)

Deviation diagram

The deviation graph shows the deviation of the quantitative value from the reference value.

The z-score, also known as the standard score, is obtained by dividing the difference between a value and the mean by the standard deviation. In statistics, the standard score is the number of standard deviations by which the value of an observation or data point lies above or below the mean of what is being observed or measured.

# z-score of mpg: (value - mean) / standard deviation
dfm$mpg_z <- with(dfm, (mpg - mean(mpg)) / sd(mpg))
# Group by the sign of the z-score: "low" when negative, "high" otherwise.
# The result is stored as a factor whose first level is "low".
dfm$mpg_grp <- factor(
  ifelse(dfm$mpg_z < 0, "low", "high"),
  levels = c("low", "high")
)
# Inspect the new columns next to the original ones
head(dfm[, c("name", "wt", "mpg", "mpg_z", "mpg_grp", "cyl")])
> head(dfm[, c("name", "wt", "mpg", "mpg_z", "mpg_grp", "cyl")])
                               name    wt  mpg      mpg_z mpg_grp cyl
Mazda RX4                 Mazda RX4 2.620 21.0  0.1508848    high   6
Mazda RX4 Wag         Mazda RX4 Wag 2.875 21.0  0.1508848    high   6
Datsun 710               Datsun 710 2.320 22.8  0.4495434    high   4
Hornet 4 Drive       Hornet 4 Drive 3.215 21.4  0.2172534    high   6
Hornet Sportabout Hornet Sportabout 3.440 18.7 -0.2307345     low   8
Valiant                     Valiant 3.460 18.1 -0.3302874     low   6
# Deviation bars sorted by z-score in ascending order
ggbarplot(
  dfm,
  x = "name",
  y = "mpg_z",
  sort.val = "asc",            # ascending order
  sort.by.groups = FALSE,      # do not sort within groups
  fill = "mpg_grp",            # fill color follows the low/high group
  color = "white",             # white bar borders
  palette = "jco",
  x.text.angle = 90,           # rotate the x-axis text by 90 degrees
  xlab = FALSE,                # no x-axis title
  ylab = "MPG z-score",
  legend.title = "MPG Group"
)

# Same bars sorted in descending order and drawn with the plot rotated
ggbarplot(
  dfm,
  x = "name",
  y = "mpg_z",
  sort.val = "desc",           # descending order
  sort.by.groups = FALSE,
  rotate = TRUE,               # rotate the plot so the bars run horizontally
  fill = "mpg_grp",
  color = "white",
  palette = "jco",
  x.text.angle = 90,
  ylab = "MPG z-score",
  legend.title = "MPG Group",
  ggtheme = theme_minimal()
)

Dot plot
Lollipop illustration

# Lollipop chart of mpg per car, sorted in ascending order
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  sorting = "ascending",                         # ascending order
  add = "segments",                              # draw a segment from y = 0 to each dot
  color = "cyl",                                 # color by cyl group
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),  # custom palette
  ggtheme = theme_pubr()                         # plot theme
)

# Rotated lollipop chart, sorted in descending order within each cyl group,
# with the rounded mpg value printed inside each dot
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  sorting = "descending",                        # descending order
  group = "cyl",                                 # sort within cyl groups
  rotate = TRUE,                                 # rotate the plot
  add = "segments",
  dot.size = 6,
  label = round(dfm$mpg),
  font.label = list(color = "white", size = 9,
                    vjust = 0.5),                # label appearance
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  ggtheme = theme_pubr()
) 

# Lollipop chart of the mpg z-score with light gray segments and a dashed
# reference line at zero
ggdotchart(
  dfm,
  x = "name",
  y = "mpg_z",
  sorting = "descending",
  group = "cyl",
  add = "segments",
  add.params = list(color = "lightgray", size = 2),  # segment color and size
  dot.size = 6,
  label = round(dfm$mpg_z, 1),
  font.label = list(color = "white", size = 9,
                    vjust = 0.5),
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  ggtheme = theme_pubr()
) +
  # Horizontal line at y = 0
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")

Cleveland dot chart

# Cleveland dot chart of mpg per car: small dots, no segments, plot rotated
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  sorting = "descending",                        # descending order
  rotate = TRUE,                                 # rotate the plot
  dot.size = 2,
  y.text.col = TRUE,                             # color the y-axis text by group
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  ggtheme = theme_pubr()
) +
  theme_cleveland()  # Cleveland theme

Please log in to leave a comment.