ggpubr is a visualization package based on ggplot2, created by Alboukadel Kassambara. It is mainly used to draw publication-ready graphics.
Install from CRAN:
install.packages("ggpubr")
Or install the latest version from GitHub:
# Install the development version of ggpubr from GitHub.
# requireNamespace() only checks whether devtools is available; unlike
# require(), it does not attach the package to the search path.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("kassambara/ggpubr")
Load package:
library(ggplot2)
library(ggpubr)
# Build the data set: 200 simulated weights for each sex.
# The seed is fixed so that the random draws are reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# Show the first four rows
head(wdata, 4)
set.seed()
sets the seed of the random number generator. Setting a seed makes the result reproducible: running the code again with the same seed generates the same random numbers. If the seed is not set, the random numbers generated cannot be reproduced.
> head(wdata, 4)
sex weight
1 F 53.79293
2 F 55.27743
3 F 56.08444
4 F 52.65430
# ggdensity: density plot of weight, colored and filled by sex
ggdensity(
  wdata,
  x = "weight",
  add = "mean", # Add a mean line
  rug = TRUE, # Add a marginal rug
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800")
)
detailed parameters:
Usage
# Usage signature of ggdensity() with the default value of each argument
ggdensity(
data,
x,
y = "..density..",
combine = FALSE, # Whether to facet the plot when several variables are plotted
merge = FALSE, # Whether to merge several variables into the same plot
color = "black",# Line color
fill = NA,# Fill color
palette = NULL,# Custom color palette
size = NULL,# Point and line size
linetype = "solid",# Line type
alpha = 0.5,# Transparency
title = NULL,# Plot title
xlab = NULL,# x-axis label
ylab = NULL,# y-axis label
facet.by = NULL,# Grouping variable(s) used to facet the plot
panel.labs = NULL,# Titles of the facet panels
short.panel.labs = TRUE,
add = c("none", "mean", "median"),# Add a mean or median line
add.params = list(linetype = "dashed"),# Parameters of the element added with `add`
rug = FALSE,# Whether to add a marginal rug
label = NULL,# Column containing the labels
font.label = list(size = 11, color = "black"), # Label font settings
label.select = NULL,
repel = FALSE,# Whether to repel labels so that they do not overlap
label.rectangle = FALSE,# Whether to draw a box around each label
ggtheme = theme_pubr(),# Plot theme
...
)
# Histogram of weight, colored and filled by sex
gghistogram(
  wdata,
  x = "weight",
  add = "mean", # Add a mean line
  rug = TRUE, # Add a marginal rug
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800")
)
# Load the built-in ToothGrowth data set and keep a working copy in `df`
data("ToothGrowth")
df <- ToothGrowth
# Show the first four rows
head(df, n = 4)
> head(df, 4)
len supp dose
1 4.2 VC 0.5
2 11.5 VC 0.5
3 7.3 VC 0.5
4 5.8 VC 0.5
# Box plot of tooth length by dose; color and point shape follow the dose
p <- ggboxplot(
  df,
  x = "dose",
  y = "len",
  color = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  add = "jitter", # Overlay the jittered observations
  shape = "dose"
)
p
# Add p-values
# Pairs of dose groups to compare
my_comparisons <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
p +
  # p-values of the listed pairwise comparisons
  stat_compare_means(comparisons = my_comparisons) +
  # Second label, placed at y = 50
  stat_compare_means(label.y = 50)
detailed parameters
# Usage signature of stat_compare_means() with the default value of each argument
stat_compare_means(
mapping = NULL,
data = NULL,
method = NULL,# Statistical test to use; see the table below
paired = FALSE,# Whether to perform a paired test
method.args = list(),# Additional arguments passed to the test, e.g. method.args = list(alternative = "greater")
ref.group = NULL,# Reference (control) group
comparisons = NULL,# List of the groups to compare
hide.ns = FALSE,# Whether to hide the non-significant label "ns"
label.sep = ", ",# String separating the terms of the label; the default is ", "
label = NULL,# Label type, e.g. "p.signif"
label.x.npc = "left",# Horizontal position of the label
label.y.npc = "top",# Vertical position of the label
# Either a number between 0 and 1 or a keyword such as 'right', 'left', 'center', 'centre', 'middle'
label.x = NULL,
label.y = NULL,# Absolute coordinate at which the label is displayed
vjust = 0,# Move the text up or down
tip.length = 0.03,
bracket.size = 0.3,
step.increase = 0,
symnum.args = list(),# Default: symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")). It can be customized
geom = "text",
position = "identity",
na.rm = FALSE,# If FALSE (the default), missing values are removed with a warning. If TRUE, missing values are removed silently.
show.legend = NA,# Whether to include this layer in the legend
inherit.aes = TRUE,
...
)
Commonly used statistical methods:
| Method | R function | Description |
|---|---|---|
| T-test | t.test() | Compare two groups (parametric test) |
| Wilcoxon test | wilcox.test() | Compare two groups (non-parametric test) |
| ANOVA | aov() | Compare multiple groups (parametric test) |
| Kruskal-Wallis | kruskal.test() | Compare multiple groups (non-parametric test) |
# Violin plot of tooth length by dose with an embedded white box plot
ggviolin(
  df,
  x = "dose",
  y = "len",
  fill = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  add = "boxplot",
  add.params = list(fill = "white") # Fill color of the added box plot
) +
  # Pairwise comparisons, labelled with significance symbols
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") +
  # Second label, placed at y = 50
  stat_compare_means(label.y = 50)
# Load the built-in mtcars data set and keep a working copy in `dfm`
data("mtcars")
dfm <- mtcars
# Treat the number of cylinders as a categorical variable
dfm[["cyl"]] <- as.factor(dfm[["cyl"]])
# Copy the car names from the row names into a regular column
dfm[["name"]] <- rownames(dfm)
# Inspect the first rows of the columns of interest
head(dfm[, c("name", "wt", "mpg", "cyl")])
> head(dfm[, c("name", "wt", "mpg", "cyl")])
name wt mpg cyl
Mazda RX4 Mazda RX4 2.620 21.0 6
Mazda RX4 Wag Mazda RX4 Wag 2.875 21.0 6
Datsun 710 Datsun 710 2.320 22.8 4
Hornet 4 Drive Hornet 4 Drive 3.215 21.4 6
Hornet Sportabout Hornet Sportabout 3.440 18.7 8
Valiant Valiant 3.460 18.1 6
# Bar plot of mpg per car, sorted in descending order across all cars
ggbarplot(
  dfm,
  x = "name",
  y = "mpg",
  fill = "cyl", # Fill color by cyl group
  color = "white", # White bar borders
  palette = "jco", # jco journal color palette
  sort.val = "desc", # Sort the values in descending order
  sort.by.groups = FALSE, # Do not sort inside each group
  x.text.angle = 90 # Rotate the x-axis text by 90 degrees
)
# Bar plot of mpg per car, sorted in ascending order inside each cyl group
ggbarplot(
  dfm,
  x = "name",
  y = "mpg",
  fill = "cyl",
  color = "white",
  palette = "jco",
  sort.val = "asc", # Sort the values in ascending order
  sort.by.groups = TRUE, # Sort inside each group
  x.text.angle = 90
)
The deviation graph shows the deviation of the quantitative value from the reference value.
The z-score, also known as the standard score, is obtained by subtracting the mean from a value and dividing the difference by the standard deviation. In statistics, the standard score is the signed number of standard deviations by which the value of an observation or data point lies above or below the mean of the observed or measured values.
# Compute the z-score of mpg: distance from the mean in standard deviations
dfm$mpg_z <- (dfm$mpg - mean(dfm$mpg)) / sd(dfm$mpg)
# Label each car as "low" (negative z-score) or "high" and store the
# result as a new factor column with the level order low, high
dfm$mpg_grp <- factor(
  ifelse(dfm$mpg_z < 0, "low", "high"),
  levels = c("low", "high")
)
# Inspect the first rows, including the two new columns
head(dfm[, c("name", "wt", "mpg", "mpg_z", "mpg_grp", "cyl")])
> head(dfm[, c("name", "wt", "mpg", "mpg_z", "mpg_grp", "cyl")])
name wt mpg mpg_z mpg_grp cyl
Mazda RX4 Mazda RX4 2.620 21.0 0.1508848 high 6
Mazda RX4 Wag Mazda RX4 Wag 2.875 21.0 0.1508848 high 6
Datsun 710 Datsun 710 2.320 22.8 0.4495434 high 4
Hornet 4 Drive Hornet 4 Drive 3.215 21.4 0.2172534 high 6
Hornet Sportabout Hornet Sportabout 3.440 18.7 -0.2307345 low 8
Valiant Valiant 3.460 18.1 -0.3302874 low 6
# Deviation graph: mpg z-score per car, sorted in ascending order
ggbarplot(
  dfm,
  x = "name",
  y = "mpg_z",
  fill = "mpg_grp", # Fill color by low/high group
  color = "white", # White bar borders
  palette = "jco",
  sort.val = "asc", # Sort the values in ascending order
  sort.by.groups = FALSE, # Do not sort inside each group
  x.text.angle = 90, # Rotate the x-axis text by 90 degrees
  ylab = "MPG z-score",
  xlab = FALSE,
  legend.title = "MPG Group"
)
# Rotated deviation graph: same data, sorted in descending order
ggbarplot(
  dfm,
  x = "name",
  y = "mpg_z",
  fill = "mpg_grp",
  color = "white",
  palette = "jco",
  sort.val = "desc", # Sort the values in descending order
  sort.by.groups = FALSE,
  x.text.angle = 90,
  ylab = "MPG z-score",
  legend.title = "MPG Group",
  rotate = TRUE, # Rotate the plot
  ggtheme = theme_minimal()
)
Dot plot
Lollipop chart
# Lollipop chart of mpg per car, sorted in ascending order
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  color = "cyl", # Color by cyl group
  palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
  sorting = "ascending", # Sort the values in ascending order
  add = "segments", # Add segments from y = 0 to the dots
  ggtheme = theme_pubr() # Plot theme
)
# Rotated lollipop chart, grouped by cyl, with the rounded mpg printed in each dot
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  sorting = "descending", # Sort the values in descending order
  add = "segments",
  rotate = TRUE, # Rotate the plot
  group = "cyl", # Group the cars by cyl
  dot.size = 6,
  label = round(dfm$mpg),
  # Label font settings
  font.label = list(color = "white", size = 9, vjust = 0.5),
  ggtheme = theme_pubr()
)
# Lollipop deviation chart of the mpg z-score, grouped by cyl
ggdotchart(
  dfm,
  x = "name",
  y = "mpg_z",
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  sorting = "descending",
  add = "segments",
  # Change the segment color and size
  add.params = list(color = "lightgray", size = 2),
  group = "cyl",
  dot.size = 6,
  label = round(dfm$mpg_z, 1),
  font.label = list(color = "white", size = 9, vjust = 0.5),
  ggtheme = theme_pubr()
) +
  # Draw a horizontal reference line at y = 0
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")
Cleveland dot chart
# Cleveland dot chart of mpg per car, sorted in descending order
ggdotchart(
  dfm,
  x = "name",
  y = "mpg",
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  sorting = "descending",
  rotate = TRUE,
  dot.size = 2,
  y.text.col = TRUE,
  ggtheme = theme_pubr()
) +
  theme_cleveland() # Cleveland theme