Drawing a scatter plot with a fitted line and text labels in ggplot2

created at 11-13-2021 views: 5

In today's post, let's reproduce Figure 5 from the paper.

The data set is Table S4; part of the data is shown below.

data set

Load the required R packages

library(readxl)
library(tidyverse)
library(ggplot2)
library(ggrepel)

Organize the data into the format required for drawing


# Build the per-species table used for plotting.
# Columns are picked by position (2, 5, 9, 10, 11) and then renamed:
#   V1 = conservation status, V2 = common name,
#   V3 = missense count, V4 = LoF mutation count, V7 = silent count.
df <- read_excel("mmc4.xlsx", skip = 1) %>%
  select(2, 5, 9, 10, 11) %>%
  rename(
    V2 = `Common name`,
    V1 = `conservation status`,
    V3 = `# Missense`,
    V4 = `# LoF mutation`,
    V7 = `# Silent`
  ) %>%
  # Express the missense and LoF counts relative to the silent count.
  mutate(
    V5 = V3 / V7,
    V6 = V4 / V7
  ) %>%
  select(V1, V2, V5, V6) %>%
  # Average to one row per (status, species). `.groups = "drop"` returns a
  # plain, ungrouped tibble instead of one left grouped by V1.
  group_by(V1, V2) %>%
  summarise(
    V5 = mean(V5),
    V6 = mean(V6),
    .groups = "drop"
  ) %>%
  # Two-level status group used for point colour and shape:
  #   A = least concern / data deficient / near threatened,
  #   B = everything else (including a missing status).
  # Matching is case-insensitive because the sheet spells the same status
  # with different capitalisation ("Least Concern" vs "Least concern").
  mutate(V3 = case_when(
    tolower(V1) %in%
      c("least concern", "data deficient", "near threatened") ~ "A",
    TRUE ~ "B"
  ))
head(df)

Drawing code

# Legend text for the two status groups (A, then B). The same labels are given
# to both the shape and the colour scale so ggplot2 merges them into one legend.
status_labels <- c(
  "Least concern, Data deficient, Near threatened",
  "Vulnerable, Endangered, Critically endangered"
)

# Scatter plot of V6 against V5, one labelled point per species, with a single
# straight-line fit drawn through all points. The plot object is built before
# the PDF device is opened, so a failure here cannot leave a device open.
plota <- ggplot(data = df, aes(x = V5, y = V6)) +
  geom_point(aes(color = V3, shape = V3), size = 4) +
  geom_text_repel(aes(label = V2), size = 4) +
  scale_x_continuous(name = "mean rate of Missense / Silent") +
  scale_y_continuous(name = "mean value of LoF mutation rate") +
  # `size` is kept for compatibility with older ggplot2 releases; from
  # ggplot2 3.4.0 onwards the preferred argument for line width is `linewidth`.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "black",
    size = 1,
    se = FALSE
  ) +
  scale_shape_manual(
    values = c(15, 19),
    labels = status_labels
  ) +
  scale_color_manual(
    values = rev(c("#D55E00", "#999999")),
    labels = status_labels
  ) +
  # NOTE: the R^2 and P value shown here are fixed text, not computed from
  # `df`; update them by hand if the input data change.
  annotate(
    "text",
    x = 0.6,
    y = 0.03,
    label = "atop(italic(R) ^ 2 == 0.61, 'P value = 9.43e-5')",
    parse = TRUE,
    size = 6
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12),
    legend.position = c(0.3, 0.9),
    legend.title = element_blank()
  )

# Render to a 10 x 8 inch PDF. `finally` guarantees the device is closed even
# if rendering fails, so later plots are not silently sent to this file.
pdf(
  file = "output.pdf",
  width = 10,
  height = 8,
  family = "serif"
)
invisible(tryCatch(print(plota), finally = dev.off()))

final result

created at:11-13-2021
edited at: 11-13-2021: