Use ggplot2 to draw points and lines to display the Fst value between groups and the Pi value within groups

created at 11-13-2021 views: 7

Today’s post tries to reproduce Figure 2d from the paper below.

example

from Kang L, Qian L, Zheng M, et al. Genomic insights into the origin, domestication and diversification of Brassica juncea[J]. Nature genetics, 2021, 53(9): 1392-1402.

Many population genetics papers include this kind of graph: each point represents a population, labelled with its within-population diversity measured by pi, and each line between two points indicates the degree of differentiation between those populations, measured by Fst.

Construct a data set

The data set is made up at random and has no biological meaning.

pi value data format

pi value data format

FST data format

FST data format

Read data set

The first is the pi value

library(readxl)

# Load the per-population table from the first worksheet. Later code uses its
# `Population` and `pi_value` columns.
dfpi <- read_excel(path = "20210913.xlsx", sheet = "Sheet1")
# Show the table that was read.
dfpi

Add coordinates

# Place the populations evenly on a circle of radius 2, starting at the top
# (angle 0 gives x = 0, y = radius) and moving clockwise in row order.
# The angular step is derived from the number of rows: six rows still give
# 0, 60, ..., 300 degrees, and other row counts no longer fail on a length
# mismatch when the columns are assigned.
n_pop <- nrow(dfpi)
radius <- 2
angle_rad <- (seq_len(n_pop) - 1) * 360 / n_pop * pi / 180
dfpi$x <- radius * sin(angle_rad)
dfpi$y <- radius * cos(angle_rad)

plot

library(ggplot2)

# One colour per population; scale_color_manual() below needs at least as many
# entries as there are distinct `Population` values.
cols <- c(
  "#666116", "#232c86", "#e6eb49",
  "#f1a3bc", "#f48b19", "#64bea0"
)

# One large dot per population with two red labels inside it: the population
# name above the centre and the pi value below it.
ggplot() +
  geom_point(
    data = dfpi,
    aes(x = x, y = y, color = Population),
    size = 40,
    show.legend = FALSE
  ) +
  # Fixed limits leave room around the circle of radius 2 for the large dots.
  xlim(-2.5, 2.5) + ylim(-2.5, 2.5) +
  theme_void() +
  geom_text(
    data = dfpi,
    aes(x = x, y = y, label = Population),
    vjust = -0.5,
    color = "red"
  ) +
  geom_text(
    data = dfpi,
    aes(
      x = x, y = y,
      # Builds a string such as "1.23~x~10**-3"; with parse = TRUE it is drawn
      # as a plotmath expression (`~` is a space, `**` is a superscript).
      label = paste0(round(pi_value * 1000, 2), "~x~", "10**-3")
    ),
    vjust = 1,
    parse = TRUE,
    color = "red"
  ) +
  scale_color_manual(values = cols)

result

construct the connection data

library(tidyverse)

# Reshape the Fst sheet to long form: one row per (pop, pop2) pair that has a
# value. Every column except `pop` is treated as a second population, and
# empty cells are dropped.
dffst <- read_excel("20210913.xlsx", sheet = "Sheet2") %>%
  pivot_longer(
    !pop,
    names_to = "pop2",
    values_to = "Fst",
    values_drop_na = TRUE
  )

# Attach plot coordinates to both ends of every pair:
#   (x1, y1) = position of `pop`, (x, y) = position of `pop2`.
# Both lookups match the population name against dfpi$Population.
dffst1 <- merge(dffst, dfpi, by.x = "pop", by.y = "Population") %>%
  select(pop, pop2, Fst, x, y) %>%
  # Free the names x / y before the second lookup adds them again.
  rename("x1" = "x", "y1" = "y") %>%
  merge(dfpi, by.x = "pop2", by.y = "Population") %>%
  select(pop, pop2, Fst, x, y, x1, y1)

Combine connection and dot diagrams together



# Same dot diagram as before, with a dashed line for every population pair in
# dffst1. The segments are added first so that the dots are drawn over them.
ggplot() +
  geom_segment(
    data = dffst1,
    aes(x = x, y = y, xend = x1, yend = y1),
    # `size` is the line width here; ggplot2 >= 3.4.0 deprecates it for lines
    # in favour of `linewidth`. Kept as `size` so older versions still work.
    size = 1,
    linetype = "dashed"
  ) +
  geom_point(
    data = dfpi,
    aes(x = x, y = y, color = Population),
    size = 40,
    show.legend = FALSE
  ) +
  xlim(-2.5, 2.5) + ylim(-2.5, 2.5) +
  theme_void() +
  geom_text(
    data = dfpi,
    aes(x = x, y = y, label = Population),
    vjust = -0.5,
    color = "red"
  ) +
  geom_text(
    data = dfpi,
    aes(
      x = x, y = y,
      # Builds a string such as "1.23~x~10**-3"; with parse = TRUE it is drawn
      # as a plotmath expression.
      label = paste0(round(pi_value * 1000, 2), "~x~", "10**-3")
    ),
    vjust = 1,
    parse = TRUE,
    color = "red"
  ) +
  scale_color_manual(values = cols)

final result

Finally, add the value of Fst to the graph

I can’t think of a better way to put the text in exactly the right place. For now, I add the Fst labels with code at the midpoint of each line, and then adjust the image by hand in AI (Adobe Illustrator).

library(ggrepel)

# Final figure: dashed lines between populations, the Fst value of each pair
# as a label, and the population dots with their name and pi value.
ggplot() +
  geom_segment(
    data = dffst1,
    aes(x = x, y = y, xend = x1, yend = y1),
    # `size` is the line width here; ggplot2 >= 3.4.0 deprecates it for lines
    # in favour of `linewidth`. Kept as `size` so older versions still work.
    size = 1,
    linetype = "dashed"
  ) +
  # Anchor each Fst label at the midpoint of its segment; ggrepel then moves
  # the labels apart where they would overlap.
  geom_text_repel(
    data = dffst1,
    aes(
      x = (x + x1) / 2,
      y = (y + y1) / 2,
      label = Fst
    )
  ) +
  geom_point(
    data = dfpi,
    aes(x = x, y = y, color = Population),
    size = 40,
    show.legend = FALSE
  ) +
  xlim(-2.5, 2.5) + ylim(-2.5, 2.5) +
  theme_void() +
  geom_text(
    data = dfpi,
    aes(x = x, y = y, label = Population),
    vjust = -0.5,
    color = "red"
  ) +
  geom_text(
    data = dfpi,
    aes(
      x = x, y = y,
      # Builds a string such as "1.23~x~10**-3"; with parse = TRUE it is drawn
      # as a plotmath expression.
      label = paste0(round(pi_value * 1000, 2), "~x~", "10**-3")
    ),
    vjust = 1,
    parse = TRUE,
    color = "red"
  ) +
  scale_color_manual(values = cols)

final result

created at:11-13-2021
edited at: 11-13-2021: