Drawing density scatter plot with matplotlib

created at 06-29-2021 views: 4

introduction

Today our task is to draw plots like the one shown in the following picture.

After multi-sample integration, dimensionality reduction and clustering, a two-dimensional density map of each sample shows where that sample is concentrated in the embedding, which highlights the differences between samples.  

I don't know whether you prefer plotting in R or in Python; personally I like the Python style, so today we will use Python to reproduce the picture above.  

simple example

Let's start with a simple scatter density plot drawn from randomly generated data.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Simulate 1000 correlated points: y is 3 * x plus Gaussian noise.
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)

# Calculate the point density: fit a Gaussian KDE on the stacked coordinates
# and evaluate it at those same points, giving one density value per point.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline. The empty string
# '' is not a valid colour specification and is rejected by current
# matplotlib releases.
plt.scatter(x, y, c=z, s=10, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('x', size=20, family='Times New Roman')
plt.ylabel('y', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Colorbar title, positioned in figure coordinates.
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.show()

The plot above is just an example; the next step is to apply the same approach to our single-cell data.  

data preparation

Prepare data (multi-sample single-cell clustering data)

With the two-dimensional dimensionality-reduction coordinates and the sample information in hand, we can start drawing.

example1

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Work from the directory that holds the exported UMAP table.
os.chdir('C:/Users/86178/Desktop')
data = pd.read_csv('XZNYLvsXZNYL_3vsXZNYL_4vsXZNYL_7_UMAP_sample.csv', index_col=0)

# Coordinates of every row; drawn in grey as the background layer.
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for one sample: keep only the rows whose
# Sample column equals 'Sample1' (the first sample).
data1 = data.loc[data.Sample == 'Sample1', :]
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline. The empty string
# '' is not a valid colour specification and is rejected by current
# matplotlib releases.
plt.scatter(x, y, s=10, edgecolors='none', c='grey')
# Overlay the selected sample, coloured by its density.
plt.scatter(x1, y1, s=2, c=z, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Colorbar title, positioned in figure coordinates.
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample1', size=25, family='Times New Roman')
plt.show()

Note, however, that the colorbar of the picture we want to reproduce runs from 0 to 1, so the density values need to be rescaled. Pay attention to the difference between the code below and the code above.

import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Work from the directory that holds the exported UMAP table.
os.chdir('C:/Users/86178/Desktop')
data = pd.read_csv('XZNYLvsXZNYL_3vsXZNYL_4vsXZNYL_7_UMAP_sample.csv', index_col=0)

# Coordinates of every row; drawn in grey as the background layer.
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for one sample: keep only the rows whose
# Sample column equals 'Sample1'.
data1 = data.loc[data.Sample == 'Sample1', :]
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)
# Divide by the largest absolute value so the densest point maps to 1.
z = preprocessing.maxabs_scale(z, axis=0, copy=True)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline. The empty string
# '' is not a valid colour specification and is rejected by current
# matplotlib releases.
plt.scatter(x, y, s=2, edgecolors='none', c='grey')
# Overlay the selected sample, coloured by its scaled density. vmin/vmax pin
# the colour range to [0, 1]; otherwise the colorbar would start at the
# smallest scaled density rather than at 0.
plt.scatter(x1, y1, s=5, c=z, edgecolors='none', cmap='Reds', vmin=0, vmax=1)

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Colorbar title, positioned in figure coordinates.
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample1', size=25, family='Times New Roman')
plt.show()

example2

Then arbitrarily select another subset of the data set (here Sample2).

import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Work from the directory that holds the exported UMAP table.
os.chdir('C:/Users/86178/Desktop')
data = pd.read_csv('XZNYLvsXZNYL_3vsXZNYL_4vsXZNYL_7_UMAP_sample.csv', index_col=0)

# Coordinates of every row; drawn in grey as the background layer.
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for one sample: keep only the rows whose
# Sample column equals 'Sample2'.
data1 = data.loc[data.Sample == 'Sample2', :]
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)
# Divide by the largest absolute value so the densest point maps to 1.
z = preprocessing.maxabs_scale(z, axis=0, copy=True)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline. The empty string
# '' is not a valid colour specification and is rejected by current
# matplotlib releases.
plt.scatter(x, y, s=2, edgecolors='none', c='grey')
# Overlay the selected sample, coloured by its scaled density. vmin/vmax pin
# the colour range to [0, 1]; otherwise the colorbar would start at the
# smallest scaled density rather than at 0.
plt.scatter(x1, y1, s=5, c=z, edgecolors='none', cmap='Reds', vmin=0, vmax=1)

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Colorbar title, positioned in figure coordinates.
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample2', size=25, family='Times New Roman')
plt.show()

select the data set

Please log in to leave a comment.