# Drawing density scatter plot with matplotlib

created at 06-29-2021 views: 4

## Introduction

Today our task is to plot diagrams like the one shown in the following picture. After multi-sample integration analysis, dimensionality reduction, and clustering, a two-dimensional density map of each sample's distribution shows how the samples differ in their spatial distribution, which reflects the differences between samples.

I don't know whether you prefer plotting in R or in Python; personally I like the Python style, so today we will use Python to reproduce the picture above.

## Simple example

Let's start with a simple scatter density plot drawn from randomly generated data.

``````python
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Draw a scatter plot of randomly generated, correlated points, colouring
# each point by its estimated local density.
x = np.random.normal(size=1000)
y = x * 3 + np.random.normal(size=1000)

# Calculate the point density: fit a Gaussian KDE on the (x, y) points and
# evaluate it at those same points, giving one density value per point.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline; an empty string
# is not a valid colour specification.
plt.scatter(x, y, c=z, s=10, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('x', size=20, family='Times New Roman')
plt.ylabel('y', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Caption placed in figure coordinates (presumably tuned by hand to sit
# above the colorbar -- adjust if the layout changes).
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.show()
``````

The above diagram is just an example; the next step is to apply the same approach to our single-cell data.

## Data preparation

Prepare the data (multi-sample single-cell clustering data): the two-dimensional dimensionality-reduction coordinates and the sample information. Then start drawing.

## Example 1

``````python
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Highlight the cells of one sample on the UMAP embedding, coloured by their
# estimated local density, on top of all cells drawn in grey.

# Machine-specific working directory; change it to wherever the data lives.
os.chdir('C:/Users/86178/Desktop')

# NOTE: `data` is not defined in this snippet. It must already be a pandas
# DataFrame with at least the columns 'UMAP_1', 'UMAP_2' and 'Sample'
# (e.g. loaded with pd.read_csv from the prepared clustering table).
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for the selected sample only.
data1 = data.loc[data.Sample == 'Sample1', :]  # the first sample
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline; an empty string
# is not a valid colour specification.
plt.scatter(x, y, s=10, edgecolors='none', c='grey')
# The density-coloured scatter is drawn last so that plt.colorbar() below
# picks it up as the current mappable.
plt.scatter(x1, y1, s=2, c=z, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Caption placed in figure coordinates (presumably tuned by hand to sit
# above the colorbar -- adjust if the layout changes).
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample1', size=25, family='Times New Roman')
plt.show()
``````

Note, however, that the colorbar in the picture we want to reproduce runs from 0 to 1, representing the density value. We therefore need to rescale the density; pay attention to the difference between the code below and the code above.

``````python
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Highlight the cells of one sample on the UMAP embedding, coloured by their
# density rescaled by its maximum, on top of all cells drawn in grey.

# Machine-specific working directory; change it to wherever the data lives.
os.chdir('C:/Users/86178/Desktop')

# NOTE: `data` is not defined in this snippet. It must already be a pandas
# DataFrame with at least the columns 'UMAP_1', 'UMAP_2' and 'Sample'
# (e.g. loaded with pd.read_csv from the prepared clustering table).
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for the selected sample only.
data1 = data.loc[data.Sample == 'Sample1', :]
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)
# Divide by the largest absolute value so the densest point maps to 1.
z = preprocessing.maxabs_scale(z, axis=0, copy=True)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline; an empty string
# is not a valid colour specification.
plt.scatter(x, y, s=2, edgecolors='none', c='grey')
# The density-coloured scatter is drawn last so that plt.colorbar() below
# picks it up as the current mappable.
plt.scatter(x1, y1, s=5, c=z, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the scaled density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Caption placed in figure coordinates (presumably tuned by hand to sit
# above the colorbar -- adjust if the layout changes).
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample1', size=25, family='Times New Roman')
plt.show()
``````

## Example 2

Next, choose any other sample as the subset to highlight.

``````python
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Highlight the cells of a second sample on the UMAP embedding, coloured by
# their density rescaled by its maximum, on top of all cells drawn in grey.

# Machine-specific working directory; change it to wherever the data lives.
os.chdir('C:/Users/86178/Desktop')

# NOTE: `data` is not defined in this snippet. It must already be a pandas
# DataFrame with at least the columns 'UMAP_1', 'UMAP_2' and 'Sample'
# (e.g. loaded with pd.read_csv from the prepared clustering table).
x = data.loc[:, 'UMAP_1']
y = data.loc[:, 'UMAP_2']

# Calculate the point density for the selected sample only.
data1 = data.loc[data.Sample == 'Sample2', :]
x1 = data1.loc[:, 'UMAP_1']
y1 = data1.loc[:, 'UMAP_2']
xy = np.vstack([x1, y1])
# Fit a Gaussian KDE on the sample's points and evaluate it at those points.
z = gaussian_kde(xy)(xy)
# Divide by the largest absolute value so the densest point maps to 1.
z = preprocessing.maxabs_scale(z, axis=0, copy=True)

fig, ax = plt.subplots()
# edgecolors='none' draws the markers without an outline; an empty string
# is not a valid colour specification.
plt.scatter(x, y, s=2, edgecolors='none', c='grey')
# The density-coloured scatter is drawn last so that plt.colorbar() below
# picks it up as the current mappable.
plt.scatter(x1, y1, s=5, c=z, edgecolors='none', cmap='Reds')

# Axis ticks and labels.
plt.tick_params(labelsize=15)
plt.xticks(size=15, family='Times New Roman')
plt.yticks(size=15, family='Times New Roman')
plt.xlabel('UMAP_1', size=20, family='Times New Roman')
plt.ylabel('UMAP_2', size=20, family='Times New Roman')

# Colorbar for the scaled density values, with tick labels in the same font.
cb = plt.colorbar(shrink=0.5)
cb.ax.tick_params(labelsize=15)
for tick_label in cb.ax.yaxis.get_ticklabels():
    tick_label.set_family('Times New Roman')

# Caption placed in figure coordinates (presumably tuned by hand to sit
# above the colorbar -- adjust if the layout changes).
plt.figtext(0.76, 0.73, 'density', size=20, family='Times New Roman')
plt.title('Sample2', size=25, family='Times New Roman')
plt.show()
`````` 