In [ ]:
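# A hexagonal binning plot (hexbin plot) is an effective way to visualize the distribution of two-dimensional data, especially when the data points are dense.
# It divides the data space into hexagonal bins and colors each bin by the number of points it contains, giving a clear picture of the data distribution.
# The example below uses Python and Matplotlib to create a hexbin plot showing the correlation between the Air Quality Index (AQI) and hospital visits.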
In [2]:
import numpy as np
import matplotlib.pyplot as plt
# --- Synthetic data -------------------------------------------------------
# Seed the legacy global NumPy RNG so every run draws the same sample.
np.random.seed(0)
n_points = 10000
# AQI values: uniform draws scaled to the interval [0, 100).
x = 100 * np.random.rand(n_points)
# "Hospital visits": one full sine period across the AQI range (amplitude 5)
# plus Gaussian noise with standard deviation 15. The noise is drawn after x,
# so the random stream is consumed in the same order on every run.
y = (
    5 * np.sin(x * np.pi / 50)
    + np.random.randn(n_points) * 15
)

# --- Figure ---------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 8))
# Bin the points into hexagons; gridsize sets the bin resolution and mincnt=1
# is the minimum count threshold for a bin to be displayed.
hb = ax.hexbin(x, y, gridsize=20, mincnt=1, cmap='viridis')
# Title and axis labels in a single call.
ax.set(
    title='Relationship between Air Quality Index (AQI) and Hospital Visits',
    xlabel='Air Quality Index (AQI)',
    ylabel='Hospital Visits',
)
# Color bar mapping hexagon color to the number of points in that bin.
fig.colorbar(hb, ax=ax, label='Number of Data Points')
# Render the figure.
plt.show()
In [3]:
import matplotlib.pyplot as plt
import numpy as np
# Seed the legacy global NumPy RNG so the sample below is reproducible.
np.random.seed(19680801)
n = 100_000
# x is standard normal; y depends linearly on x (intercept 2, slope 3) with
# additional Gaussian noise scaled by 4.
x = np.random.standard_normal(n)
y = 2.0 + 3.0 * x + 4.0 * np.random.standard_normal(n)
# Use the full data extent as axis limits for both panels.
xlim = (x.min(), x.max())
ylim = (y.min(), y.max())

# Two side-by-side panels sharing the y axis: raw counts vs. log-scaled counts.
fig, (ax0, ax1) = plt.subplots(ncols=2, sharey=True, figsize=(9, 4))
for panel, bin_scale, panel_title in (
    (ax0, None, "Hexagon binning"),          # bins=None is the hexbin default
    (ax1, 'log', "With a log color scale"),  # logarithmic color scale
):
    hb = panel.hexbin(x, y, gridsize=50, bins=bin_scale, cmap='inferno')
    panel.set(xlim=xlim, ylim=ylim)
    panel.set_title(panel_title)
    # Each panel gets its own color bar attached to its own axes.
    cb = fig.colorbar(hb, ax=panel, label='counts')
plt.show()
In [ ]: