In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the remainder of the session.
# NOTE(review): this blanket filter also hides deprecation notices raised by pandas/seaborn.
warnings.filterwarnings(action='ignore')

# Read the dataset (path is relative to the working directory) and preview the first rows.
DATA_FILE = "Top_Anime_data.csv"
anime_data = pd.read_csv(DATA_FILE)
anime_data.head()
Out[1]:
| Score | Popularity | Rank | Members | Description | Synonyms | Japanese | English | Type | Episodes | ... | Premiered | Broadcast | Producers | Licensors | Studios | Source | Genres | Demographic | Duration | Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9.38 | 284 | 1 | 710 | During their decade-long quest to defeat the D... | Frieren at the Funeral | 葬送のフリーレン | Frieren: Beyond Journey's End | TV | 28 | ... | Fall 2023 | Fridays at 23:00 (JST) | Aniplex, Dentsu, Shogakukan-Shueisha Productio... | None found, add some | Madhouse | Manga | AdventureAdventure, DramaDrama, FantasyFantasy | ShounenShounen | 24 min. per ep. | PG-13 - Teens 13 or older |
| 1 | 9.09 | 3 | 2 | 3 | After a horrific alchemy experiment goes wrong... | Hagane no Renkinjutsushi: Fullmetal Alchemist,... | 鋼の錬金術師 FULLMETAL ALCHEMIST | Fullmetal Alchemist: Brotherhood | TV | 64 | ... | Spring 2009 | Sundays at 17:00 (JST) | Aniplex, Square Enix, Mainichi Broadcasting Sy... | Funimation, Aniplex of America | Bones | Manga | ActionAction, AdventureAdventure, DramaDrama, ... | ShounenShounen | 24 min. per ep. | R - 17+ (violence & profanity) |
| 2 | 9.07 | 13 | 3 | 2 | Eccentric scientist Rintarou Okabe has a never... | NaN | STEINS;GATE | Steins;Gate | TV | 24 | ... | Spring 2011 | Wednesdays at 02:05 (JST) | Frontier Works, Media Factory, Kadokawa Shoten... | Funimation | White Fox | Visual novel | DramaDrama, Sci-FiSci-Fi, SuspenseSuspense | NaN | 24 min. per ep. | PG-13 - Teens 13 or older |
| 3 | 9.06 | 342 | 4 | 630 | Gintoki, Shinpachi, and Kagura return as the f... | Gintama' (2015) | 銀魂° | Gintama Season 4 | TV | 51 | ... | Spring 2015 | Wednesdays at 18:00 (JST) | TV Tokyo, Aniplex, Dentsu | Funimation, Crunchyroll | Bandai Namco Pictures | Manga | ActionAction, ComedyComedy, Sci-FiSci-Fi | ShounenShounen | 24 min. per ep. | PG-13 - Teens 13 or older |
| 4 | 9.05 | 21 | 5 | 2 | Seeking to restore humanity's diminishing hope... | NaN | 進撃の巨人 Season3 Part.2 | Attack on Titan Season 3 Part 2 | TV | 10 | ... | Spring 2019 | Mondays at 00:10 (JST) | Production I.G, Dentsu, Mainichi Broadcasting ... | Funimation | Wit Studio | Manga | ActionAction, DramaDrama, SuspenseSuspense | ShounenShounen | 23 min. per ep. | R - 17+ (violence & profanity) |
5 rows × 22 columns
In [2]:
# Structural overview of the dataset.
# A notebook cell renders only its final expression, so the shape and the numeric
# summary are emitted explicitly; as bare expressions they were computed and discarded.
print(anime_data.shape)
# info() prints its report itself (column names, non-null counts, dtypes).
anime_data.info()
# `display` is provided by IPython in every notebook session.
display(anime_data.describe())
# Summary of the object (string) columns, transposed so each column becomes a row.
anime_data.describe(include='O').T
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1000 entries, 0 to 999 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Score 1000 non-null float64 1 Popularity 1000 non-null int64 2 Rank 1000 non-null int64 3 Members 1000 non-null int64 4 Description 1000 non-null object 5 Synonyms 709 non-null object 6 Japanese 999 non-null object 7 English 859 non-null object 8 Type 1000 non-null object 9 Episodes 1000 non-null object 10 Status 1000 non-null object 11 Aired 1000 non-null object 12 Premiered 569 non-null object 13 Broadcast 569 non-null object 14 Producers 1000 non-null object 15 Licensors 1000 non-null object 16 Studios 1000 non-null object 17 Source 1000 non-null object 18 Genres 771 non-null object 19 Demographic 521 non-null object 20 Duration 1000 non-null object 21 Rating 1000 non-null object dtypes: float64(1), int64(3), object(18) memory usage: 172.0+ KB
Out[2]:
| count | unique | top | freq | |
|---|---|---|---|---|
| Description | 1000 | 999 | Following their success in the qualifying roun... | 2 |
| Synonyms | 709 | 701 | The Magician's Bride, Mahoyome | 2 |
| Japanese | 999 | 953 | 僕のヒーローアカデミア | 5 |
| English | 859 | 856 | Time of Eve | 2 |
| Type | 1000 | 6 | TV | 569 |
| Episodes | 1000 | 84 | 1 | 292 |
| Status | 1000 | 2 | Finished Airing | 978 |
| Aired | 1000 | 961 | Oct 6, 2013 to Mar 30, 2014 | 3 |
| Premiered | 569 | 129 | Fall 2022 | 15 |
| Broadcast | 569 | 184 | Unknown | 58 |
| Producers | 1000 | 641 | None found, add some | 111 |
| Licensors | 1000 | 90 | None found, add some | 369 |
| Studios | 1000 | 166 | Madhouse | 56 |
| Source | 1000 | 15 | Manga | 547 |
| Genres | 771 | 237 | ActionAction, AdventureAdventure, FantasyFantasy | 43 |
| Demographic | 521 | 5 | ShounenShounen | 321 |
| Duration | 1000 | 141 | 24 min. per ep. | 259 |
| Rating | 1000 | 5 | PG-13 - Teens 13 or older | 651 |
## 数据可视化 (Data Visualization)
In [3]:
# Score distribution: 30-bin histogram with a KDE curve overlaid.
score_fig, score_ax = plt.subplots(figsize=(10, 6))
sns.histplot(anime_data['Score'], bins=30, kde=True, color='skyblue', ax=score_ax)
score_ax.set_title('Distribution of Scores')
score_ax.set_xlabel('Score')
score_ax.set_ylabel('Frequency')
plt.show()
In [4]:
# Popularity distribution: 30-bin histogram with a KDE curve overlaid.
popularity_fig, popularity_ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    anime_data['Popularity'],
    bins=30,
    kde=True,
    color='orange',
    ax=popularity_ax,
)
popularity_ax.set(
    title='Popularity Distribution',
    xlabel='Popularity',
    ylabel='Frequency',
)
plt.show()
In [8]:
# Genre frequencies across all titles.
# The 'Genres' strings in this dataset repeat every label back-to-back
# (e.g. "ActionAction, DramaDrama"), so after splitting and trimming, each token that
# consists of one string written twice is collapsed to a single copy before counting.
genres = (
    anime_data['Genres']
    .str.split(',')
    .explode()
    .str.strip()
    .str.replace(r'^(.+?)\1$', r'\1', regex=True)
)
# Keep the 15 most frequent genres; value_counts() ignores rows with a missing genre.
top_genres = genres.value_counts().head(15).reset_index()
top_genres.columns = ['Genre', 'Count']
# Horizontal bar chart of the counts.
plt.figure(figsize=(12, 8))
sns.barplot(y='Genre', x='Count', data=top_genres, palette='muted')
plt.title('Top 15 Genres Distribution')
plt.xlabel('Count')
plt.ylabel('Genre')
plt.show()
In [9]:
# Scatter plot of score against popularity, one point per title.
# NOTE(review): 'Popularity' looks like a rank (smaller = more popular) — confirm before
# interpreting the direction of the relationship.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=anime_data, x='Score', y='Popularity', alpha=0.7, ax=scatter_ax)
scatter_ax.set(title='Score vs. Popularity', xlabel='Score', ylabel='Popularity')
plt.show()
In [10]:
# Top ten producers by number of titles.
# Titles with no known producer carry this placeholder; it contains a comma, so it must be
# removed before splitting or it would be counted as two bogus producers.
NO_PRODUCER = 'None found, add some'
producer_names = (
    anime_data.loc[anime_data['Producers'] != NO_PRODUCER, 'Producers']
    .str.split(',')
    .explode()
    # Names are separated by ", "; without trimming, " Dentsu" and "Dentsu" would be
    # tallied as different producers.
    .str.strip()
)
top_producers = producer_names.value_counts().head(10)
plt.figure(figsize=(10, 6))
sns.barplot(x=top_producers.values, y=top_producers.index, palette='husl')
plt.title('Top 10 Producers by Count')
plt.xlabel('Count')
plt.ylabel('Producer')
plt.show()
In [11]:
# Demographic share of the titles that have a demographic listed.
# The scraped labels are doubled (e.g. "ShounenShounen"); collapse them to a single copy
# so the wedges are labelled correctly.
demographic_counts = (
    anime_data["Demographic"]
    .str.replace(r'^(.+?)\1$', r'\1', regex=True)
    .value_counts()
)
demographic_counts.plot.pie(figsize=(10, 5))
plt.title("Distribution of Anime Demographic", loc='center',
          color='red', fontsize=10, fontweight='bold')
plt.show()

# Rating share as a donut chart.
# Labels are derived from the counted values themselves so each wedge is always paired
# with its own rating; a hand-written list silently mislabels wedges whenever the
# frequency order differs from the assumed one.
rating_counts = anime_data["Rating"].value_counts()
# Keep only the descriptive part after the rating code, e.g.
# "PG-13 - Teens 13 or older" -> "Teens 13 or older".
labels = rating_counts.index.str.split(' - ', n=1).str[-1]
colors = ['Pink', 'Yellow', 'Orange', 'Blue', 'Red']
plt.pie(rating_counts, labels=labels, colors=colors)
plt.axis('equal')
plt.title("Rating", loc='center', color='Blue', fontsize=15)
# A white disc drawn over the centre turns the pie into a donut.
plt.gca().add_artist(plt.Circle(xy=(0, 0), radius=0.75, facecolor='white'))
plt.show()
In [12]:
# Row-normalised crosstab of score band versus source, drawn as stacked bars.
# Bands are left-closed / right-open because right=False.
bins = [7.8, 7.9, 8.1, 8.3, 9.4]
labels = ['7.8 to 7.9', '7.9 to 8.1', '8.1 to 8.3', '8.3 to 9.4']
anime_data['Score_Category'] = pd.cut(
    anime_data['Score'],
    bins=bins,
    labels=labels,
    right=False,
)
# Each row is converted to percentages of that score band.
source_share = pd.crosstab(
    index=anime_data["Score_Category"],
    columns=anime_data["Source"],
    normalize="index",
)
normalised_df = source_share.mul(100).round(2)
score_source_ax = normalised_df.plot.bar(stacked=True)
score_source_ax.set_xlabel('Score Category')
score_source_ax.set_ylabel('Percentage')
score_source_ax.set_title('Normalized Crosstabulation of Score Category and Source')
# Anchor the legend outside the axes, to the right of the bars.
score_source_ax.legend(title='Source', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
In [13]:
# Row-normalised crosstab of member-count band versus rating, drawn as stacked bars.
# Bands are left-closed / right-open because right=False.
# NOTE(review): the preview rows show 'Members' values such as 710, 3 and 2, which suggests
# mixed units (thousands vs. millions) from scraping — confirm before trusting these bands.
bins = [1.0, 500.0, 1000.0]
labels = ['1 to 500', '500 to 1000']
anime_data['Members_category'] = pd.cut(anime_data['Members'], bins=bins, labels=labels, right=False)
# Each row is converted to percentages of that member band.
(pd.crosstab(anime_data["Members_category"],anime_data["Rating"],normalize="index")*100).round(2).plot(kind="bar",stacked=True)
# Explicit axis labels, matching the score/source crosstab chart.
plt.xlabel('Members Category')
plt.ylabel('Percentage')
plt.title('Normalized Crosstabulation of Number of Viewers and Rating')
# Anchor the legend outside the axes, to the right of the bars.
plt.legend(title='Rating', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
In [14]:
# Five most frequent values of the 'Studios' column, shown as percentage shares.
top_studios = anime_data["Studios"].value_counts().head(5)
top_studios.plot.pie(autopct='%1.1f%%')
plt.title("Top 5 Production Houses")
# Blank out the y-axis label that pandas adds to pie charts.
plt.ylabel('')
plt.show()
In [15]:
# Rating breakdown of the titles whose 'Studios' value is exactly "Madhouse".
is_madhouse = anime_data["Studios"] == "Madhouse"
madhouse_ratings = anime_data.loc[is_madhouse, "Rating"]
madhouse_ratings.value_counts().plot.pie(autopct='%1.1f%%')
plt.title("Top Anime produced by Madhouse")
plt.show()