Papers Analysis
Papers Analysis¶
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
Setting matplotlib parameters
# Default figure size (width, height) used by the plots in this analysis.
plt.rcParams.update({"figure.figsize": [12, 6]})
Read the data
# Load the input tables from CSV files given by relative path.
# The papers file is parsed with an explicit "\n" line terminator; the other
# files are read with the pandas defaults.
df_paper = pd.read_csv("paper.csv", lineterminator="\n")
df_nep, df_nep_inclusion, df_nep_issue, df_authorship = map(
    pd.read_csv,
    ("nep.csv", "nep_inclusion.csv", "nep_issue.csv", "authorship.csv"),
)
# Histogram of papers per year, restricted to the window [min_year, max_year].
min_year, max_year = 2000, 2022

# ``between`` keeps both endpoints by default, so 2000 and 2022 are included.
in_window = df_paper["year"].between(min_year, max_year)
years_in_window = df_paper["year"][in_window]

# One bar per distinct year value.
sns.histplot(data=years_in_window, discrete=1)
plt.title("Research papers by year")
plt.xlabel("Year")
# Put a tick on every year in the window; rotate labels so they do not overlap.
year_ticks = range(min_year, max_year + 1)
plt.xticks(year_ticks, rotation=45)
plt.show()
# Top 10 institutions ranked by their number of papers (non-null titles).
institution_counts = df_paper.groupby("institution")["title"].count()
top_institutions = (
    institution_counts.sort_values(ascending=False).to_frame(name="count").head(10)
)
# Bare expression on purpose: as the last statement of a notebook cell the
# styled table is rendered as the cell output.
top_institutions.style.set_caption("Top 10 institutions by published papers")
institution | count |
---|---|
University Library of Munich, Germany | 30027 |
National Bureau of Economic Research, Inc | 20463 |
Institute of Labor Economics (IZA) | 14808 |
HAL | 13249 |
arXiv.org | 10906 |
C.E.P.R. Discussion Papers | 10373 |
Agricultural and Applied Economics Association | 5529 |
The World Bank | 5236 |
CESifo | 4575 |
Center for Open Science | 4359 |
# Attach authorship rows to papers. The left join keeps every paper, including
# those without a matching authorship record (their author columns are NaN).
df_pa = pd.merge(
    df_paper,
    df_authorship,
    how="left",
    left_on="pid",
    right_on="paper__pid",
)
# Make author ids readable: swap ":" for a space and title-case the result.
df_pa["author__aid"] = df_pa["author__aid"].str.replace(":", " ").str.title()
# Top 10 authors ranked by their number of papers (non-null titles).
author_counts = df_pa.groupby("author__aid")["title"].count()
top_authors = (
    author_counts.sort_values(ascending=False).head(10).to_frame(name="count")
)
# Bare expression on purpose: as the last statement of a notebook cell the
# styled table is rendered as the cell output.
top_authors.style.set_caption("Top 10 authors by published papers")
author__aid | count |
---|---|
Asongu Simplice | 1559 |
Bank World | 907 |
Gupta Rangan | 748 |
Mcaleer Michael | 644 |
Fund International Monetary | 622 |
Oecd | 508 |
Tol Richard | 397 |
Odhiambo Nicholas | 358 |
Van Reenen John | 316 |
Yogyakarta Perpustakaan Stipram | 305 |
# Bar chart: how many papers have 1, 2, ..., num_top authors, plus one
# aggregated bar for papers with more than num_top authors.

# Number of authors recorded for each paper. Counting ``author__aid`` (not
# ``title``) skips the NaN placeholder row that the left merge creates for a
# paper with no authorship record, so such a paper gets 0 authors instead of
# being reported as single-authored.
authors_per_paper = df_pa.groupby("pid")["author__aid"].count()

# Number of papers for each author count, indexed by the author count itself.
# ``value_counts`` orders by frequency, so the result is re-sorted by index;
# otherwise the k-th entry is only "k authors" when the frequencies happen to
# be strictly decreasing.
num_authors = authors_per_paper.value_counts().sort_index()

num_top = 5
top_counts = list(range(1, num_top + 1))

# Look heights up by label (1..num_top); an author count that never occurs
# yields a zero-height bar rather than shifting the remaining bars.
height = list(num_authors.reindex(top_counts, fill_value=0)) + [
    num_authors[num_authors.index > num_top].sum()
]
bars = top_counts + [f">{num_top}"]
y_pos = range(len(bars))

plt.bar(y_pos, height)
plt.xticks(y_pos, bars)
plt.title("Research papers by number of authors")
plt.xlabel("Number of authors")
plt.ylabel("Count")
plt.show()
# Top 10 authors ranked by the number of distinct ``institution`` values found
# on their papers. The caption typos ("affilieated", singular "institution")
# are corrected because the caption is shown to the reader.
# Bare expression on purpose: as the last statement of a notebook cell the
# styled table is rendered as the cell output.
(
    df_pa.groupby("author__aid")["institution"]
    .nunique()
    .sort_values(ascending=False)
    .head(10)
    .to_frame()
    .style.set_caption("Top 10 authors affiliated with the most institutions")
)
author__aid | institution |
---|---|
Ongena Steven | 46 |
Ottaviano Gianmarco | 40 |
Peri Giovanni | 39 |
Clark Andrew | 39 |
Eichengreen Barry | 39 |
Verdier Thierry | 39 |
Devereux Michael | 38 |
Cabrales Antonio | 37 |
Schularick Moritz | 36 |
Spagnolo Giancarlo | 36 |