Field Analysis
Contents
Field Analysis
The latest stats are from 2021, either because Scimago stopped collecting data or because citation data for more recent years is deemed unreliable.
import numpy as np
import pandas as pd
# Per-year journal records; only the 2021 rows are joined onto the journal
# master table below.
yearly_df = pd.read_csv("journal_record.csv")
df = pd.merge(
    pd.read_csv("journal.csv"),
    yearly_df[yearly_df["year"] == 2021],
    left_on="sourceid",
    right_on="journal__sourceid",
)
area_df = pd.read_csv("journal_area.csv")
def get_cats(df):
    """Explode the ``categories`` column into one row per (journal, category).

    Each entry of ``df["categories"]`` is a ``"; "``-separated list whose
    items look like ``"<field> (Q<n>)"`` or just ``"<field>"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"sourceid"`` and ``"categories"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``["field", "Q", "sourceid"]`` with a fresh 0..n-1 index.
        ``Q`` is missing for categories without a trailing quartile marker;
        ``field`` then holds the whole category text.
    """
    id_col = "sourceid"
    exploded = (
        df.loc[:, [id_col, "categories"]]
        # Rows without any category text cannot be split; skip them instead
        # of failing on the missing value.
        .dropna(subset=["categories"])
        .assign(catbase=lambda _df: _df["categories"].str.split("; "))
        # One row per list element; linear, unlike summing lists together.
        .explode("catbase", ignore_index=True)
    )
    # Raw string so the escaped parentheses are regex escapes, not (invalid)
    # string escapes. Anchoring on a trailing "(Q1)".."(Q4)" keeps names such
    # as "Chemistry (miscellaneous)" intact when no quartile is attached.
    parsed = exploded["catbase"].str.extract(r"^(.*) \((Q[1-4])\)$")
    return pd.DataFrame(
        {
            "field": parsed[0].fillna(exploded["catbase"]),
            "Q": parsed[1],
            id_col: exploded[id_col],
        }
    )
# One row per (journal, category) pair, with the journal-level columns of `df`
# re-attached by merging on the columns the two frames share.
# NOTE(review): only the first 10000 rows of `df` are exploded here; confirm
# whether this cap is intentional or a leftover speed-up.
cat_base = get_cats(df.head(10000)).merge(df)
# Number of journals per field (rows) and quartile (columns).
field_pivot = (
    # Missing values (including a missing quartile) become the label "no Q"
    # so those rows are still counted by the pivot.
    cat_base.fillna("no Q")
    .pivot_table(index="field", columns="Q", values="sourceid", aggfunc="count")
    .fillna(0)
    # Temporary row total, used only to order fields by overall size.
    .assign(s=lambda df: df.sum(axis=1))
    .sort_values("s", ascending=False)
    # Keep only columns whose label starts with "Q"; this drops both the
    # helper total "s" and the "no Q" bucket.
    .loc[:, lambda df: df.columns.str.startswith("Q")]
)
# Show the 15 largest fields, shading each row across its quartile columns.
field_pivot.head(15).style.background_gradient(axis=1).set_caption(
    "Count values by fields"
)
Q | Q1 | Q2 | Q3 | Q4 |
---|---|---|---|---|
field | ||||
Medicine (miscellaneous) | 389.000000 | 426.000000 | 374.000000 | 307.000000 |
Sociology and Political Science | 197.000000 | 97.000000 | 58.000000 | 50.000000 |
Mechanical Engineering | 114.000000 | 87.000000 | 64.000000 | 68.000000 |
Geography, Planning and Development | 98.000000 | 86.000000 | 75.000000 | 74.000000 |
Electrical and Electronic Engineering | 96.000000 | 71.000000 | 60.000000 | 66.000000 |
History | 111.000000 | 77.000000 | 63.000000 | 63.000000 |
Ecology, Evolution, Behavior and Systematics | 99.000000 | 92.000000 | 82.000000 | 39.000000 |
Economics and Econometrics | 127.000000 | 87.000000 | 48.000000 | 35.000000 |
Condensed Matter Physics | 75.000000 | 79.000000 | 73.000000 | 57.000000 |
Education | 152.000000 | 66.000000 | 31.000000 | 25.000000 |
Psychiatry and Mental Health | 95.000000 | 76.000000 | 60.000000 | 38.000000 |
Chemistry (miscellaneous) | 57.000000 | 78.000000 | 69.000000 | 48.000000 |
Materials Science (miscellaneous) | 62.000000 | 72.000000 | 60.000000 | 52.000000 |
Public Health, Environmental and Occupational Health | 71.000000 | 69.000000 | 62.000000 | 37.000000 |
Mechanics of Materials | 76.000000 | 52.000000 | 52.000000 | 45.000000 |
def draw_table(df):
    """Render per-(field, Q) values as a styled wide table.

    The input is pivoted to fields x quartiles, restricted to the first 15
    fields of the module-level ``field_pivot``, stripped of columns that are
    at least half missing, and returned as a Styler with a column-wise
    background gradient.
    """
    wide = df.pivot_table(index="field", columns="Q")
    top_rows = wide.loc[field_pivot.head(15).index]
    # Keep a column only if fewer than half of its remaining cells are NaN.
    dense_enough = top_rows.isna().mean() < 0.5
    return top_rows.loc[:, dense_enough].style.background_gradient(axis=0)
# Average h-index, journal rating and 3-year document count per field and
# quartile, rendered through draw_table.
draw_table(
    cat_base.groupby(["field", "Q"])[
        ["h_index", "journal_rating", "total_docs_3years"]
    ].mean()
).set_caption("Mean values of fields")
h_index | journal_rating | total_docs_3years | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
Q | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 |
field | ||||||||||||
Medicine (miscellaneous) | 138.403599 | 80.525822 | 49.620321 | 21.368078 | 1.952355 | 0.664251 | 0.375511 | 0.157072 | 850.737789 | 573.298122 | 381.534759 | 283.853420 |
Sociology and Political Science | 84.091371 | 38.340206 | 20.431034 | 11.320000 | 1.497914 | 0.387918 | 0.193000 | 0.117420 | 203.984772 | 143.391753 | 106.000000 | 75.180000 |
Mechanical Engineering | 128.877193 | 60.908046 | 29.718750 | 17.382353 | 1.562754 | 0.529828 | 0.290672 | 0.141176 | 1171.219298 | 559.816092 | 387.484375 | 410.220588 |
Geography, Planning and Development | 76.653061 | 38.674419 | 22.093333 | 12.635135 | 1.229337 | 0.454767 | 0.242347 | 0.128973 | 281.775510 | 120.046512 | 87.600000 | 63.054054 |
Electrical and Electronic Engineering | 144.250000 | 78.830986 | 36.816667 | 19.000000 | 1.906833 | 0.556042 | 0.295233 | 0.145561 | 1155.656250 | 1001.690141 | 454.650000 | 320.575758 |
History | 35.720721 | 16.350649 | 10.095238 | 7.126984 | 0.477171 | 0.159545 | 0.114698 | 0.101079 | 112.414414 | 88.675325 | 69.015873 | 46.412698 |
Ecology, Evolution, Behavior and Systematics | 124.878788 | 63.684783 | 40.231707 | 23.871795 | 1.492626 | 0.575076 | 0.357134 | 0.184308 | 480.070707 | 210.891304 | 152.597561 | 76.692308 |
Economics and Econometrics | 113.039370 | 53.011494 | 29.229167 | 12.857143 | 3.441748 | 0.702667 | 0.331229 | 0.157029 | 327.795276 | 207.195402 | 130.604167 | 140.171429 |
Condensed Matter Physics | 147.466667 | 93.075949 | 55.657534 | 27.543860 | 1.747840 | 0.582481 | 0.351151 | 0.177912 | 1605.626667 | 1041.101266 | 535.136986 | 514.263158 |
Education | 79.598684 | 48.287879 | 28.064516 | 19.040000 | 1.390322 | 0.531803 | 0.292581 | 0.147240 | 246.184211 | 210.606061 | 151.548387 | 81.280000 |
Psychiatry and Mental Health | 130.978947 | 70.657895 | 39.466667 | 19.105263 | 1.728611 | 0.767684 | 0.393933 | 0.173316 | 498.705263 | 255.776316 | 173.266667 | 144.526316 |
Chemistry (miscellaneous) | 214.175439 | 80.064103 | 40.144928 | 18.979167 | 2.458158 | 0.513449 | 0.269188 | 0.129687 | 2317.754386 | 935.679487 | 393.000000 | 311.312500 |
Materials Science (miscellaneous) | 158.161290 | 84.777778 | 38.533333 | 15.384615 | 2.168565 | 0.602625 | 0.296500 | 0.130538 | 1294.870968 | 810.875000 | 420.283333 | 345.923077 |
Public Health, Environmental and Occupational Health | 120.873239 | 69.753623 | 41.919355 | 20.324324 | 1.721803 | 0.656087 | 0.375726 | 0.170622 | 744.901408 | 322.884058 | 291.112903 | 171.324324 |
Mechanics of Materials | 135.750000 | 67.711538 | 34.807692 | 20.888889 | 1.629092 | 0.560462 | 0.314923 | 0.167733 | 1215.657895 | 567.288462 | 375.269231 | 516.933333 |
def gini(s):
    """Return a Gini-style concentration index of the value shares in ``s``.

    The relative frequency of every distinct value is computed, and the sum
    of absolute differences over all ordered pairs of shares is divided by
    ``2 * k**2 * mean_share``, where ``k`` is the number of distinct values.
    """
    shares = s.value_counts(normalize=True)
    weights = shares.values
    n_distinct = shares.shape[0]
    # Broadcasting a column against a row yields the full k x k gap matrix.
    pairwise_gaps = np.abs(weights[:, np.newaxis] - weights[np.newaxis, :])
    return pairwise_gaps.sum() / (2 * n_distinct ** 2 * shares.mean())
def top5(s):
    """Return the combined share of the five most frequent values in ``s``."""
    shares = s.value_counts(normalize=True)
    leading = shares.iloc[:5]
    return leading.sum()
Concentration metrics by fields
# Country and publisher concentration (gini and top-5 share) per field and
# quartile, rendered through draw_table.
draw_table(
    cat_base.groupby(["field", "Q"])[["country", "publisher"]].agg([gini, top5])
).set_caption("Concentration metrics by fields")
country | publisher | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
gini | top5 | gini | top5 | |||||||||||||
Q | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 |
field | ||||||||||||||||
Medicine (miscellaneous) | 0.784429 | 0.771248 | 0.722648 | 0.634130 | 0.899743 | 0.821596 | 0.681818 | 0.472313 | 0.542272 | 0.549157 | 0.409589 | 0.192732 | 0.246787 | 0.215962 | 0.227273 | 0.114007 |
Sociology and Political Science | 0.690355 | 0.678538 | 0.551724 | 0.458000 | 0.989848 | 0.907216 | 0.689655 | 0.600000 | 0.593788 | 0.432317 | 0.294430 | 0.108182 | 0.482234 | 0.412371 | 0.379310 | 0.220000 |
Mechanical Engineering | 0.558897 | 0.647783 | 0.595703 | 0.562092 | 0.973684 | 0.873563 | 0.796875 | 0.750000 | 0.543344 | 0.413282 | 0.198342 | 0.079696 | 0.535088 | 0.425287 | 0.234375 | 0.147059 |
Geography, Planning and Development | 0.648980 | 0.729875 | 0.589444 | 0.536383 | 1.000000 | 0.895349 | 0.706667 | 0.635135 | 0.554731 | 0.468520 | 0.294422 | 0.088745 | 0.642857 | 0.534884 | 0.306667 | 0.162162 |
Electrical and Electronic Engineering | 0.489583 | 0.600939 | 0.557143 | 0.586453 | 1.000000 | 0.943662 | 0.816667 | 0.772727 | 0.709491 | 0.425822 | 0.338542 | 0.069548 | 0.833333 | 0.464789 | 0.383333 | 0.151515 |
History | 0.674389 | 0.614719 | 0.502924 | 0.507937 | 0.981982 | 0.805195 | 0.666667 | 0.730159 | 0.530346 | 0.347165 | 0.199546 | 0.112554 | 0.531532 | 0.324675 | 0.285714 | 0.174603 |
Ecology, Evolution, Behavior and Systematics | 0.664830 | 0.631884 | 0.560976 | 0.495192 | 0.939394 | 0.858696 | 0.682927 | 0.717949 | 0.528313 | 0.381100 | 0.232707 | 0.024966 | 0.545455 | 0.336957 | 0.256098 | 0.153846 |
Economics and Econometrics | 0.301181 | 0.535304 | 0.607639 | 0.386555 | 1.000000 | 0.977011 | 0.854167 | 0.600000 | 0.526772 | 0.466749 | 0.314655 | 0.053680 | 0.543307 | 0.540230 | 0.416667 | 0.200000 |
Condensed Matter Physics | 0.586667 | 0.526899 | 0.610212 | 0.491228 | 0.933333 | 0.962025 | 0.904110 | 0.877193 | 0.482581 | 0.451008 | 0.416770 | 0.202951 | 0.546667 | 0.481013 | 0.452055 | 0.280702 |
Education | 0.515789 | 0.647727 | 0.562212 | 0.445000 | 1.000000 | 0.954545 | 0.935484 | 0.880000 | 0.603659 | 0.409091 | 0.256598 | 0.148571 | 0.559211 | 0.454545 | 0.451613 | 0.360000 |
Psychiatry and Mental Health | 0.685167 | 0.678947 | 0.614286 | 0.386997 | 0.915789 | 0.934211 | 0.816667 | 0.578947 | 0.366819 | 0.411483 | 0.314530 | 0.136513 | 0.315789 | 0.434211 | 0.400000 | 0.289474 |
Chemistry (miscellaneous) | 0.491228 | 0.477564 | 0.516371 | 0.455729 | 0.964912 | 0.923077 | 0.608696 | 0.687500 | 0.507849 | 0.459135 | 0.315217 | 0.020390 | 0.666667 | 0.487179 | 0.362319 | 0.125000 |
Materials Science (miscellaneous) | 0.510753 | 0.603535 | 0.573333 | 0.464744 | 0.983871 | 0.916667 | 0.800000 | 0.826923 | 0.539589 | 0.395623 | 0.265079 | 0.107023 | 0.677419 | 0.416667 | 0.333333 | 0.211538 |
Public Health, Environmental and Occupational Health | 0.632238 | 0.668116 | 0.585253 | 0.378378 | 0.943662 | 0.927536 | 0.806452 | 0.594595 | 0.343357 | 0.357764 | 0.261815 | 0.026276 | 0.323944 | 0.347826 | 0.306452 | 0.162162 |
Mechanics of Materials | 0.548872 | 0.569930 | 0.582418 | 0.464815 | 0.973684 | 0.846154 | 0.826923 | 0.777778 | 0.505639 | 0.366864 | 0.267094 | 0.061905 | 0.578947 | 0.461538 | 0.365385 | 0.177778 |