Field Analysis

Field Analysis

The latest stats are from 2021, either because Scimago stopped collecting data or because newer citation data is deemed unreliable.

import numpy as np
import pandas as pd
# Per-year journal records; only the 2021 rows are joined onto the journals.
yearly_df = pd.read_csv("journal_record.csv")
# Journal master table joined with its 2021 record: `sourceid` on the journal
# side is matched against `journal__sourceid` on the yearly side.
df = pd.merge(
    pd.read_csv("journal.csv"),
    yearly_df[yearly_df["year"] == 2021],
    left_on="sourceid",
    right_on="journal__sourceid",
)
# Loaded here; not referenced by the cells visible in this section.
area_df = pd.read_csv("journal_area.csv")
def get_cats(df):
    """Expand the ``categories`` column into one row per journal category.

    ``df["categories"]`` holds ``"; "``-separated entries. An entry shaped
    like ``"<field> (<Q>)"`` is split into its field name and the text in
    the trailing parentheses; an entry without parentheses is kept whole as
    the field and gets a missing ``Q``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"sourceid"`` and ``"categories"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``["field", "Q", "sourceid"]`` with a fresh integer index,
        one row per (journal, category entry). Journals whose
        ``categories`` value is missing contribute no rows.
    """
    idk = "sourceid"
    # explode() is linear in the number of entries, unlike summing the
    # per-row lists, and it tolerates missing category strings (which are
    # dropped right after instead of raising).
    exploded = (
        df[[idk, "categories"]]
        .assign(catbase=lambda d: d["categories"].str.split("; "))
        .explode("catbase")
        .dropna(subset=["catbase"])
        .reset_index(drop=True)
    )
    # Raw string: "\(" is not a valid escape in a normal string literal.
    # The first group is greedy, so "Medicine (miscellaneous) (Q1)" yields
    # field "Medicine (miscellaneous)" and Q "Q1".
    # NOTE(review): an entry with parentheses but no quartile suffix, e.g.
    # "X (misc)", is split into field "X" / Q "misc" by this pattern.
    parts = exploded["catbase"].str.extract(r"(.*) \((.*)\)")
    return pd.DataFrame(
        {
            # Fall back to the raw entry when the pattern did not match.
            "field": parts[0].fillna(exploded["catbase"]),
            "Q": parts[1],
            idk: exploded[idk],
        }
    )
# One row per (journal, category entry), with the journal-level columns
# attached again; the key-less merge joins on the columns both frames share.
# NOTE(review): only the first 10000 journals are expanded - presumably to
# bound runtime; confirm whether the cap is still wanted.
cat_base = get_cats(df.head(10000)).merge(df)
# Number of journals per field and quartile, ordered by the field's total.
field_pivot = (
    # Label only the missing quartiles; a frame-wide fillna would also write
    # "no Q" into every other merged column that happens to contain NaN.
    cat_base.assign(Q=lambda d: d["Q"].fillna("no Q"))
    .pivot_table(index="field", columns="Q", values="sourceid", aggfunc="count")
    .fillna(0)
    # Helper column holding the row total, used only for sorting.
    .assign(s=lambda df: df.sum(axis=1))
    .sort_values("s", ascending=False)
    # Keep the columns whose label starts with "Q"; this drops "no Q" and "s".
    .loc[:, lambda df: df.columns.str.startswith("Q")]
)
field_pivot.head(15).style.background_gradient(axis=1).set_caption(
    "Count values by fields"
)
Count values by fields
Q Q1 Q2 Q3 Q4
field        
Medicine (miscellaneous) 389.000000 426.000000 374.000000 307.000000
Sociology and Political Science 197.000000 97.000000 58.000000 50.000000
Mechanical Engineering 114.000000 87.000000 64.000000 68.000000
Geography, Planning and Development 98.000000 86.000000 75.000000 74.000000
Electrical and Electronic Engineering 96.000000 71.000000 60.000000 66.000000
History 111.000000 77.000000 63.000000 63.000000
Ecology, Evolution, Behavior and Systematics 99.000000 92.000000 82.000000 39.000000
Economics and Econometrics 127.000000 87.000000 48.000000 35.000000
Condensed Matter Physics 75.000000 79.000000 73.000000 57.000000
Education 152.000000 66.000000 31.000000 25.000000
Psychiatry and Mental Health 95.000000 76.000000 60.000000 38.000000
Chemistry (miscellaneous) 57.000000 78.000000 69.000000 48.000000
Materials Science (miscellaneous) 62.000000 72.000000 60.000000 52.000000
Public Health, Environmental and Occupational Health 71.000000 69.000000 62.000000 37.000000
Mechanics of Materials 76.000000 52.000000 52.000000 45.000000
def draw_table(df):
    """Render per-field, per-quartile values as a colour-graded table.

    ``df`` is pivoted so that ``field`` forms the rows and ``Q`` the
    innermost column level. Rows are restricted to (and ordered by) the
    first 15 fields of the module-level ``field_pivot``, columns in which
    at least half of the remaining values are missing are dropped, and the
    result is returned as a Styler with a background gradient computed
    along axis 0.
    """
    wide = df.pivot_table(index="field", columns="Q")
    top_rows = wide.loc[field_pivot.head(15).index]
    # Share of missing values per column, measured on the selected rows only.
    missing_share = top_rows.isna().mean()
    kept = top_rows.loc[:, missing_share < 0.5]
    return kept.style.background_gradient(axis=0)
# Average h-index, journal rating and 3-year document count for every
# (field, quartile) pair, shown for the top fields via draw_table.
draw_table(
    cat_base.groupby(["field", "Q"])[
        ["h_index", "journal_rating", "total_docs_3years"]
    ].mean()
).set_caption("Mean values of fields")
Mean values of fields
  h_index journal_rating total_docs_3years
Q Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4
field                        
Medicine (miscellaneous) 138.403599 80.525822 49.620321 21.368078 1.952355 0.664251 0.375511 0.157072 850.737789 573.298122 381.534759 283.853420
Sociology and Political Science 84.091371 38.340206 20.431034 11.320000 1.497914 0.387918 0.193000 0.117420 203.984772 143.391753 106.000000 75.180000
Mechanical Engineering 128.877193 60.908046 29.718750 17.382353 1.562754 0.529828 0.290672 0.141176 1171.219298 559.816092 387.484375 410.220588
Geography, Planning and Development 76.653061 38.674419 22.093333 12.635135 1.229337 0.454767 0.242347 0.128973 281.775510 120.046512 87.600000 63.054054
Electrical and Electronic Engineering 144.250000 78.830986 36.816667 19.000000 1.906833 0.556042 0.295233 0.145561 1155.656250 1001.690141 454.650000 320.575758
History 35.720721 16.350649 10.095238 7.126984 0.477171 0.159545 0.114698 0.101079 112.414414 88.675325 69.015873 46.412698
Ecology, Evolution, Behavior and Systematics 124.878788 63.684783 40.231707 23.871795 1.492626 0.575076 0.357134 0.184308 480.070707 210.891304 152.597561 76.692308
Economics and Econometrics 113.039370 53.011494 29.229167 12.857143 3.441748 0.702667 0.331229 0.157029 327.795276 207.195402 130.604167 140.171429
Condensed Matter Physics 147.466667 93.075949 55.657534 27.543860 1.747840 0.582481 0.351151 0.177912 1605.626667 1041.101266 535.136986 514.263158
Education 79.598684 48.287879 28.064516 19.040000 1.390322 0.531803 0.292581 0.147240 246.184211 210.606061 151.548387 81.280000
Psychiatry and Mental Health 130.978947 70.657895 39.466667 19.105263 1.728611 0.767684 0.393933 0.173316 498.705263 255.776316 173.266667 144.526316
Chemistry (miscellaneous) 214.175439 80.064103 40.144928 18.979167 2.458158 0.513449 0.269188 0.129687 2317.754386 935.679487 393.000000 311.312500
Materials Science (miscellaneous) 158.161290 84.777778 38.533333 15.384615 2.168565 0.602625 0.296500 0.130538 1294.870968 810.875000 420.283333 345.923077
Public Health, Environmental and Occupational Health 120.873239 69.753623 41.919355 20.324324 1.721803 0.656087 0.375726 0.170622 744.901408 322.884058 291.112903 171.324324
Mechanics of Materials 135.750000 67.711538 34.807692 20.888889 1.629092 0.560462 0.314923 0.167733 1215.657895 567.288462 375.269231 516.933333
def gini(s):
    """Return the Gini coefficient of the value frequencies in ``s``.

    Each distinct value's relative frequency is taken as one observation;
    the coefficient is the sum of all pairwise absolute differences between
    those shares divided by ``2 * n**2 * mean(share)``, where ``n`` is the
    number of distinct values.
    """
    shares = s.value_counts(normalize=True)
    p = shares.values
    n_distinct = len(shares)
    # |p_i - p_j| for every ordered pair (i, j).
    pairwise_gap = np.abs(p[:, np.newaxis] - p[np.newaxis, :])
    denominator = 2 * n_distinct ** 2 * shares.mean()
    return pairwise_gap.sum() / denominator

def top5(s):
    """Return the combined relative frequency of the five most common values in ``s``."""
    # value_counts sorts by descending frequency, so the first five entries
    # are the five largest shares.
    shares = s.value_counts(normalize=True)
    return shares.iloc[:5].sum()

Concentration metrics by fields

# Gini coefficient and top-5 share of country and publisher within every
# (field, quartile) pair, shown for the top fields via draw_table.
draw_table(
    cat_base.groupby(["field", "Q"])[["country", "publisher"]].agg([gini, top5])
).set_caption("Concentration metrics by fields")
Concentration metrics by fields
  country publisher
  gini top5 gini top5
Q Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4
field                                
Medicine (miscellaneous) 0.784429 0.771248 0.722648 0.634130 0.899743 0.821596 0.681818 0.472313 0.542272 0.549157 0.409589 0.192732 0.246787 0.215962 0.227273 0.114007
Sociology and Political Science 0.690355 0.678538 0.551724 0.458000 0.989848 0.907216 0.689655 0.600000 0.593788 0.432317 0.294430 0.108182 0.482234 0.412371 0.379310 0.220000
Mechanical Engineering 0.558897 0.647783 0.595703 0.562092 0.973684 0.873563 0.796875 0.750000 0.543344 0.413282 0.198342 0.079696 0.535088 0.425287 0.234375 0.147059
Geography, Planning and Development 0.648980 0.729875 0.589444 0.536383 1.000000 0.895349 0.706667 0.635135 0.554731 0.468520 0.294422 0.088745 0.642857 0.534884 0.306667 0.162162
Electrical and Electronic Engineering 0.489583 0.600939 0.557143 0.586453 1.000000 0.943662 0.816667 0.772727 0.709491 0.425822 0.338542 0.069548 0.833333 0.464789 0.383333 0.151515
History 0.674389 0.614719 0.502924 0.507937 0.981982 0.805195 0.666667 0.730159 0.530346 0.347165 0.199546 0.112554 0.531532 0.324675 0.285714 0.174603
Ecology, Evolution, Behavior and Systematics 0.664830 0.631884 0.560976 0.495192 0.939394 0.858696 0.682927 0.717949 0.528313 0.381100 0.232707 0.024966 0.545455 0.336957 0.256098 0.153846
Economics and Econometrics 0.301181 0.535304 0.607639 0.386555 1.000000 0.977011 0.854167 0.600000 0.526772 0.466749 0.314655 0.053680 0.543307 0.540230 0.416667 0.200000
Condensed Matter Physics 0.586667 0.526899 0.610212 0.491228 0.933333 0.962025 0.904110 0.877193 0.482581 0.451008 0.416770 0.202951 0.546667 0.481013 0.452055 0.280702
Education 0.515789 0.647727 0.562212 0.445000 1.000000 0.954545 0.935484 0.880000 0.603659 0.409091 0.256598 0.148571 0.559211 0.454545 0.451613 0.360000
Psychiatry and Mental Health 0.685167 0.678947 0.614286 0.386997 0.915789 0.934211 0.816667 0.578947 0.366819 0.411483 0.314530 0.136513 0.315789 0.434211 0.400000 0.289474
Chemistry (miscellaneous) 0.491228 0.477564 0.516371 0.455729 0.964912 0.923077 0.608696 0.687500 0.507849 0.459135 0.315217 0.020390 0.666667 0.487179 0.362319 0.125000
Materials Science (miscellaneous) 0.510753 0.603535 0.573333 0.464744 0.983871 0.916667 0.800000 0.826923 0.539589 0.395623 0.265079 0.107023 0.677419 0.416667 0.333333 0.211538
Public Health, Environmental and Occupational Health 0.632238 0.668116 0.585253 0.378378 0.943662 0.927536 0.806452 0.594595 0.343357 0.357764 0.261815 0.026276 0.323944 0.347826 0.306452 0.162162
Mechanics of Materials 0.548872 0.569930 0.582418 0.464815 0.973684 0.846154 0.826923 0.777778 0.505639 0.366864 0.267094 0.061905 0.578947 0.461538 0.365385 0.177778