{ "cells": [ { "cell_type": "markdown", "id": "greater-sweden", "metadata": {}, "source": [ "# Field Analysis\n", "\n", "> Latest stats are from 2021, either because scimago stopped collecting data, or newer data is deemed unreliable regarding citations" ] }, { "cell_type": "code", "execution_count": 1, "id": "weighted-boost", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:00.320848Z", "iopub.status.busy": "2023-05-04T15:14:00.320394Z", "iopub.status.idle": "2023-05-04T15:14:00.632904Z", "shell.execute_reply": "2023-05-04T15:14:00.632111Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "0d40cb43", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:00.637302Z", "iopub.status.busy": "2023-05-04T15:14:00.636755Z", "iopub.status.idle": "2023-05-04T15:14:01.634340Z", "shell.execute_reply": "2023-05-04T15:14:01.633515Z" } }, "outputs": [], "source": [ "yearly_df = pd.read_csv(\"journal_record.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "liberal-preserve", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:01.638256Z", "iopub.status.busy": "2023-05-04T15:14:01.637805Z", "iopub.status.idle": "2023-05-04T15:14:01.821876Z", "shell.execute_reply": "2023-05-04T15:14:01.821107Z" } }, "outputs": [], "source": [ "df = pd.read_csv(\"journal.csv\").merge(\n", " yearly_df.loc[lambda _df: _df[\"year\"] == 2021],\n", " left_on=\"sourceid\",\n", " right_on=\"journal__sourceid\",\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "5d60cd30", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:01.825484Z", "iopub.status.busy": "2023-05-04T15:14:01.824964Z", "iopub.status.idle": "2023-05-04T15:14:01.860085Z", "shell.execute_reply": "2023-05-04T15:14:01.859290Z" } }, "outputs": [], "source": [ "area_df = pd.read_csv(\"journal_area.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "1d109d45", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:01.864076Z", "iopub.status.busy": "2023-05-04T15:14:01.863572Z", "iopub.status.idle": "2023-05-04T15:14:01.869017Z", "shell.execute_reply": "2023-05-04T15:14:01.868338Z" } }, "outputs": [], "source": [ "def get_cats(df):\n", " _cs = df[\"categories\"].str.split(\"; \")\n", " idk = \"sourceid\"\n", " _df = pd.DataFrame(\n", " {\"catbase\": _cs.sum(), idk: np.repeat(df[idk].values, _cs.str.len())}\n", " )\n", " return pd.concat([_df, _df[\"catbase\"].str.extract(\"(.*) \\((.*)\\)\")], axis=1).assign(\n", " field=lambda df: np.where(df.loc[:, 0].isna(), df[\"catbase\"], df.loc[:, 0])\n", " ).rename(columns={1: \"Q\"}).loc[:, [\"field\", \"Q\", idk]]" ] }, { "cell_type": "code", "execution_count": 6, "id": "f7b3db68", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:01.872166Z", "iopub.status.busy": "2023-05-04T15:14:01.871637Z", "iopub.status.idle": "2023-05-04T15:14:02.481369Z", "shell.execute_reply": "2023-05-04T15:14:02.480504Z" } }, "outputs": [], "source": [ "cat_base = get_cats(df.head(10000)).merge(df)" ] }, { "cell_type": "code", "execution_count": 7, "id": "3c5214d6", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:02.485066Z", "iopub.status.busy": "2023-05-04T15:14:02.484549Z", "iopub.status.idle": "2023-05-04T15:14:02.776925Z", "shell.execute_reply": "2023-05-04T15:14:02.776085Z" }, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Count values by fields
QQ1Q2Q3Q4
field    
Medicine (miscellaneous)389.000000426.000000374.000000307.000000
Sociology and Political Science197.00000097.00000058.00000050.000000
Mechanical Engineering114.00000087.00000064.00000068.000000
Geography, Planning and Development98.00000086.00000075.00000074.000000
Electrical and Electronic Engineering96.00000071.00000060.00000066.000000
History111.00000077.00000063.00000063.000000
Ecology, Evolution, Behavior and Systematics99.00000092.00000082.00000039.000000
Economics and Econometrics127.00000087.00000048.00000035.000000
Condensed Matter Physics75.00000079.00000073.00000057.000000
Education152.00000066.00000031.00000025.000000
Psychiatry and Mental Health95.00000076.00000060.00000038.000000
Chemistry (miscellaneous)57.00000078.00000069.00000048.000000
Materials Science (miscellaneous)62.00000072.00000060.00000052.000000
Public Health, Environmental and Occupational Health71.00000069.00000062.00000037.000000
Mechanics of Materials76.00000052.00000052.00000045.000000
\n" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "field_pivot = (\n", " cat_base.fillna(\"no Q\")\n", " .pivot_table(index=\"field\", columns=\"Q\", values=\"sourceid\", aggfunc=\"count\")\n", " .fillna(0)\n", " .assign(s=lambda df: df.sum(axis=1))\n", " .sort_values(\"s\", ascending=False)\n", " .loc[:, lambda df: df.columns.str.startswith(\"Q\")]\n", ")\n", "field_pivot.head(15).style.background_gradient(axis=1).set_caption(\n", " \"Count values by fields\"\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "id": "7018ed01", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:02.781905Z", "iopub.status.busy": "2023-05-04T15:14:02.780368Z", "iopub.status.idle": "2023-05-04T15:14:02.787703Z", "shell.execute_reply": "2023-05-04T15:14:02.787007Z" } }, "outputs": [], "source": [ "def draw_table(df):\n", " return (\n", " df.pivot_table(index=\"field\", columns=\"Q\")\n", " .loc[field_pivot.head(15).index]\n", " .loc[:, lambda df: df.isna().mean() < 0.5]\n", " .style.background_gradient(axis=0)\n", " )" ] }, { "cell_type": "code", "execution_count": 9, "id": "09ba766d", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:02.791219Z", "iopub.status.busy": "2023-05-04T15:14:02.790666Z", "iopub.status.idle": "2023-05-04T15:14:02.845157Z", "shell.execute_reply": "2023-05-04T15:14:02.844409Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Mean values of fields
 h_indexjournal_ratingtotal_docs_3years
QQ1Q2Q3Q4Q1Q2Q3Q4Q1Q2Q3Q4
field            
Medicine (miscellaneous)138.40359980.52582249.62032121.3680781.9523550.6642510.3755110.157072850.737789573.298122381.534759283.853420
Sociology and Political Science84.09137138.34020620.43103411.3200001.4979140.3879180.1930000.117420203.984772143.391753106.00000075.180000
Mechanical Engineering128.87719360.90804629.71875017.3823531.5627540.5298280.2906720.1411761171.219298559.816092387.484375410.220588
Geography, Planning and Development76.65306138.67441922.09333312.6351351.2293370.4547670.2423470.128973281.775510120.04651287.60000063.054054
Electrical and Electronic Engineering144.25000078.83098636.81666719.0000001.9068330.5560420.2952330.1455611155.6562501001.690141454.650000320.575758
History35.72072116.35064910.0952387.1269840.4771710.1595450.1146980.101079112.41441488.67532569.01587346.412698
Ecology, Evolution, Behavior and Systematics124.87878863.68478340.23170723.8717951.4926260.5750760.3571340.184308480.070707210.891304152.59756176.692308
Economics and Econometrics113.03937053.01149429.22916712.8571433.4417480.7026670.3312290.157029327.795276207.195402130.604167140.171429
Condensed Matter Physics147.46666793.07594955.65753427.5438601.7478400.5824810.3511510.1779121605.6266671041.101266535.136986514.263158
Education79.59868448.28787928.06451619.0400001.3903220.5318030.2925810.147240246.184211210.606061151.54838781.280000
Psychiatry and Mental Health130.97894770.65789539.46666719.1052631.7286110.7676840.3939330.173316498.705263255.776316173.266667144.526316
Chemistry (miscellaneous)214.17543980.06410340.14492818.9791672.4581580.5134490.2691880.1296872317.754386935.679487393.000000311.312500
Materials Science (miscellaneous)158.16129084.77777838.53333315.3846152.1685650.6026250.2965000.1305381294.870968810.875000420.283333345.923077
Public Health, Environmental and Occupational Health120.87323969.75362341.91935520.3243241.7218030.6560870.3757260.170622744.901408322.884058291.112903171.324324
Mechanics of Materials135.75000067.71153834.80769220.8888891.6290920.5604620.3149230.1677331215.657895567.288462375.269231516.933333
\n" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", " cat_base.groupby([\"field\", \"Q\"])[[\"h_index\", \"journal_rating\", \"total_docs_3years\"]]\n", " .mean()\n", " .pipe(draw_table)\n", " .set_caption(\"Mean values of fields\")\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "id": "c68a1238", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:02.849691Z", "iopub.status.busy": "2023-05-04T15:14:02.848319Z", "iopub.status.idle": "2023-05-04T15:14:02.854361Z", "shell.execute_reply": "2023-05-04T15:14:02.853732Z" } }, "outputs": [], "source": [ "def gini(s):\n", " vc = s.value_counts(normalize=True)\n", " diffs = np.abs(vc.values.reshape(-1, 1) - vc.values.reshape(1, -1))\n", " return diffs.sum() / (2 * vc.shape[0] ** 2 * vc.mean())\n", "\n", "def top5(s):\n", " return s.value_counts(normalize=True).head(5).sum()" ] }, { "cell_type": "markdown", "id": "5dc3cdc7", "metadata": {}, "source": [ "## Concentration metrics by fields" ] }, { "cell_type": "code", "execution_count": 11, "id": "43194a87", "metadata": { "execution": { "iopub.execute_input": "2023-05-04T15:14:02.857649Z", "iopub.status.busy": "2023-05-04T15:14:02.857283Z", "iopub.status.idle": "2023-05-04T15:14:05.425089Z", "shell.execute_reply": "2023-05-04T15:14:05.424277Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Concentration metrics by fields
 countrypublisher
 ginitop5ginitop5
QQ1Q2Q3Q4Q1Q2Q3Q4Q1Q2Q3Q4Q1Q2Q3Q4
field                
Medicine (miscellaneous)0.7844290.7712480.7226480.6341300.8997430.8215960.6818180.4723130.5422720.5491570.4095890.1927320.2467870.2159620.2272730.114007
Sociology and Political Science0.6903550.6785380.5517240.4580000.9898480.9072160.6896550.6000000.5937880.4323170.2944300.1081820.4822340.4123710.3793100.220000
Mechanical Engineering0.5588970.6477830.5957030.5620920.9736840.8735630.7968750.7500000.5433440.4132820.1983420.0796960.5350880.4252870.2343750.147059
Geography, Planning and Development0.6489800.7298750.5894440.5363831.0000000.8953490.7066670.6351350.5547310.4685200.2944220.0887450.6428570.5348840.3066670.162162
Electrical and Electronic Engineering0.4895830.6009390.5571430.5864531.0000000.9436620.8166670.7727270.7094910.4258220.3385420.0695480.8333330.4647890.3833330.151515
History0.6743890.6147190.5029240.5079370.9819820.8051950.6666670.7301590.5303460.3471650.1995460.1125540.5315320.3246750.2857140.174603
Ecology, Evolution, Behavior and Systematics0.6648300.6318840.5609760.4951920.9393940.8586960.6829270.7179490.5283130.3811000.2327070.0249660.5454550.3369570.2560980.153846
Economics and Econometrics0.3011810.5353040.6076390.3865551.0000000.9770110.8541670.6000000.5267720.4667490.3146550.0536800.5433070.5402300.4166670.200000
Condensed Matter Physics0.5866670.5268990.6102120.4912280.9333330.9620250.9041100.8771930.4825810.4510080.4167700.2029510.5466670.4810130.4520550.280702
Education0.5157890.6477270.5622120.4450001.0000000.9545450.9354840.8800000.6036590.4090910.2565980.1485710.5592110.4545450.4516130.360000
Psychiatry and Mental Health0.6851670.6789470.6142860.3869970.9157890.9342110.8166670.5789470.3668190.4114830.3145300.1365130.3157890.4342110.4000000.289474
Chemistry (miscellaneous)0.4912280.4775640.5163710.4557290.9649120.9230770.6086960.6875000.5078490.4591350.3152170.0203900.6666670.4871790.3623190.125000
Materials Science (miscellaneous)0.5107530.6035350.5733330.4647440.9838710.9166670.8000000.8269230.5395890.3956230.2650790.1070230.6774190.4166670.3333330.211538
Public Health, Environmental and Occupational Health0.6322380.6681160.5852530.3783780.9436620.9275360.8064520.5945950.3433570.3577640.2618150.0262760.3239440.3478260.3064520.162162
Mechanics of Materials0.5488720.5699300.5824180.4648150.9736840.8461540.8269230.7777780.5056390.3668640.2670940.0619050.5789470.4615380.3653850.177778
\n" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat_base.groupby([\"field\", \"Q\"])[[\"country\", \"publisher\"]].agg([gini, top5]).pipe(draw_table).set_caption(\"Concentration metrics by fields\")" ] } ], "metadata": { "interpreter": { "hash": "64878fe84c8d43d7501c7cf6e04817e42f7fe76450da501dadd076b66cecf21e" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 5 }