Explore Waves and Conditions

Explore Waves and Conditions

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (14, 7)
df = pd.read_csv("covid_victim.csv").assign(is_70_plus=lambda df: df["age"] >= 70)
cond_cols = df.columns[df.columns.str.startswith("condition")]
mean_cols = [
    "positive_rate",
    "total_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
    "total_boosters",
]
sum_cols = [*cond_cols, "is_70_plus", "is_male"]
aggs = {
    **{c: "sum" for c in sum_cols},
    **{c: "mean" for c in mean_cols},
    "count": "sum",
}
MIN_ASC = 30


def get_wave_n(df):
    wave_ascents = df.groupby("stage")["climb"].mean().loc[lambda s: s > 40].index
    _dic = {a: i + 1 for i, a in enumerate(wave_ascents)}
    wave_dic = {**_dic, **{k + 1: v for k, v in _dic.items()}}
    subseq_waves = pd.Series([wave_dic.get(e, 0) for e in df["stage"]], index=df.index)
    return np.where(
        df["climb"].abs().rolling(4).mean() > (MIN_ASC / 2), subseq_waves, 0
    ).astype(str)


weekly_wave_df = (
    df.assign(count=1, pdate=pd.to_datetime(df["estimated_date"]))
    .groupby("pdate")
    .agg(aggs)
    .resample("1W")
    .agg(aggs)
    .assign(
        climb=lambda df: df["count"].diff(),
        stage=lambda df: (df["climb"] > MIN_ASC)
        .diff()
        .pipe(lambda s: s & (s != s.shift(1)) & (s != s.shift(2)))
        .fillna(0)
        .astype(int)
        .cumsum(),
        wave=get_wave_n,
        wave_stage=lambda df: np.where(
            df.groupby("stage")["climb"].transform("mean") > 0, "ascent", "descent"
        ),
    )
    .loc[lambda df: df["wave"] != "0"]
)

ax = weekly_wave_df.reset_index().pipe(
    lambda df: sns.lineplot(
        x="pdate", y="count", hue="wave", style="wave_stage", data=df
    )
)
plt.title("Number of deaths caused by COVID in Hungary")
plt.xlabel("Date")
plt.xticks(rotation = 35)
plt.ylabel("Number of deaths")
plt.show()
../../_images/intro_6_0.png
changes = (
    weekly_wave_df.loc[:, ["count", *sum_cols]]
    .pipe(lambda df: 2 * df.diff() / (df + df.shift()))
    .fillna(0)
    .pipe(lambda df: df - df[["count"]].values)
    .drop("count", axis=1)
)
(
    pd.concat([changes, weekly_wave_df[["wave", "wave_stage"]]], axis=1)
    .pivot_table(index=["wave", "wave_stage"], values=sum_cols)
    .rename(columns=lambda s: s.replace("condition__", ""))
    .style.background_gradient(
        axis=None,
        vmin=-0.1,
        vmax=0.1,
        cmap=sns.diverging_palette(
            220,
            20,
            as_cmap=True,
        ),
    )
    .set_caption("Growth of conditions compared to the growth of deceased people (%)")
)
Growth of conditions compared to the growth of deceased people (%)
    blood_pressure diabetes heart lungs obesity is_70_plus is_male
wave wave_stage              
1 ascent -0.002730 0.004524 0.037673 0.000211 -0.009186 0.004350 -0.008984
descent 0.006674 0.000927 0.019730 0.018787 0.040337 -0.000514 -0.004357
2 ascent 0.000394 0.002414 -0.049256 -0.053962 0.119785 -0.021713 0.011351
descent -0.006851 0.024578 -0.024435 0.051909 -0.025324 -0.001743 0.007955
3 ascent -0.010319 -0.055158 0.024522 -0.010020 -0.055266 0.012735 -0.016413
descent 0.007315 0.001789 0.025949 -0.000337 -0.048794 0.017995 -0.005324
4 ascent 0.007603 -0.066693 0.033743 0.000125 -0.337224 0.045969 0.035116
ax = weekly_wave_df.loc[
    :,
    [
        "count",
        "condition__obesity",
        "condition__blood_pressure",
        "people_fully_vaccinated",
    ],
].pipe(lambda df: df / df.max()).melt(ignore_index=False).reset_index().pipe(
    lambda df: sns.lineplot(x="pdate", y="value", hue="variable", data=df)
)

plt.title("Selected variables weekly values compared to their peak values")
plt.xlabel("Date")
plt.xticks(rotation = 35)
plt.ylabel("%", rotation = 180)
plt.show()
../../_images/intro_9_0.png
condition_counts = (
    pd.DataFrame(df["raw_conditions"].str.split(", ").tolist())
    .unstack()
    .dropna()
    .value_counts()
)
df_contition_counts = pd.DataFrame(condition_counts.head(20), columns = ['Count'])
df_contition_counts.index.name= 'Conditions'
df_contition_counts.style.set_caption("Top 20 conditions recorded for the deceased")
Top 20 conditions recorded for the deceased
  Count
Conditions  
magasvérnyomás-betegség 20589
cukorbetegség 12810
magas vérnyomás 8889
iszkémiás szívbetegség 4860
szívbetegség 3614
demencia 3022
daganatos megbetegedés 2798
krónikus veseelégtelenség 2299
szívelégtelenség 2224
veseelégtelenség 1952
nem ismert alapbetegség 1950
szívritmuszavar 1890
tüdőbetegség 1874
érelmeszesedés 1478
pitvarfibrilláció 1438
vérszegénység 1406
kóros elhízás 1392
krónikus obstruktív tüdőbetegség 1379
elhízás 1124
asztma 1110