tak0kadaの何でもノート

発声練習、生存確認用。

医学関連は 医学ノート

低体温療法のデータいじり(その1)

久しぶりにそれなりの量のスクリプトを書いた。set_major_formatterやxticksを使えばx軸の数字をいじることが出来る。

以下のプログラムの患者IDなどの情報はランダムで現実のものではない。(データは本物だけれど匿名化されている)

from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
plt.style.use('seaborn-muted')

filenames = [j for i in Path("./hypothermia/20XXMMDD_バイタル").iterdir() for j in i.iterdir()]
patients_id = [str(i).split('/')[-1][6:].split('.')[0] for i in filenames]

record = {
    "1111111": [pd.to_datetime(i) for i in ["20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm"]],....}

def parse_time(s: str):
    s = s.replace("\n", "").split(',')
    date = ""
    ret = []
    for i in s[1:-1]:
        if '/' in i:
            date = i.split(" ")[0]
            ret.append(i)
        else:
            ret.append(date + " " + i)
    return ret

def parse_therm(s: str):
    s = s.replace("\n", "").split(',')
    ret = []
    for i in s[1:-1]:
        if i == "":
            ret.append(np.nan)
        elif 25 < float(i) < 45:
            ret.append(float(i))
        else:
            ret.append(np.nan)
    return ret

data = {}
for fname, pid in zip(filenames, patients_id):
    with open(str(fname)) as f:
        is_time = False
        is_therm = False
        for i in f.readlines():
            if i[:2] == "時刻":
                time = pd.to_datetime(parse_time(i))
                is_time = True
            elif i[:2] == "T1":
                if sum([0 if np.isnan(i) else i for i in parse_therm(i)]):
                    therm = parse_therm(i)
                    is_therm = True
        if is_time and is_therm:
            data[pid] = [time, therm]

for pid in patients_id:
    if pid not in record.keys():
        continue
    id_valid = np.where((record[pid][0] < data[pid][0]) & (data[pid][0] < record[pid][2]))[0]
    data[pid][0] = [data[pid][0][i] for i in id_valid]
    data[pid][1] = [data[pid][1][i] for i in id_valid]
    data[pid][0] = [(i - record[pid][1])/np.timedelta64(1, 'D') for i in data[pid][0]]

for k,v in data.items():
    if k not in record.keys():
        continue
    plt.plot(v[0], v[1], color='orange', linewidth=1.0)

plt.grid(linestyle='--', linewidth=0.7)
plt.xlabel('hours', position=(1.05,0))
plt.ylabel('℃', position=(0, 1), rotation=0)

filenames = [i for i in Path("./hypothermia/バイタル2/").iterdir()]
patients_id = ["2222222",....]

data = dict()
for p in patients_id:
    data[p] = []
    for f in filenames:
        if p in str(f):
            # データ読み込み
            df = pd.read_csv(str(f), index_col=0)
            data[p].append(df.loc['T1'])

start_time = pd.to_datetime(["20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm"])
start_time = dict(zip(patients_id, start_time))

for p in patients_id:
    # 複数ファイル分のデータを連結する
    data[p] = pd.concat(data[p])
    data[p] = data[p][~data[p].index.str.contains('^Unnamed') & ~data[p].index.duplicated()]
    data[p].index = pd.to_datetime(data[p].index)
    # 一応NaNの数を数えておく
    print(p, data[p].isnull().sum())
    # 異常値を削除してデータを日付順に並べ替える
    data[p] = data[p][(25 < data[p]) & (data[p] < 45)].sort_index(ascending=True)
    # data[p] = data[p].sort_index(ascending=True)
    # 開始日付を揃えて日表示にする(正規化)
    print(data[p].index[0])
    print(start_time[p])
    data[p].index = (data[p].index - start_time[p]) / np.timedelta64(1, 'D')

for i, p in enumerate(patients_id):
    plt.plot(data[p].index, data[p].values, color='purple', linewidth=1.2)

patch0 = mpatches.Patch(color="purple", label="new")
patch1 = mpatches.Patch(color="orange", label="prev")

# formatter = mpl.ticker.FuncFormatter(lambda x, pos: str(x) + " days")
# plt.set_major_formatter(formatter)
plt.xticks([-2,-1,0,1,2], [-48,-24,0,24,48])

plt.legend(handles=[patch0, patch1], loc="lower right")
plt.savefig('diff.svg')