久しぶりにそれなりの量のスクリプトを書いた。set_major_formatterやxticksを使えばx軸の数字をいじることが出来る。
以下のプログラムの患者IDなどの情報はランダムで現実のものではない。(データは本物だけれど匿名化されている)
from pathlib import Path import pandas as pd import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.patches as mpatches plt.style.use('seaborn-muted') filenames = [j for i in Path("./hypothermia/20XXMMDD_バイタル").iterdir() for j in i.iterdir()] patients_id = [str(i).split('/')[-1][6:].split('.')[0] for i in filenames] record = { "1111111": [pd.to_datetime(i) for i in ["20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm"]],....} def parse_time(s: str): s = s.replace("\n", "").split(',') date = "" ret = [] for i in s[1:-1]: if '/' in i: date = i.split(" ")[0] ret.append(i) else: ret.append(date + " " + i) return ret def parse_therm(s: str): s = s.replace("\n", "").split(',') ret = [] for i in s[1:-1]: if i == "": ret.append(np.nan) elif 25 < float(i) < 45: ret.append(float(i)) else: ret.append(np.nan) return ret data = {} for fname, pid in zip(filenames, patients_id): with open(str(fname)) as f: is_time = False is_therm = False for i in f.readlines(): if i[:2] == "時刻": time = pd.to_datetime(parse_time(i)) is_time = True elif i[:2] == "T1": if sum([0 if np.isnan(i) else i for i in parse_therm(i)]): therm = parse_therm(i) is_therm = True if is_time and is_therm: data[pid] = [time, therm] for pid in patients_id: if pid not in record.keys(): continue id_valid = np.where((record[pid][0] < data[pid][0]) & (data[pid][0] < record[pid][2]))[0] data[pid][0] = [data[pid][0][i] for i in id_valid] data[pid][1] = [data[pid][1][i] for i in id_valid] data[pid][0] = [(i - record[pid][1])/np.timedelta64(1, 'D') for i in data[pid][0]] for k,v in data.items(): if k not in record.keys(): continue plt.plot(v[0], v[1], color='orange', linewidth=1.0) plt.grid(linestyle='--', linewidth=0.7) plt.xlabel('hours', position=(1.05,0)) plt.ylabel('℃', position=(0, 1), rotation=0) filenames = [i for i in Path("./hypothermia/バイタル2/").iterdir()] patients_id = ["2222222",....] data = dict() for p in patients_id: data[p] = [] for f in filenames: if p in str(f): # データ読み込み df = pd.read_csv(str(f), index_col=0) data[p].append(df.loc['T1']) start_time = pd.to_datetime(["20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm", "20XX/MM/DD hh:mm"]) start_time = dict(zip(patients_id, start_time)) for p in patients_id: # 複数ファイル分のデータを連結する data[p] = pd.concat(data[p]) data[p] = data[p][~data[p].index.str.contains('^Unnamed') & ~data[p].index.duplicated()] data[p].index = pd.to_datetime(data[p].index) # 一応NaNの数を数えておく print(p, data[p].isnull().sum()) # 異常値を削除してデータを日付順に並べ替える data[p] = data[p][(25 < data[p]) & (data[p] < 45)].sort_index(ascending=True) # data[p] = data[p].sort_index(ascending=True) # 開始日付を揃えて日表示にする(正規化) print(data[p].index[0]) print(start_time[p]) data[p].index = (data[p].index - start_time[p]) / np.timedelta64(1, 'D') for i, p in enumerate(patients_id): plt.plot(data[p].index, data[p].values, color='purple', linewidth=1.2) patch0 = mpatches.Patch(color="purple", label="new") patch1 = mpatches.Patch(color="orange", label="prev") # formatter = mpl.ticker.FuncFormatter(lambda x, pos: str(x) + " days") # plt.set_major_formatter(formatter) plt.xticks([-2,-1,0,1,2], [-48,-24,0,24,48]) plt.legend(handles=[patch0, patch1], loc="lower right") plt.savefig('diff.svg')