You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
415 lines
15 KiB
415 lines
15 KiB
# -*- coding: utf-8 -*- |
|
""" |
|
Script for parsing the Fitbit data into graphs. |
|
@author M.F. Dijkhof |
|
""" |
|
# Import stuff |
|
import os |
|
import pandas as pd |
|
import seaborn as sns |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
|
|
# Disable copy overwrite warning |
|
pd.options.mode.chained_assignment = None # default='warn' |
|
|
|
#%% Define filenames and path

# Input files; all of them are expected inside the data directory below.
FilenameComp = 'SurgeryAndAdmission2.csv'  # Surg and Adm + Complications
FilenamePA = 'PA_Data.csv'
FilenameSteps = 'StepData.csv'
FilenameComplete = 'Complete.csv'
FilenameOutcome = 'Complications.csv'

# Raw string: the Windows path contains backslashes ('\M', '\C', '\D') that a
# normal string literal would try to parse as (invalid) escape sequences.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data'

# Work from the data directory so the relative filenames above resolve.
os.chdir(Path)
|
|
|
#%% Create DF from files

# Every export is read from CSV and indexed by the patient's 'Study ID'.
DFComp = pd.read_csv(FilenameComp).set_index('Study ID')
DFPA = pd.read_csv(FilenamePA).set_index('Study ID')
DFSteps = pd.read_csv(FilenameSteps).set_index('Study ID')
DFComplete = pd.read_csv(FilenameComplete).set_index('Study ID')
DFOutcome = pd.read_csv(FilenameOutcome).set_index('Study ID')
|
|
|
#%% |
|
# Keep only the patients that completed the study.
# CompleteCheck is a boolean Series indexed by 'Study ID'; it is applied as a
# row mask to each of the other tables.
CompleteCheck = DFComplete['Has patient completed study?'].eq('Yes')

DFComp, DFPA, DFOutcome, DFSteps = (
    Frame[CompleteCheck] for Frame in (DFComp, DFPA, DFOutcome, DFSteps)
)
|
|
|
|
|
# Put the PA data into the right format.
# The previous implementation rebuilt the frame one row at a time with
# DataFrame.append (removed in pandas 2.0, quadratic in the number of rows and
# failing on an empty frame). That loop only reproduced the same rows, so the
# frame is used directly and just the bookkeeping column is dropped.
# Note: columns keep the dtypes inferred by read_csv instead of all becoming
# object dtype as a side effect of the row-wise rebuild.
NewDF = DFPA.drop(['Complete?'], axis=1)

# Do the same for Step data
NewStepDF = DFSteps.drop(['Complete?'], axis=1)
|
|
|
#%% Create DF with important dates

DateColumns = ['Date of surgery', 'Date of hospital discharge',
               'Date first complication at home', 'Date (first) readmission',
               'Date discharge after first readmission', 'Date second readmission',
               'Date discharge second readmission']

# Explicit copy: the columns written below must land in an independent frame,
# not in a slice of DFComp (chained assignment).
DFDates = DFComp[DateColumns].copy()

# Parse every date column and keep only the calendar date.
for DateCol in DateColumns:
    DFDates[DateCol] = pd.to_datetime(DFDates[DateCol]).dt.date

# Intervals measured from the day of surgery.
SurgeryDate = DFDates['Date of surgery']
DFDates['LOS'] = DFDates['Date of hospital discharge'] - SurgeryDate  # LOS = Length of stay
DFDates['TTC'] = DFDates['Date first complication at home'] - SurgeryDate  # TTC = Time to complication
DFDates['TTR'] = DFDates['Date (first) readmission'] - SurgeryDate  # TTR = Time to readmission
DFDates['TT2R'] = DFDates['Date second readmission'] - SurgeryDate  # TT2R = Time to second readmission
|
|
|
#%% Create coordinates from the dates for the plots

IntervalColumns = ['LOS', 'TTC', 'TTR', 'TT2R']
AXVcoord = pd.DataFrame(columns=IntervalColumns)

# One row per patient (same labels as DFDates) holding the .days attribute of
# each interval.
for StudyID in DFDates.index:
    for Interval in IntervalColumns:
        AXVcoord.loc[StudyID, Interval] = DFDates[Interval].loc[StudyID].days

# Positional copies of the same table: nested list and numpy array.
AXVcomb = AXVcoord.values.tolist()
AXVArray = np.array(AXVcomb)
|
|
|
|
|
#%% Create DFs for each PA level

# Label-based column slices of NewDF (both end labels are included).
# The exact column labels, including trailing spaces, come from the CSV header.
NoActDF = NewDF.loc[:, slice(None, 'No activity After Surgery: 90')]
LowActDF = NewDF.loc[:, slice('Low activity Before Surgery: -1 ',
                              'Low activity After Surgery: 90')]
MedActDF = NewDF.loc[:, slice('Medium activity Before Surgery: -1',
                              'Medium activity After Surgery: 90')]
HighActDF = NewDF.loc[:, slice('High activity Before Surgery: -1 ',
                               'High activity After Surgery: 90')]
|
|
|
def MakeStepDF(NewDF, first_col=321, last_col=427):
    """Extract the daily step counts as a float table.

    Parameters
    ----------
    NewDF : pandas.DataFrame
        Step export with one row per patient.
    first_col, last_col : int
        Positional column window (``iloc[:, first_col:last_col]``) that holds
        the step columns. Defaults match the layout this script was written
        for.

    Returns
    -------
    pandas.DataFrame
        The selected columns without 'Days Fitbit prescribed after surgery',
        cast to float64, with every missing-value marker converted to NaN.

    Raises
    ------
    KeyError
        If 'Days Fitbit prescribed after surgery' is not inside the window.
    ValueError
        If a cell holds text that is neither numeric nor a known marker.
    """
    # Blank cells used to be rewritten to '' which made the float conversion
    # fail; they are treated as missing like the other markers.
    missing_markers = [' ', '', 'N.A.', 'N.A. ', 'NA.', 'n.a.', 'N.A', 'NaN']

    StepDF = NewDF.iloc[:, first_col:last_col]
    StepDF = StepDF.drop('Days Fitbit prescribed after surgery', axis=1)
    StepDF = StepDF.replace(missing_markers, np.nan)
    return StepDF.astype('float64')
|
|
|
# Numeric steps-per-day table built from the filtered step export.
StepDF = MakeStepDF(NewDF=NewStepDF)
|
|
|
#%% Day -14 to surgery were in the wrong order so we have to flip the first 14 days

def DayFlipper(DF):
    """Return DF with its first 14 columns in reversed order.

    Columns from position 14 onwards keep their place. A frame with fewer
    than 14 columns is reversed completely.
    """
    columns = list(DF.columns)
    reordered = list(reversed(columns[:14])) + columns[14:]
    return DF[reordered]
|
|
|
# Flip the first 14 columns of every per-day table ...
NoActDF, LowActDF, MedActDF, HighActDF, StepDF = map(
    DayFlipper, (NoActDF, LowActDF, MedActDF, HighActDF, StepDF)
)

# ... and print the resulting column order of each one for a visual check.
for Flipped in (NoActDF, LowActDF, MedActDF, HighActDF, StepDF):
    print(Flipped.columns)
|
|
|
#%% |
|
OldColumns = LowActDF.columns
NewColumns = range(-14, 91)

# Relabel the day columns with integer day offsets (-14 ... 90).
# NoActDF is left with its original labels.
for Frame in (LowActDF, MedActDF, HighActDF, StepDF):
    Frame.columns = NewColumns

# Set NaN to zeroes in order to calculate the total amount of activity
LowActDFZeroes, MedActDFZeroes, HighActDFZeroes, StepDFZeroes = (
    Table.fillna(0) for Table in (LowActDF, MedActDF, HighActDF, StepDF)
)

TotActDF = LowActDF + MedActDF + HighActDF
TotActDFZeroes = LowActDFZeroes + MedActDFZeroes + HighActDFZeroes

# Remove pts that reported less than threshold PA days
# NOTE(review): there are only 105 day columns, so with Threshold = 200 the
# mask below keeps every patient - confirm the intended cut-off.
Threshold = 200

NaNCount = LowActDF.isnull().sum(axis=1)  # Count days without data per patient
NaNRowDrop = NaNCount < Threshold         # True = keep this patient

NoActDFClean = NoActDF[NaNRowDrop]
LowActDFClean = LowActDFZeroes[NaNRowDrop]
MedActDFClean = MedActDFZeroes[NaNRowDrop]
HighActDFClean = HighActDFZeroes[NaNRowDrop]
TotActDFClean = TotActDFZeroes[NaNRowDrop]
|
|
|
#%% |
|
|
|
# NoActDFClean['Group'] = 'Complication' |
|
# LowActDFClean['Group'] = 'Complication' |
|
# MedActDFClean['Group'] = 'Complication' |
|
# HighActDFClean['Group'] = 'Complication' |
|
# TotActDFClean['Group'] = 'Complication' |
|
# StepDF['Group'] = 'Complication' |
|
|
|
def Grouper(DF):
    """Add a 'Group' column to DF and return the same frame.

    A row is labelled 'Complication' when DFOutcome reports 'Yes' for
    'Complications at home during monitoring ? ' under the same index label;
    every other row (including labels missing from DFOutcome) becomes
    'No Comp'. DF is modified in place.
    """
    HadComplication = DFOutcome['Complications at home during monitoring ? '].eq('Yes')
    Labels = pd.Series('Complication', index=DF.index)
    DF['Group'] = Labels.where(HadComplication, other='No Comp')
    return DF


# Tag every cleaned table (and the step table) with the complication group.
(NoActDFClean, LowActDFClean, MedActDFClean,
 HighActDFClean, TotActDFClean, StepDF) = map(
    Grouper,
    (NoActDFClean, LowActDFClean, MedActDFClean,
     HighActDFClean, TotActDFClean, StepDF),
)
|
|
|
# #%% Divide Comps, Non-comps and Unknown-Comps |
|
|
|
# LowActComp = LowActDFClean.loc[NewDF['Complications at Home'] == 'Yes'] |
|
# MedActComp = MedActDFClean.loc[NewDF['Complications at Home'] == 'Yes'] |
|
# HighActComp = HighActDFClean.loc[NewDF['Complications at Home'] == 'Yes'] |
|
# TotActComp = TotActDFClean.loc[NewDF['Complications at Home'] == 'Yes'] |
|
|
|
# LowActNoComp = LowActDFClean.loc[NewDF['Complications at Home'] == 'No'] |
|
# MedActNoComp = MedActDFClean.loc[NewDF['Complications at Home'] == 'No'] |
|
# HighActNoComp = HighActDFClean.loc[NewDF['Complications at Home'] == 'No'] |
|
# TotActNoComp = TotActDFClean.loc[NewDF['Complications at Home'] == 'No'] |
|
|
|
# LowActUnk = LowActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')] |
|
# MedActUnk = MedActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')] |
|
# HighActUnk = HighActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')] |
|
# TotActUnk = TotActDFClean.loc[(NewDF['Complications at Home'] != 'Yes') & (NewDF['Complications at Home'] != 'No')] |
|
|
|
#%% Plot comps, non-comps and unknown patient data with event-dates

colors = ['k', 'c', 'r', 'r']  # k=discharge, c=complication, r=readmissions


def _EventDays(AXV, index):
    """Return the event days of patient `index` as a float array.

    A label-indexed table (anything with `.loc`, e.g. AXVcoord) is looked up
    by Study ID. A plain list keeps the old positional convention
    `AXV[index - 1]`, which is only right when Study IDs run 1..N without gaps.
    """
    if hasattr(AXV, 'loc'):
        days = AXV.loc[index]
    else:
        days = AXV[index - 1]
    return np.asarray(days, dtype='float64')


def _DayCurve(DF, index):
    """Return (days, values) for one patient, ignoring the text 'Group' column."""
    curve = DF.loc[index].drop('Group', errors='ignore')
    return (np.asarray(curve.index, dtype='float64'),
            np.asarray(curve, dtype='float64'))


def PAPlotter(Low, Med, High, Tot, Step, AXV):
    """Draw one figure per patient in Tot with PA minutes and daily steps.

    Parameters
    ----------
    Low, Med, High, Tot : pandas.DataFrame
        Minutes of low / medium / high / total activity, one row per patient
        and one column per day.
    Step : pandas.DataFrame
        Steps per day, indexed like the activity tables.
    AXV : pandas.DataFrame or list
        Event days (discharge, complication, readmissions) per patient.
        Preferably indexed by Study ID; a list is read positionally.

    Left axis: dotted low/medium/high curves plus the total; right axis: step
    count. Day 0 is marked with a dashed line, the events with dotted lines
    coloured according to `colors`.
    """
    for index in Tot.index:
        _, ax1 = plt.subplots(figsize=(20, 8))

        ax1.plot(*_DayCurve(Low, index), 'b:')
        ax1.plot(*_DayCurve(Med, index), 'r:')
        ax1.plot(*_DayCurve(High, index), 'y:')
        ax1.plot(*_DayCurve(Tot, index))
        ax1.set_ylabel('Minutes of PA')
        ax1.set_xlabel('Days')
        ax1.set_ylim(0, 1440)

        ax1.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
        ax1.vlines(_EventDays(AXV, index), ymin=0, ymax=1440,
                   colors=colors, linestyle='dotted')

        ax2 = ax1.twinx()
        ax2.plot(*_DayCurve(Step, index), 'k')
        ax2.set_ylabel('Steps per day')
        ax2.set_title('PA levels comp pt' + str(index))
        ax2.set_ylim(0, 25000)


# AXVcoord is indexed by Study ID, so each patient gets its own event days
# even though incomplete patients were removed from the tables.
PAPlotter(LowActDFClean, MedActDFClean, HighActDFClean, TotActDFClean, StepDF, AXVcoord)

#PAPlotter(LowActNoComp, MedActNoComp, HighActNoComp,TotActNoComp, StepDF, AXVcomb, 'No Complication')
#PAPlotter(LowActUnk, MedActUnk, HighActUnk, TotActUnk, StepDF, AXVcomb, 'Unknown Complication')
|
|
|
|
|
#%% Calculate differences between comp PA and no comp PA

def PAStats(DF, group):
    """Print summary statistics of a per-day activity table.

    DF has one row per patient and integer day columns. For the whole table,
    the pre-operative days (-14..-1) and the first 30 / 60 / 90 post-operative
    days (starting at day 0) the function prints the mean of the per-day means
    and the standard deviation of the per-day standard deviations.
    NOTE(review): the std-of-stds is not the spread of the pooled data -
    confirm this is the intended statistic.

    `group` is only used in the heading. Nothing is returned.
    """
    def _MeanAndStd(Frame):
        return Frame.mean().mean(), Frame.std().std()

    Windows = (
        ('Total Mean min PA =', DF),
        ('Preoperative Mean min PA =', DF.loc[:, -14:-1]),
        ('30 days Postop. Mean min PA =', DF.loc[:, 0:30]),
        ('60 days Postop. Mean min PA =', DF.loc[:, 0:60]),
        ('90 days Postop. Mean min PA =', DF.loc[:, 0:90]),
    )
    # Compute everything before printing anything.
    Results = [(Label,) + _MeanAndStd(Frame) for Label, Frame in Windows]

    print('Stats ' + group + ':', '\n')
    *Leading, Final = Results
    for Label, Mean, Std in Leading:
        print(Label + str(Mean), 'Std=' + str(Std))
    Label, Mean, Std = Final
    print(Label + str(Mean), 'Std=' + str(Std), '\n')
|
|
|
# TotActComp / TotActNoComp / TotActUnk were only ever assigned in
# commented-out code, so the calls below raised NameError. The three groups
# are rebuilt here from the outcome table.
# NOTE(review): assumes the outcome column holds 'Yes' / 'No'; any other value
# (or a missing entry) is treated as unknown - confirm against the CSV.
OutcomeAtHome = DFOutcome['Complications at home during monitoring ? '].reindex(TotActDFClean.index)

# Statistics need numbers only: drop the text 'Group' label if it was added.
TotActNumeric = TotActDFClean.drop(columns='Group', errors='ignore').astype('float64')

TotActComp = TotActNumeric[OutcomeAtHome == 'Yes']
TotActNoComp = TotActNumeric[OutcomeAtHome == 'No']
TotActUnk = TotActNumeric[~OutcomeAtHome.isin(['Yes', 'No'])]

PAStats(TotActComp, 'complication')
PAStats(TotActNoComp, 'no complication')
PAStats(TotActUnk, 'unkown')
|
|
|
|
|
#%% Plot histogram number of missing values

# NaNCount holds the number of days without data per patient.
CountDF = pd.DataFrame(NaNCount)

# The outcome table in this script is DFOutcome (the name DFCompl was never
# defined and raised NameError).
CountDF['Complication'] = DFOutcome['Complications at home during monitoring ? ']
CountDF.columns = ['Count', 'Complication']

# The palette is passed to the plot itself; calling sns.color_palette()
# afterwards only returned a palette and did not change the figure.
sns.displot(CountDF, x='Count', bins=[10, 20, 30, 40, 50, 60, 70, 80, 90],
            hue='Complication', palette='colorblind')
|
|
|
#%% |
|
|
|
def RollingAvAct(DF, windowsize):
    """Return the per-patient moving average of a day-by-day table.

    Parameters
    ----------
    DF : pandas.DataFrame
        One row per patient, one numeric column per day.
    windowsize : int
        Number of consecutive days in the trailing window. The first days of
        each row use the shorter window that is available (min_periods=1).

    Returns
    -------
    pandas.DataFrame
        Same index and columns as DF, float64 values.
    """
    Numeric = DF.astype('float64')
    if Numeric.empty:
        return Numeric
    # Rolling runs down the rows, so transpose to roll along each patient's
    # days and transpose back. This replaces the row-by-row DataFrame.append
    # loop (append was removed in pandas 2.0).
    return Numeric.T.rolling(windowsize, min_periods=1).mean().T
|
|
|
# 3-day moving average of total activity, per outcome group.
AvTotActComp = pd.DataFrame(RollingAvAct(DF=TotActComp, windowsize=3))
AvTotActNoComp = pd.DataFrame(RollingAvAct(DF=TotActNoComp, windowsize=3))
|
|
|
|
|
#%% |
|
def _FitTrend(Segment):
    """Fit a straight line through one patient's values for a run of days.

    Returns (days, coeffs) where coeffs is [slope, intercept] from np.polyfit,
    or None when fewer than two days are available.
    """
    Days = np.asarray(Segment.index, dtype='float64')
    Values = np.asarray(Segment, dtype='float64')
    if Days.size < 2:
        return Days, None
    return Days, np.polyfit(Days, Values, 1)


def Trendliner(DF, Dates, group):
    """Fit and plot pre-op, in-hospital and post-discharge trend lines.

    Parameters
    ----------
    DF : pandas.DataFrame
        One row per patient, integer day columns (-14 ... 90).
    Dates : pandas.DataFrame or list
        Event days per patient; the first entry is the discharge day.
        A table with `.loc` (e.g. AXVcoord) is looked up by Study ID. A list
        keeps the old positional convention (`Dates[index - 1]`, discharge day
        from the module-level AXVArray), which is only correct when Study IDs
        run 1..N without gaps.
    group : str
        Label stored in the result tables and shown in the plot title.

    Returns
    -------
    dict
        {'Pre': ..., 'LOS': ..., 'Post': ...}; each value is a DataFrame with
        columns 'Slope', 'Int' and 'Group', one row per patient. Slope and
        intercept are NaN when a period has fewer than two days.

    Raises
    ------
    ValueError
        If a patient's discharge day is missing (NaN).
    """
    Periods = ('Pre', 'LOS', 'Post')
    LineStyles = {'Pre': 'b--', 'LOS': 'k--', 'Post': 'r--'}
    Slopes = {Period: pd.DataFrame(columns=['Slope', 'Int', 'Group'])
              for Period in Periods}

    for index in DF.index:
        if hasattr(Dates, 'loc'):
            Events = np.asarray(Dates.loc[index], dtype='float64')
            DisDay = int(Events[0])
        else:
            Events = Dates[index - 1]
            DisDay = int(AXVArray[index - 1, 0])

        # Label slices on the day columns; both ends are inclusive, so day 0
        # and the discharge day each belong to two neighbouring periods.
        Segments = {
            'Pre': DF.loc[index, -14:0],
            'LOS': DF.loc[index, 0:DisDay],
            'Post': DF.loc[index, DisDay:],
        }
        Fits = {Period: _FitTrend(Segments[Period]) for Period in Periods}

        for Period in Periods:
            Coeffs = Fits[Period][1]
            Slope, Intercept = (np.nan, np.nan) if Coeffs is None else Coeffs
            Slopes[Period].loc[index, 'Slope'] = Slope
            Slopes[Period].loc[index, 'Int'] = Intercept
            Slopes[Period].loc[index, 'Group'] = group

        # Plot figures
        plt.figure(figsize=(24, 8))
        plt.plot(DF.loc[index])
        for Period in ('Post', 'Pre', 'LOS'):
            Days, Coeffs = Fits[Period]
            if Coeffs is not None:
                plt.plot(Days, np.poly1d(Coeffs)(Days), LineStyles[Period])
        plt.vlines(x=0, ymin=0, ymax=1440, linestyle='dashed')
        plt.vlines(Events, ymin=0, ymax=1440, colors=colors, linestyle='dotted')
        plt.xlim(-14, 105)
        plt.ylim(0, 1440)
        plt.ylabel('Minutes of PA')
        plt.xlabel('Days')
        plt.title('Mov Avg PA levels pt' + str(index) + '_' + group)

    return {'Pre': Slopes['Pre'], 'LOS': Slopes['LOS'], 'Post': Slopes['Post']}


# AXVcoord is indexed by Study ID, so the event days stay matched to the right
# patient even though incomplete patients were removed from the tables.
TrendDictComp = Trendliner(AvTotActComp, AXVcoord, 'complication')
TrendDictNoComp = Trendliner(AvTotActNoComp, AXVcoord, 'no complication')
|
|
|
|
|
#%% |
|
|
|
# def SlopeStats(SlopeDict, group): |
|
# MeanSlopePre, MeanIntPre = SlopeDict['Pre'].mean() |
|
# StdSlopePre, StdIntPre = SlopeDict['Pre'].std() |
|
# MeanSlopeLOS, MeanIntLOS = SlopeDict['LOS'].mean() |
|
# StdSlopeLOS, StdIntLOS = SlopeDict['LOS'].std() |
|
# MeanSlopePost, MeanIntPost = SlopeDict['Post'].mean() |
|
# StdSlopePost, StdIntPost = SlopeDict['Post'].std() |
|
|
|
# print('Stats '+ group + ':', '\n') |
|
# print('Mean slope PA Pre-op = '+ str(MeanSlopePre),'Std= ' + str(StdSlopePre)) |
|
# print('Mean slope PA hospitalization = '+ str(MeanSlopeLOS),'Std= ' + str(StdSlopeLOS)) |
|
# print('Mean slope PA Post-op = '+ str(MeanSlopePost),'Std= ' + str(StdSlopePost)) |
|
# print('Mean intersept PA Pre-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre)) |
|
# print('Mean intercept PA hospitalization = '+ str(MeanIntLOS),'Std= ' + str(StdIntLOS)) |
|
# print('Mean intercept PA Post-op = '+ str(MeanIntPre),'Std= ' + str(StdIntPre), '\n') |
|
|
|
# return(MeanSlopePre, StdSlopePre, MeanSlopeLOS, StdSlopeLOS, MeanSlopePost, StdSlopePost) |
|
|
|
# MeanSlopePreComp, StdSlopePreComp, MeanSlopeLOSComp, StdSlopLOSComp, MeanSlopePostComp, StdSlopeComp, = SlopeStats(TrendDictComp, 'complications') |
|
# MeanSlopePreNoComp, StdSlopePreNoComp, MeanSlopeLOSNoComp, StdSlopLOSNoComp, MeanSlopePostNoComp, StdSlopeNoComp = SlopeStats(TrendDictNoComp, 'no complications') |
|
|
|
|
|
#%% |
|
|
|
# SlopeIntPreComp = pd.DataFrame(TrendDictComp['Pre']) |
|
# SlopeIntPreComp['Period'] = 'Pre' |
|
# SlopeIntPreNoComp= pd.DataFrame(TrendDictNoComp['Pre']) |
|
# SlopeIntPreNoComp['Period'] = 'Pre' |
|
# SlopeIntLOSComp = pd.DataFrame(TrendDictComp['LOS']) |
|
# SlopeIntLOSComp['Period'] = 'LOS' |
|
# SlopeIntLOSNoComp= pd.DataFrame(TrendDictNoComp['LOS']) |
|
# SlopeIntLOSNoComp['Period'] = 'LOS' |
|
|
|
# SlopeIntPostComp = pd.DataFrame(TrendDictComp['Post']) |
|
# SlopeIntPostComp['Period'] = 'Post' |
|
# SlopeIntPostNoComp= pd.DataFrame(TrendDictNoComp['Post']) |
|
# SlopeIntPostNoComp['Period'] = 'Post' |
|
|
|
# Slope = pd.DataFrame() |
|
# Slope = Slope.append([SlopeIntPreComp, SlopeIntPreNoComp, SlopeIntLOSComp, SlopeIntLOSNoComp, SlopeIntPostComp, SlopeIntPostNoComp]) |
|
# Slope['Slope'] = Slope['Slope'].astype('float64') |
|
# Slope['Int'] = Slope['Int'].astype('float64') |
|
|
|
#%% |
|
# plt.figure(figsize=(12,8)) |
|
# sns.set_theme(style="darkgrid") |
|
# sns.violinplot(x=Slope['Period'], y=Slope['Slope'],hue=Slope['Group'], palette="muted", split=True) |
|
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) |
|
|
|
# plt.figure(figsize=(12,8)) |
|
# sns.set_theme(style="darkgrid") |
|
# sns.violinplot(x=Slope['Period'], y=Slope['Int'],hue=Slope['Group'], palette="muted", split=True) |
|
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) |
|
|
|
#%% |
|
#fig, axes = plt.subplots(1,2, sharey=True) |
|
#sns.violinplot(data=newPASlopeComp['Intercept'], ax=axes[0], color='b') |
|
#sns.violinplot(data=newPASlopeNoComp['Intercept'], ax=axes[1], color='r') |