Scripts to create a dataset from Redcap outputs to use for a PLS-DA classification.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

217 lines
9.5 KiB

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 25 11:13:35 2021
@author: Dijkhofmf
"""
# Import stuff
import os
import pandas as pd
import numpy as np
#import seaborn as sns
#import matplotlib.pyplot as plt
# Switch off pandas' chained-assignment warning for the whole script (pandas default is 'warn').
pd.options.mode.chained_assignment = None
#%% Define filenames and path
# Input files (the script header describes them as Redcap outputs).
Filename_T0 = 'BaselineT0.csv'
Filename_T1 = 'DischargeT1.csv'
#Filename_T2 = 'FollowUpT2Data.csv'
FilenameOutc = 'SurgAdmComp.csv'
FilenameComplete = 'Complete.csv'
# Raw string: the backslashes of a Windows path must not be parsed as escape
# sequences. '\M', '\C', '\D' and '\F' are invalid escapes that newer Python
# versions warn about; the resulting path text is unchanged.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'
# Set path: all relative file names below are resolved against this folder.
os.chdir(Path)
# read_csv already returns a DataFrame, so no pd.DataFrame() wrapper is needed.
DFT0 = pd.read_csv(Filename_T0)
DFT1 = pd.read_csv(Filename_T1)
#DFT2 = pd.read_csv(Filename_T2)
DFComplete = pd.read_csv(FilenameComplete)
DFCompl = pd.read_csv(FilenameOutc)
#%% Keep only patients who completed the study and index each table by Study ID
# The completion flag comes from DFComplete and is attached through the default
# row index of the freshly read files, so the files must list the patients in
# the same row order.

# Baseline (T0): flag, filter, cast every cell to text, then index by Study ID.
DFT0['Complete'] = DFComplete['Has patient completed study?']
DFT0 = DFT0[DFT0['Complete'] == 'Yes'].astype('str').set_index(['Study ID'])

# Discharge (T1): same treatment as T0.
DFT1['Complete'] = DFComplete['Has patient completed study?']
DFT1 = DFT1[DFT1['Complete'] == 'Yes'].astype('str').set_index(['Study ID'])

# Follow-up (T2) is currently disabled; it used the column name 'Complete data':
# DFT2['Complete data'] = DFComplete['Has patient completed study?']
# DFT2 = DFT2[DFT2['Complete data'] == 'Yes'].astype('str').set_index(['Study ID'])

# Outcome table: filtered and indexed the same way, but not cast to text.
DFCompl['Complete'] = DFComplete['Has patient completed study?']
DFCompl = DFCompl[DFCompl['Complete'] == 'Yes'].set_index(['Study ID'])
#%% Replace decimal commas by decimal points
# DFT0 and DFT1 hold text in every cell at this point (astype('str')), so the
# .str accessor is valid for each column. Working column by column yields the
# same table as a row-by-row apply without building one Series per patient,
# and regex=False makes the literal replacement explicit instead of relying
# on the pandas-version-dependent default of str.replace.
DFT0 = DFT0.apply(lambda column: column.str.replace(',', '.', regex=False))
DFT1 = DFT1.apply(lambda column: column.str.replace(',', '.', regex=False))
#DFT2 = DFT2.apply(lambda column: column.str.replace(',', '.', regex=False))
#%% Start the output tables with the questionnaire scores, converted to float
# Baseline table: BMI plus the five scores, under short column names.
FinalDF_T0 = (
    DFT0[['BMI', 'Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']]
    .astype('float64')
    .rename(columns={
        'Groningen Frailty Index': 'GFI',
        'Anxiety - Hospital Anxiety Depression Scale': 'HADS_A',
        'Depression - Hospital Anxiety Depression Scale': 'HADS_D',
    })
)
# Discharge table: the same scores, without BMI.
FinalDF_T1 = (
    DFT1[['Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']]
    .astype('float64')
    .rename(columns={
        'Groningen Frailty Index': 'GFI',
        'Anxiety - Hospital Anxiety Depression Scale': 'HADS_A',
        'Depression - Hospital Anxiety Depression Scale': 'HADS_D',
    })
)
# Follow-up table stays empty while the T2 processing is disabled.
FinalDF_T2 = pd.DataFrame()
#FinalDF_T2[['GFI', 'HADS_A', 'HADS_D', 'ADL', 'iADL']] = DFT2[['Groningen Frailty Index', 'Anxiety - Hospital Anxiety Depression Scale', 'Depression - Hospital Anxiety Depression Scale', 'ADL', 'iADL']].astype('float64')
#%% TUG_T0
# Timed Up&Go at baseline: 'TUGTot' is the mean of the attempts that were
# actually recorded. A missing attempt and a recorded 0 both count as "no
# measurement": one usable attempt is taken as-is, two unusable attempts give
# NaN. This is a vectorised form of the former row-by-row loop with the same
# results; unlike the loop it also creates 'TUGTot' when the table has no rows.
_tug_T0 = DFT0[['Timed to Up&Go - attempt 1 (sec)', 'Timed to Up&Go - attempt 2 (sec)']].astype('float64')
_tug_T0 = _tug_T0.replace(0, np.nan)
# mean(axis=1) skips NaN; a mean of exactly 0 is stored as NaN, as before.
FinalDF_T0['TUGTot'] = _tug_T0.mean(axis=1).replace(0, np.nan)
# TUG_T1 Assuming that all missing data were due to physical disabilities --> NaNs to 30 seconds
_tug_T1 = DFT1[['Timed to Up&Go - attempt 1 (sec)', 'Timed to Up&Go - attempt 2 (sec)']].astype('float64')
# After filling, both attempts are always present, so this is (attempt 1 + attempt 2) / 2.
FinalDF_T1['TUGTot'] = _tug_T1.fillna(30).mean(axis=1)
# TUG_T2
#FinalDF_T2['TUG1'] = DFT2['Timed to Up&Go - attempt 1 (sec)'].astype('float64').fillna(0)
#FinalDF_T2['TUG2'] = DFT2['Timed to Up&Go - attempt 2 (sec)'].astype('float64').fillna(0)
# for i, r in FinalDF_T2.iterrows():
# if FinalDF_T2.loc[i,'TUG1'] != 0 and FinalDF_T2.loc[i,'TUG2'] != 0:
# FinalDF_T2.loc[i,'TUGTot'] = (FinalDF_T2.loc[i,'TUG1']+FinalDF_T2.loc[i,'TUG2'])/2
# else:
# FinalDF_T2.loc[i,'TUGTot'] = (FinalDF_T2.loc[i,'TUG1']+FinalDF_T2.loc[i,'TUG2'])/1
# FinalDF_T2['TUG1'] = FinalDF_T2['TUG1'].replace(0, np.nan)
# FinalDF_T2['TUG2'] = FinalDF_T2['TUG2'].replace(0, np.nan)
# FinalDF_T2['TUGTot'] = FinalDF_T2['TUGTot'].replace(0, np.nan)
#%% Handgrip strength (HGS): mean of three attempts per hand, reduced to one dominant-hand score
# The column headers are used exactly as the code has always addressed them,
# including the 'rigth' spelling of the first two right-hand attempts.
_HGS_RIGHT_COLS = ['Handgrip Strength test Attempt 1 rigth', 'Handgrip Strength test Attempt 2 rigth', 'Handgrip Strength test Attempt 3 right']
_HGS_LEFT_COLS = ['Handgrip Strength test Attempt 1 left', 'Handgrip Strength test Attempt 2 left', 'Handgrip Strength test Attempt 3 left']


def _hgs_mean(source, columns):
    """Return the per-patient mean of the three handgrip attempts in *columns*.

    Plain addition is used on purpose: one missing attempt makes the mean NaN.
    """
    first, second, third = (source[name].astype('float64') for name in columns)
    return (first + second + third) / 3


def _hgs_dominant(right, left, dominance):
    """Return the handgrip score of the dominant hand for every patient.

    right, left -- per-patient mean grip strength of each hand (same index).
    dominance   -- hand dominance labels, looked up by the index of *right*.

    Patients whose dominance is neither right nor left (including patients
    without a dominance entry) get the mean of both hands.
    """
    dominance = dominance.reindex(right.index)
    # 'Rigth' is the spelling the original comparison used. 'Right' is accepted
    # as well, so right-handed patients are not silently scored with the
    # two-hand mean if the data carries the correct spelling.
    # NOTE(review): confirm which spelling the export actually contains.
    is_right = dominance.isin(['Rigth', 'Right']).to_numpy()
    is_left = (dominance == 'Left').to_numpy()
    both_hands = ((right + left) / 2).to_numpy()
    chosen = np.select([is_right, is_left], [right.to_numpy(), left.to_numpy()], default=both_hands)
    return pd.Series(chosen, index=right.index)


# Dominance is only taken from the baseline table and reused for discharge.
_dominance = DFT0['Hand dominance']

# Baseline: computed for every baseline patient.
FinalDF_T0['HGSDom'] = _hgs_dominant(_hgs_mean(DFT0, _HGS_RIGHT_COLS), _hgs_mean(DFT0, _HGS_LEFT_COLS), _dominance)

# Discharge: patients whose 'Handgrip Strength Test' is recorded as 'No' are
# scored 0 on both hands, which makes their dominant-hand score 0 as well.
_not_tested_T1 = DFT1['Handgrip Strength Test'] == 'No'
_right_T1 = _hgs_mean(DFT1, _HGS_RIGHT_COLS).mask(_not_tested_T1, 0)
_left_T1 = _hgs_mean(DFT1, _HGS_LEFT_COLS).mask(_not_tested_T1, 0)
FinalDF_T1['HGSDom'] = _hgs_dominant(_right_T1, _left_T1, _dominance)
# FinalDF_T2[['HGSR1','HGSR2','HGSR3']] = DFT2[['Handgrip Strength test Attempt 1 right','Handgrip Strength test Attempt 2 right','Handgrip Strength test Attempt 3 right']].astype('float64')
# FinalDF_T2['HGSRAvg'] = (FinalDF_T2['HGSR1'] + FinalDF_T2['HGSR2'] + FinalDF_T2['HGSR3'])/3
# FinalDF_T2[['HGSL1','HGSL2','HGSL3']] = DFT2[['Handgrip Strength test Attempt 1 left', 'Handgrip Strength test Attempt 2 left', 'Handgrip Strength test Attempt 3 left']].astype('float64')
# FinalDF_T2['HGSLAvg'] = (FinalDF_T2['HGSL1'] + FinalDF_T2['HGSL2'] + FinalDF_T2['HGSL3'])/3
# if FinalDF_T0['Dominance'] == 'Right':
# FinalDF_T2['HSGDom'] = FinalDF_T0['HGSRAvg']
# elif FinalDF_T0['Dominance'] == 'Left':
# FinalDF_T2['HSGDom'] = FinalDF_T0['HGSLAvg']
# else:
# FinalDF_T2['HGSDom'] = (FinalDF_T0['HGSRAvg']+FinalDF_T0['HGSLAvg'])/2
#%% EORTC questionnaire at baseline
# The EORTC items are selected by column position: the 16th up to and
# including the 59th column of DFT0 (counted after 'Study ID' became the
# index). This therefore depends on the column order of the input file.
EORTCCols = DFT0.columns[15:59].tolist()
EORTCScoresT0 = DFT0[EORTCCols]
#EORTCScoresT2 = DFT2[EORTCCols]
#%%
# The working directory is switched to the folder that presumably holds the
# project modules EORTC and SQUASH, so they can be imported from there.
# Raw string: '\M' and '\P' are invalid escape sequences in a normal literal
# and newer Python versions warn about them; the path text itself is unchanged.
os.chdir(r'I:\Mike Dijkhof\Python')
try:
    import EORTC as eor
    import SQUASH as sq
    NewEORTCScoresT0 = eor.EORTCCalculator(EORTCScoresT0, EORTCCols)
    #NewEORTCScoresT2 = eor.EORTCCalculator(EORTCScoresT2, EORTCCols)
    EORTCT0 = eor.EORTCScore(EORTCScoresT0)
    #EORTCT2 = eor.EORTCScore(EORTCScoresT2)
finally:
    # Always return to the data folder, also when the import or the scoring
    # fails, so that later relative file names keep pointing at the data.
    os.chdir(Path)
#%% plaatjes
# for index, row in EORTC.iterrows():
# plt.figure(figsize=(20,8))
# plt.title('EORTC preoperative outcomes pt ' + str(index))
# sns.barplot(x=EORTC.columns, y=EORTC.loc[index,:])
#%% SQUASH questionnaire at baseline
# Columns removed from the parsed SQUASH table before it is converted to float.
ColsToDrop = ['SQUASH baseline afgenomen?', 'Woon werkverkeer?', 'Werk?', 'Huishoudelijk werk?']
# Parse the SQUASH items out of DFT0, drop the columns above and cast the
# remaining ones to float in one chain.
SQUASHScoresT0 = sq.SQUASHParse(DFT0).drop(columns=ColsToDrop).astype('float64')
#SQUASHScoresT2 = sq.SQUASHParse(DFT2).drop(columns=ColsToDrop).astype('float64')
# Score the cleaned table with the project's SQUASH module.
SQUASHT0 = sq.SQUASHScore(SQUASHScoresT0)
#SQUASHT2 = sq.SQUASHScore(SQUASHScoresT2)
#%% Patient type label, taken from the complications recorded in DFCompl
# The raw values are copied by row position (.values), not matched on Study ID,
# so DFCompl must have the same row order and length as the output tables.
# 'Yes' is then rewritten to 'Complication' and 'No' to 'Healthy'.
for _table in (FinalDF_T0, FinalDF_T1):
    _table['Pt Type'] = DFCompl['Complications at home during monitoring ? '].values
    _table['Pt Type'] = _table['Pt Type'].str.replace('Yes', 'Complication').str.replace('No', 'Healthy')
# Follow-up (T2) is currently disabled:
#FinalDF_T2['Pt Type'] = DFCompl['Complications at home during monitoring ? '].values
#FinalDF_T2['Pt Type'] = FinalDF_T2['Pt Type'].str.replace('Yes', 'Complication').str.replace('No', 'Healthy')
#%% Save FinalDF to .csv file
# Each output table is written under its own file name, relative to the
# current working directory.
for _csv_name, _table in (('FinalDF_T0.csv', FinalDF_T0), ('FinalDF_T1.csv', FinalDF_T1)):
    _table.to_csv(_csv_name)
#FinalDF_T2.to_csv('FinalDF_T2.csv')