You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
5.0 KiB
136 lines
5.0 KiB
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 8 10:38:31 2021

@author: Dijkhofmf
"""
|
|
|
# Import stuff |
|
import os |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
|
|
# Disable pandas' chained-assignment (SettingWithCopy) warning for this script.
pd.options.mode.chained_assignment = None  # default='warn'

#%% Define filenames and path

# Input files; they are opened by bare name, i.e. relative to the working
# directory that is set at the end of this cell.
FilenameComplete = 'Complete.csv'
FilenameDemo = 'DemoData.csv'
Filename_T0 = 'FinalDF_T0.csv'

# Raw string so the Windows separators stay literal backslashes. Without the
# r-prefix, '\M', '\C', '\D' and '\F' are invalid escape sequences, which
# Python 3.12+ reports as a SyntaxWarning. The resulting string is unchanged.
Path = r'I:\Mike Dijkhof\Connecare MGP\Data\FinalFiles'

# Set path
os.chdir(Path)
|
|
|
# Study-completion status; only used to filter the demographics table.
DFComplete = pd.read_csv(FilenameComplete)

# Demographics table. Neither CSV is read with an index column, so the
# completion flag below is matched on the default integer row index.
DFDemo = pd.read_csv(FilenameDemo)
DFDemo['Complete data'] = DFComplete['Has patient completed study?']

# Keep only the rows whose completion flag is exactly 'Yes'.
completed = DFDemo['Complete data'] == 'Yes'
DFDemo = DFDemo.loc[completed].copy()

# Strip the 'ASA ' prefix and store the remaining class as a float.
asa_text = DFDemo['ASA-classification'].str.replace('ASA ', '')
DFDemo['ASA-classification'] = asa_text.astype('float64')

# Checkbox answers become 0/1 indicators.
DFDemo = DFDemo.replace('Unchecked', 0).replace('Checked', 1)

# Columns that are not carried into the analysis table.
Dropcols = [
    'Year of birth',
    'Subject ID Connecare',
    'Subject ID Connecare (version 2.0)',
    'Date subject signed consent',
    'Nationality',
    'Language',
    'Former occupation',
    'Does the patient have a smartphone that they use?',
    'How many days a week is the smartphone used?',
    'Does the patient have a tablet that they use?',
    'How many days a week is the tablet used?',
    'Does the patient have a computer/pc that they use?',
    'How many days a week is the computer/pc used?',
    'Smart device at home',
    'Smart device at inclusion? (check all that apply) (choice=Fitbit)',
    'Smart device at inclusion? (check all that apply) (choice=Weight scale)',
    'Indication Surgery',
    'Comments',
    'Complete?',
    'Complete data',
]

# Remove them and key the table by study ID.
DFDemo = DFDemo.drop(columns=Dropcols).set_index('Study ID')
|
|
|
# Calculate CCI score
# Weight the comorbidity indicator columns before summing them. Columns
# 10-19 keep weight 1, 20-25 get weight 2, 26 gets weight 3 and 27-28 get
# weight 6; together these are exactly the 19 columns summed below.
# NOTE(review): this assumes the CSV lists the comorbidity items in order of
# increasing Charlson weight -- confirm against DemoData.csv.
DFDemo.iloc[:, 20:26] = DFDemo.iloc[:, 20:26] * 2
DFDemo.iloc[:, 26] = DFDemo.iloc[:, 26] * 3
# Was iloc[:, 26:28], which multiplied column 26 a second time (3 * 6 = 18)
# and left column 28 unweighted.
DFDemo.iloc[:, 27:29] = DFDemo.iloc[:, 27:29] * 6

# Sum the weighted indicators into one score per patient, then drop the
# individual indicator columns.
ColMask = DFDemo.columns[10:29]
DFDemo['Comorb'] = DFDemo[ColMask].sum(axis=1)
DFDemo = DFDemo.drop(ColMask, axis=1)
|
|
|
#%%

# T0 table, keyed by study ID so that it shares its index with DFDemo.
DF_T0 = pd.read_csv(Filename_T0).set_index('Study ID')

# Copy the patient type across; values are matched on the 'Study ID' index.
DFDemo['Type'] = DF_T0['Pt Type']
|
|
|
#%% code variables

# Binary text answers become 0/1.
DFDemo['Gender'] = DFDemo['Gender'].replace('Female', 0).replace('Male', 1)
DFDemo['Recurrent disease?'] = (
    DFDemo['Recurrent disease?'].replace('No', 0).replace('Yes', 1)
)

# One-hot encode the multi-level categorical columns. drop_first leaves out
# the first level of each variable as the reference category.
# NOTE(review): the dummy columns carry no prefix, so levels with the same
# label in two variables would give duplicate column names -- verify.
Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal = (
    pd.get_dummies(DFDemo[source_col], drop_first=True)
    for source_col in (
        'Housing',
        'Education',
        'Smoking',
        'Difficulty preparing medication?',
        'Location tumour',
        'Primary Malignancy',
    )
)

# Remove the original text columns. 'Marital State' and 'Tumour Stage' are
# dropped here without being encoded.
DFDemo = DFDemo.drop(
    columns=[
        'Marital State',
        'Housing',
        'Education',
        'Tumour Stage',
        'Smoking',
        'Difficulty preparing medication?',
        'Location tumour',
        'Primary Malignancy',
    ]
)

#%%
# Append the dummy columns to the right of the remaining variables.
DFDemo = pd.concat(
    [DFDemo, Housing, Education, Smoking, Med_Dif, Loc_Tu, Prim_Mal],
    axis='columns',
)
|
|
|
#%% Create Neoadjuvant therapy variable

# Collapse the neo-adjuvant therapy checkboxes into one categorical code:
#   1 = chemotherapy and radiotherapy, 2 = chemotherapy, 3 = immunotherapy,
#   4 = radiotherapy, 5 = targeted therapy, 0 = none.
# Rows with no checkbox set stay NaN. Where several boxes are ticked, the
# first matching entry in that list wins.
_neo_col = 'Neo-adjuvant therapy (choice={})'.format
_chemo = DFDemo[_neo_col('Chemotherapy')] == 1
_radio = DFDemo[_neo_col('Radiotherapy')] == 1
_immuno = DFDemo[_neo_col('Immunotherapy')] == 1
_targeted = DFDemo[_neo_col('Targeted Therapy')] == 1
_none = DFDemo[_neo_col('None')] == 1

# Whole-column masks replace the former row-by-row loop. Codes are written
# from lowest to highest priority so that a later assignment overrides an
# earlier one, which reproduces the precedence listed above.
DFDemo['Neo'] = float('nan')
for _code, _mask in (
    (0, _none),
    (5, _targeted),
    (4, _radio),
    (3, _immuno),
    (2, _chemo),
    (1, _chemo & _radio),
):
    DFDemo.loc[_mask, 'Neo'] = _code

# One-hot encode the code; drop_first leaves out the lowest code present.
Neo = pd.get_dummies(DFDemo['Neo'], drop_first=True)

# Source checkboxes and the intermediate code are no longer needed.
NeoDrop = [
    'Neo-adjuvant therapy (choice=Chemotherapy)',
    'Neo-adjuvant therapy (choice=Immunotherapy)',
    'Neo-adjuvant therapy (choice=Radiotherapy)',
    'Neo-adjuvant therapy (choice=None)',
    'Neo-adjuvant therapy (choice=Targeted Therapy)',
    'Neo',
]

DFDemo = DFDemo.drop(NeoDrop, axis=1)

DFDemo = pd.concat([DFDemo, Neo], axis=1)
|
|
|
#%%

# Distribution of the 'Age (years)' column. displot is a figure-level seaborn
# function that creates its own figure, so no plt.figure() call precedes it
# (that would only leave an empty extra figure behind).
sns.displot(DFDemo['Age (years)'])
|
|
|
#%%

from scipy import stats

# Split the table on patient type. Every row whose type is not 'Healthy'
# (including rows without a type) goes to DemoComp. The grouping column is
# removed from both groups before testing.
is_healthy = DFDemo['Type'] == 'Healthy'
DemoComp = DFDemo[~is_healthy].drop('Type', axis=1)
DemoNoComp = DFDemo[is_healthy].drop('Type', axis=1)

# Independent-samples t-test per column, leaving out NaNs; keep the p-values.
outcomeT = stats.ttest_ind(DemoNoComp, DemoComp, nan_policy='omit')
OutcomeT = outcomeT[1].tolist()

# Mann-Whitney U test per column, in the same column order as OutcomeT.
OutcomeMW = []
for column in DemoComp:
    print(column)
    # Drop NaNs explicitly so this test uses the same observations as the
    # t-test above; mannwhitneyu received the raw columns before.
    no_comp_values = DemoNoComp[column].dropna()
    comp_values = DemoComp[column].dropna()
    if no_comp_values.empty or comp_values.empty:
        # Nothing to compare; record NaN instead of letting scipy raise.
        OutcomeMW.append(float('nan'))
        continue
    outcomeMW = stats.mannwhitneyu(no_comp_values, comp_values)
    OutcomeMW.append(outcomeMW[1])
|
|
|
|
|
#DFDemo.to_csv('FinalDemo.csv') |