Cainvas

Oil and Gas Equipment Failure Prediction

Credit: AITS Cainvas Community

Photo by Nathan Venn on Dribbble

In [ ]:
!wget "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/equipfails_Dataset.zip"
!unzip -qo equipfails_Dataset.zip
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Evaluation and HYpertuning
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

#Model
from sklearn.ensemble import RandomForestClassifier 
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

#Utility
pd.set_option('display.max_columns', None)
import time
import warnings
warnings.filterwarnings("ignore")

I Data exploration and import data

In [2]:
df=pd.read_csv('equipfails/equip_failures_training_set.csv')
In [3]:
df.shape
Out[3]:
(60000, 172)
In [4]:
df['sensor1_measure'][0]
Out[4]:
76698
In [5]:
df.head(4)
Out[5]:
id target sensor1_measure sensor2_measure sensor3_measure sensor4_measure sensor5_measure sensor6_measure sensor7_histogram_bin0 sensor7_histogram_bin1 sensor7_histogram_bin2 sensor7_histogram_bin3 sensor7_histogram_bin4 sensor7_histogram_bin5 sensor7_histogram_bin6 sensor7_histogram_bin7 sensor7_histogram_bin8 sensor7_histogram_bin9 sensor8_measure sensor9_measure sensor10_measure sensor11_measure sensor12_measure sensor13_measure sensor14_measure sensor15_measure sensor16_measure sensor17_measure sensor18_measure sensor19_measure sensor20_measure sensor21_measure sensor22_measure sensor23_measure sensor24_histogram_bin0 sensor24_histogram_bin1 sensor24_histogram_bin2 sensor24_histogram_bin3 sensor24_histogram_bin4 sensor24_histogram_bin5 sensor24_histogram_bin6 sensor24_histogram_bin7 sensor24_histogram_bin8 sensor24_histogram_bin9 sensor25_histogram_bin0 sensor25_histogram_bin1 sensor25_histogram_bin2 sensor25_histogram_bin3 sensor25_histogram_bin4 sensor25_histogram_bin5 sensor25_histogram_bin6 sensor25_histogram_bin7 sensor25_histogram_bin8 sensor25_histogram_bin9 sensor26_histogram_bin0 sensor26_histogram_bin1 sensor26_histogram_bin2 sensor26_histogram_bin3 sensor26_histogram_bin4 sensor26_histogram_bin5 sensor26_histogram_bin6 sensor26_histogram_bin7 sensor26_histogram_bin8 sensor26_histogram_bin9 sensor27_measure sensor28_measure sensor29_measure sensor30_measure sensor31_measure sensor32_measure sensor33_measure sensor34_measure sensor35_measure sensor36_measure sensor37_measure sensor38_measure sensor39_measure sensor40_measure sensor41_measure sensor42_measure sensor43_measure sensor44_measure sensor45_measure sensor46_measure sensor47_measure sensor48_measure sensor49_measure sensor50_measure sensor51_measure sensor52_measure sensor53_measure sensor54_measure sensor55_measure sensor56_measure sensor57_measure sensor58_measure sensor59_measure sensor60_measure sensor61_measure sensor62_measure sensor63_measure sensor64_histogram_bin0 sensor64_histogram_bin1 sensor64_histogram_bin2 sensor64_histogram_bin3 sensor64_histogram_bin4 sensor64_histogram_bin5 sensor64_histogram_bin6 sensor64_histogram_bin7 sensor64_histogram_bin8 sensor64_histogram_bin9 sensor65_measure sensor66_measure sensor67_measure sensor68_measure sensor69_histogram_bin0 sensor69_histogram_bin1 sensor69_histogram_bin2 sensor69_histogram_bin3 sensor69_histogram_bin4 sensor69_histogram_bin5 sensor69_histogram_bin6 sensor69_histogram_bin7 sensor69_histogram_bin8 sensor69_histogram_bin9 sensor70_measure sensor71_measure sensor72_measure sensor73_measure sensor74_measure sensor75_measure sensor76_measure sensor77_measure sensor78_measure sensor79_measure sensor80_measure sensor81_measure sensor82_measure sensor83_measure sensor84_measure sensor85_measure sensor86_measure sensor87_measure sensor88_measure sensor89_measure sensor90_measure sensor91_measure sensor92_measure sensor93_measure sensor94_measure sensor95_measure sensor96_measure sensor97_measure sensor98_measure sensor99_measure sensor100_measure sensor101_measure sensor102_measure sensor103_measure sensor104_measure sensor105_histogram_bin0 sensor105_histogram_bin1 sensor105_histogram_bin2 sensor105_histogram_bin3 sensor105_histogram_bin4 sensor105_histogram_bin5 sensor105_histogram_bin6 sensor105_histogram_bin7 sensor105_histogram_bin8 sensor105_histogram_bin9 sensor106_measure sensor107_measure
0 1 0 76698 na 2130706438 280 0 0 0 0 0 0 37250 1432864 3664156 1007684 25896 0 2551696 0 0 0 0 0 4933296 3655166 1766008 1132040 0 0 0 0 1012 268 0 0 0 0 0 469014 4239660 703300 755876 0 5374 2108 4114 12348 615248 5526276 2378 4 0 0 2328746 1022304 415432 287230 310246 681504 1118814 3574 0 0 6700214 0 10 108 50 2551696 97518 947550 799478 330760 353400 299160 305200 283680 na na na 178540 76698.08 6700214 6700214 6599892 43566 68656 54064 638360 6167850 1209600 246244 2 96 0 5245752 0 916567.68 6 1924 0 0 0 118196 1309472 3247182 1381362 98822 11208 1608 220 240 6700214 na 10476 1226 267998 521832 428776 4015854 895240 26330 118 0 532 734 4122704 51288 0 532572 0 18 5330690 4732 1126 0 0 0 0 0 0 0 0 62282 85908 32790 0 0 202710 37928 14745580 1876644 0 0 0 0 2801180 2445.8 2712 965866 1706908 1240520 493384 721044 469792 339156 157956 73224 0 0 0
1 2 0 33058 na 0 na 0 0 0 0 0 0 18254 653294 1720800 516724 31642 0 1393352 0 68 0 0 0 2560898 2127150 1084598 338544 0 0 0 0 0 0 0 0 0 0 0 71510 772720 1996924 99560 0 7336 7808 13776 13086 1010074 1873902 14726 6 0 0 1378576 447166 199512 154298 137280 138668 165908 229652 87082 4708 3646660 86 454 364 350 1393352 49028 688314 392208 341420 359780 366560 na na na na na 6700 33057.51 3646660 3646660 3582034 17733 260120 115626 6900 2942850 1209600 0 na na na 2291079.36 0 643536.96 0 0 0 0 38 98644 1179502 1286736 336388 36294 5192 56 na 0 3646660 na 6160 796 164860 350066 272956 1837600 301242 9148 22 0 na na na na na na na na na 3312 522 0 0 0 0 0 0 0 0 33736 36946 5936 0 0 103330 16254 4510080 868538 0 0 0 0 3477820 2211.76 2334 664504 824154 421400 178064 293306 245416 133654 81140 97576 1500 0 0
2 3 0 41040 na 228 100 0 0 0 0 0 0 1648 370592 1883374 292936 12016 0 1234132 0 0 0 0 0 2371990 2173634 300796 153698 0 0 0 0 358 110 0 0 0 0 0 0 870456 239798 1450312 0 1620 1156 1228 34250 1811606 710672 34 0 0 0 790690 672026 332340 254892 189596 135758 103552 81666 46 0 2673338 128 202 576 4 1234132 28804 160176 139730 137160 130640 na na na na na na 28000 41040.08 2673338 2673338 2678534 15439 7466 22436 248240 2560566 1209600 63328 0 124 0 2322692.16 0 236099.52 0 0 0 0 0 33276 1215280 1102798 196502 10260 2422 28 0 6 2673338 na 3584 500 56362 149726 100326 1744838 488302 16682 246 0 230 292 2180528 29188 22 20346 0 0 2341048 1494 152 0 0 0 0 0 0 0 0 13876 38182 8138 0 0 65772 10534 300240 48028 0 0 0 0 1040120 1018.64 1020 262032 453378 277378 159812 423992 409564 320746 158022 95128 514 0 0
3 4 0 12 0 70 66 0 10 0 0 0 318 2212 3232 1872 0 0 0 2668 0 0 0 642 3894 10184 7554 10764 1014 0 0 0 0 60 6 0 0 0 0 0 0 0 2038 5596 0 64 6 6 914 76 2478 2398 1692 0 0 6176 340 304 102 74 406 216 16 0 0 21614 2 12 0 0 2668 184 7632 3090 na na na na na na na na 10580 12.69 21614 21614 21772 32 50 1994 21400 7710 1209600 302 2 6 0 2135.04 0 4525.44 2 16 0 52 2544 1894 2170 822 152 0 0 0 2 2 21614 0 1032 6 24 656 692 4836 388 0 0 0 138 8 1666 72 0 12 0 0 2578 76 62 0 0 0 0 0 0 0 0 232 0 0 2014 370 48 18 15740 1822 20174 44 0 0 0 1.08 54 5670 1566 240 46 58 44 10 0 0 0 4 32
In [6]:
df.columns
Out[6]:
Index(['id', 'target', 'sensor1_measure', 'sensor2_measure', 'sensor3_measure',
       'sensor4_measure', 'sensor5_measure', 'sensor6_measure',
       'sensor7_histogram_bin0', 'sensor7_histogram_bin1',
       ...
       'sensor105_histogram_bin2', 'sensor105_histogram_bin3',
       'sensor105_histogram_bin4', 'sensor105_histogram_bin5',
       'sensor105_histogram_bin6', 'sensor105_histogram_bin7',
       'sensor105_histogram_bin8', 'sensor105_histogram_bin9',
       'sensor106_measure', 'sensor107_measure'],
      dtype='object', length=172)
In [7]:
df.shape
Out[7]:
(60000, 172)
In [8]:
#Histogram of the target
print("Failure: ",  df.target.sum(), ' Normal: ', df.shape[0]-df.target.sum())
df.target.hist()

plt.show()
Failure:  1000  Normal:  59000
In [9]:
df['sensor2_measure'][0]
Out[9]:
'na'
In [10]:
#Ratio between 'na' values and 60000 rows
nan_ratio=[]
for i in df.columns:
    foo=round((df[i]=='na').sum()/df.shape[0],2)
    nan_ratio.append(foo)

nan_ratio=pd.DataFrame({'Feature':df.columns,'na ratio':nan_ratio}).sort_values('na ratio', ascending=False)
In [11]:
nan_ratio.head()
Out[11]:
Feature na ratio
80 sensor43_measure 0.82
79 sensor42_measure 0.81
78 sensor41_measure 0.80
114 sensor68_measure 0.77
3 sensor2_measure 0.77

II Preprocessing and baseline

1 a) Treating 'na' with vtreat and use xgboost, random forest, and logistic to create a baseline

In [12]:
import vtreat
transform = vtreat.BinomialOutcomeTreatment(
    outcome_name='yc',    # We want vtreat change variable base on this column
    outcome_target=True,  # Tell vtreat that this is actually the target
    cols_to_copy=[],   # columns that we dont want vtreat to touch
)
d_prepared = transform.fit_transform(df, df.target)
In [13]:
transform.score_frame_.head()
Out[13]:
variable orig_variable treatment y_aware has_range PearsonR R2 significance vcount default_threshold recommended
0 id id clean_copy False True -0.012163 0.000873 2.877201e-03 3.0 0.083333 True
1 sensor1_measure sensor1_measure clean_copy False True 0.536978 0.397099 0.000000e+00 3.0 0.083333 True
2 target target clean_copy False True 1.000000 0.999882 0.000000e+00 3.0 0.083333 True
3 sensor44_measure_logit_code sensor44_measure logit_code True True 0.129352 0.074467 0.000000e+00 169.0 0.001479 True
4 sensor44_measure_prevalence_code sensor44_measure prevalence_code False True 0.030752 0.003621 1.288772e-09 169.0 0.001479 True
In [14]:
d_prepared.head()
Out[14]:
id sensor1_measure target sensor44_measure_logit_code sensor44_measure_prevalence_code sensor77_measure_logit_code sensor77_measure_prevalence_code sensor77_measure_lev_na sensor77_measure_lev_10 sensor25_histogram_bin0_logit_code sensor24_histogram_bin5_logit_code sensor24_histogram_bin5_prevalence_code sensor24_histogram_bin5_lev_0 sensor64_histogram_bin5_logit_code sensor64_histogram_bin5_prevalence_code sensor69_histogram_bin6_logit_code sensor69_histogram_bin6_prevalence_code sensor66_measure_logit_code sensor66_measure_prevalence_code sensor66_measure_lev_0 sensor66_measure_lev_4 sensor66_measure_lev_6 sensor66_measure_lev_8 sensor66_measure_lev_na sensor66_measure_lev_10 sensor66_measure_lev_12 sensor66_measure_lev_14 sensor66_measure_lev_16 sensor51_measure_logit_code sensor51_measure_prevalence_code sensor51_measure_lev_na sensor7_histogram_bin6_logit_code sensor7_histogram_bin6_prevalence_code sensor7_histogram_bin6_lev_0 sensor10_measure_logit_code sensor10_measure_prevalence_code sensor10_measure_lev_0 sensor50_measure_logit_code sensor50_measure_prevalence_code sensor50_measure_lev_0 sensor50_measure_lev_na sensor69_histogram_bin3_logit_code sensor69_histogram_bin3_lev_0 sensor24_histogram_bin2_logit_code sensor24_histogram_bin2_prevalence_code sensor24_histogram_bin2_lev_0 sensor41_measure_logit_code sensor41_measure_prevalence_code sensor41_measure_lev_na sensor41_measure_lev_1310700 sensor41_measure_lev_0 sensor15_measure_logit_code sensor15_measure_prevalence_code sensor64_histogram_bin6_logit_code sensor69_histogram_bin0_logit_code sensor7_histogram_bin7_logit_code sensor7_histogram_bin7_prevalence_code sensor7_histogram_bin7_lev_0 sensor26_histogram_bin0_logit_code sensor26_histogram_bin0_prevalence_code sensor6_measure_logit_code sensor6_measure_prevalence_code sensor6_measure_lev_0 sensor6_measure_lev_na sensor87_measure_logit_code sensor87_measure_prevalence_code sensor87_measure_lev_0 sensor87_measure_lev_na sensor20_measure_logit_code sensor20_measure_prevalence_code sensor20_measure_lev_0 sensor26_histogram_bin1_logit_code sensor7_histogram_bin3_logit_code sensor7_histogram_bin3_prevalence_code sensor7_histogram_bin3_lev_0 sensor27_measure_logit_code sensor27_measure_prevalence_code sensor63_measure_logit_code sensor63_measure_prevalence_code sensor63_measure_lev_0 sensor63_measure_lev_na sensor79_measure_logit_code sensor79_measure_prevalence_code sensor79_measure_lev_na sensor103_measure_logit_code sensor103_measure_prevalence_code sensor103_measure_lev_na sensor103_measure_lev_0 sensor25_histogram_bin9_logit_code sensor25_histogram_bin9_prevalence_code sensor25_histogram_bin9_lev_0 sensor33_measure_logit_code sensor56_measure_logit_code sensor56_measure_prevalence_code sensor56_measure_lev_0 sensor56_measure_lev_2 sensor56_measure_lev_na sensor56_measure_lev_4 sensor25_histogram_bin5_logit_code sensor25_histogram_bin5_prevalence_code sensor75_measure_logit_code sensor75_measure_prevalence_code sensor75_measure_lev_na sensor75_measure_lev_0 sensor75_measure_lev_2 sensor102_measure_logit_code sensor102_measure_prevalence_code sensor102_measure_lev_0 sensor102_measure_lev_na sensor25_histogram_bin3_logit_code sensor25_histogram_bin3_prevalence_code sensor43_measure_logit_code sensor43_measure_prevalence_code sensor43_measure_lev_na sensor43_measure_lev_1310700 sensor43_measure_lev_0 sensor9_measure_logit_code sensor9_measure_prevalence_code sensor9_measure_lev_0 sensor84_measure_logit_code sensor84_measure_prevalence_code sensor84_measure_lev_0 sensor84_measure_lev_na sensor64_histogram_bin1_logit_code sensor64_histogram_bin1_prevalence_code sensor64_histogram_bin1_lev_0 sensor64_histogram_bin2_logit_code sensor64_histogram_bin2_prevalence_code sensor64_histogram_bin2_lev_0 sensor81_measure_logit_code sensor81_measure_prevalence_code sensor81_measure_lev_0 sensor81_measure_lev_na sensor64_histogram_bin0_logit_code sensor64_histogram_bin0_prevalence_code sensor64_histogram_bin0_lev_0 sensor31_measure_logit_code sensor31_measure_prevalence_code sensor31_measure_lev_0 sensor31_measure_lev_2 sensor31_measure_lev_4 sensor31_measure_lev_na sensor31_measure_lev_6 sensor31_measure_lev_10 sensor92_measure_logit_code sensor92_measure_prevalence_code sensor92_measure_lev_0 sensor92_measure_lev_na sensor26_histogram_bin4_logit_code sensor26_histogram_bin4_lev_0 sensor69_histogram_bin5_logit_code sensor69_histogram_bin5_prevalence_code sensor7_histogram_bin8_logit_code sensor7_histogram_bin8_prevalence_code sensor7_histogram_bin8_lev_0 sensor107_measure_logit_code sensor107_measure_prevalence_code sensor107_measure_lev_0 sensor107_measure_lev_na sensor26_histogram_bin7_logit_code sensor26_histogram_bin7_prevalence_code sensor26_histogram_bin7_lev_0 sensor8_measure_logit_code sensor8_measure_prevalence_code sensor53_measure_logit_code sensor53_measure_prevalence_code sensor53_measure_lev_na sensor97_measure_logit_code sensor97_measure_prevalence_code sensor97_measure_lev_0 sensor97_measure_lev_na sensor25_histogram_bin2_logit_code sensor25_histogram_bin2_prevalence_code sensor25_histogram_bin2_lev_0 sensor25_histogram_bin4_logit_code sensor14_measure_logit_code sensor14_measure_prevalence_code sensor64_histogram_bin7_logit_code sensor64_histogram_bin7_prevalence_code sensor64_histogram_bin7_lev_0 sensor38_measure_logit_code sensor38_measure_prevalence_code sensor38_measure_lev_na sensor38_measure_lev_1310700 sensor38_measure_lev_0 sensor82_measure_logit_code sensor82_measure_prevalence_code sensor82_measure_lev_0 sensor82_measure_lev_na sensor7_histogram_bin5_logit_code sensor7_histogram_bin0_logit_code sensor7_histogram_bin0_prevalence_code sensor7_histogram_bin0_lev_0 sensor42_measure_logit_code sensor42_measure_prevalence_code sensor42_measure_lev_na sensor42_measure_lev_1310700 sensor42_measure_lev_0 sensor89_measure_logit_code sensor69_histogram_bin8_logit_code sensor69_histogram_bin8_prevalence_code sensor69_histogram_bin8_lev_0 sensor69_histogram_bin8_lev_2 sensor18_measure_logit_code sensor18_measure_prevalence_code sensor18_measure_lev_0 sensor18_measure_lev_na sensor34_measure_logit_code sensor34_measure_prevalence_code sensor49_measure_logit_code sensor49_measure_prevalence_code sensor49_measure_lev_0 sensor26_histogram_bin6_logit_code sensor62_measure_logit_code sensor62_measure_prevalence_code sensor62_measure_lev_0 sensor62_measure_lev_na sensor62_measure_lev_2 sensor62_measure_lev_4 sensor67_measure_logit_code sensor67_measure_prevalence_code sensor24_histogram_bin8_logit_code sensor24_histogram_bin8_prevalence_code sensor24_histogram_bin8_lev_0 sensor47_measure_logit_code sensor47_measure_prevalence_code sensor26_histogram_bin2_logit_code sensor40_measure_logit_code sensor40_measure_prevalence_code sensor40_measure_lev_na sensor40_measure_lev_1310700 sensor40_measure_lev_0 sensor39_measure_logit_code sensor39_measure_prevalence_code sensor39_measure_lev_na sensor39_measure_lev_1310700 sensor39_measure_lev_0 sensor29_measure_logit_code sensor29_measure_lev_0 sensor29_measure_lev_4 sensor29_measure_lev_2 sensor29_measure_lev_na sensor29_measure_lev_6 sensor29_measure_lev_8 sensor29_measure_lev_10 sensor17_measure_logit_code sensor17_measure_prevalence_code sensor105_histogram_bin7_logit_code sensor105_histogram_bin7_prevalence_code sensor105_histogram_bin7_lev_0 sensor11_measure_logit_code sensor11_measure_prevalence_code sensor11_measure_lev_0 sensor11_measure_lev_na sensor7_histogram_bin9_logit_code sensor7_histogram_bin9_prevalence_code sensor7_histogram_bin9_lev_0 sensor19_measure_logit_code sensor19_measure_prevalence_code sensor19_measure_lev_0 sensor90_measure_logit_code sensor90_measure_prevalence_code sensor90_measure_lev_0 sensor90_measure_lev_na sensor69_histogram_bin4_logit_code sensor69_histogram_bin4_lev_0 sensor85_measure_logit_code sensor85_measure_prevalence_code sensor85_measure_lev_0 sensor85_measure_lev_na sensor26_histogram_bin9_logit_code sensor26_histogram_bin9_prevalence_code sensor26_histogram_bin9_lev_0 sensor35_measure_logit_code sensor35_measure_prevalence_code sensor70_measure_logit_code sensor70_measure_prevalence_code sensor70_measure_lev_na sensor69_histogram_bin7_logit_code sensor69_histogram_bin7_prevalence_code sensor69_histogram_bin7_lev_0 sensor80_measure_logit_code sensor80_measure_prevalence_code sensor80_measure_lev_na sensor91_measure_logit_code sensor91_measure_prevalence_code sensor91_measure_lev_0 sensor91_measure_lev_na sensor98_measure_logit_code sensor98_measure_prevalence_code sensor98_measure_lev_0 sensor98_measure_lev_na sensor105_histogram_bin3_logit_code sensor105_histogram_bin3_lev_0 sensor59_measure_logit_code sensor21_measure_logit_code sensor21_measure_prevalence_code sensor21_measure_lev_0 sensor69_histogram_bin2_logit_code sensor69_histogram_bin2_prevalence_code sensor24_histogram_bin3_logit_code sensor24_histogram_bin3_prevalence_code sensor24_histogram_bin3_lev_0 sensor30_measure_logit_code sensor30_measure_lev_0 sensor30_measure_lev_na sensor30_measure_lev_2 sensor30_measure_lev_4 sensor86_measure_logit_code sensor86_measure_prevalence_code sensor86_measure_lev_0 sensor86_measure_lev_na sensor26_histogram_bin5_logit_code sensor26_histogram_bin5_lev_0 sensor100_measure_logit_code sensor100_measure_prevalence_code sensor100_measure_lev_0 sensor100_measure_lev_na sensor46_measure_logit_code sensor46_measure_prevalence_code sensor7_histogram_bin4_logit_code sensor7_histogram_bin4_lev_0 sensor25_histogram_bin7_logit_code sensor25_histogram_bin7_prevalence_code sensor25_histogram_bin7_lev_0 sensor32_measure_logit_code sensor32_measure_prevalence_code sensor95_measure_logit_code sensor95_measure_prevalence_code sensor95_measure_lev_na sensor96_measure_logit_code sensor96_measure_prevalence_code sensor96_measure_lev_0 sensor96_measure_lev_na sensor24_histogram_bin1_logit_code sensor24_histogram_bin1_prevalence_code sensor24_histogram_bin1_lev_0 sensor76_measure_logit_code sensor76_measure_prevalence_code sensor76_measure_lev_0 sensor76_measure_lev_na sensor23_measure_logit_code sensor23_measure_prevalence_code sensor23_measure_lev_na sensor24_histogram_bin4_logit_code sensor24_histogram_bin4_prevalence_code sensor24_histogram_bin4_lev_0 sensor16_measure_logit_code sensor16_measure_prevalence_code sensor28_measure_logit_code sensor28_measure_prevalence_code sensor28_measure_lev_0 sensor28_measure_lev_2 sensor28_measure_lev_na sensor28_measure_lev_4 sensor54_measure_logit_code sensor54_measure_prevalence_code sensor54_measure_lev_1209600 sensor64_histogram_bin8_logit_code sensor64_histogram_bin8_prevalence_code sensor64_histogram_bin8_lev_0 sensor24_histogram_bin9_logit_code sensor24_histogram_bin9_prevalence_code sensor24_histogram_bin9_lev_0 sensor72_measure_logit_code sensor72_measure_prevalence_code sensor72_measure_lev_na sensor78_measure_logit_code sensor78_measure_prevalence_code sensor78_measure_lev_na sensor4_measure_logit_code sensor4_measure_prevalence_code sensor4_measure_lev_na sensor13_measure_logit_code sensor13_measure_prevalence_code sensor13_measure_lev_0 sensor69_histogram_bin1_logit_code sensor69_histogram_bin1_prevalence_code sensor69_histogram_bin1_lev_0 sensor69_histogram_bin1_lev_12 sensor69_histogram_bin1_lev_14 sensor105_histogram_bin2_logit_code sensor105_histogram_bin2_lev_0 sensor64_histogram_bin4_logit_code sensor64_histogram_bin4_prevalence_code sensor105_histogram_bin5_logit_code sensor45_measure_logit_code sensor64_histogram_bin3_logit_code sensor22_measure_logit_code sensor22_measure_prevalence_code sensor22_measure_lev_na sensor52_measure_logit_code sensor52_measure_prevalence_code sensor105_histogram_bin6_logit_code sensor105_histogram_bin6_prevalence_code sensor105_histogram_bin6_lev_0 sensor74_measure_logit_code sensor74_measure_prevalence_code sensor74_measure_lev_0 sensor74_measure_lev_na sensor26_histogram_bin3_logit_code sensor26_histogram_bin3_lev_0 sensor26_histogram_bin8_logit_code sensor26_histogram_bin8_prevalence_code sensor26_histogram_bin8_lev_0 sensor25_histogram_bin8_logit_code sensor25_histogram_bin8_prevalence_code sensor25_histogram_bin8_lev_0 sensor25_histogram_bin8_lev_2 sensor93_measure_logit_code sensor93_measure_prevalence_code sensor93_measure_lev_0 sensor93_measure_lev_na sensor71_measure_logit_code sensor71_measure_prevalence_code sensor71_measure_lev_na sensor99_measure_logit_code sensor99_measure_prevalence_code sensor99_measure_lev_0 sensor99_measure_lev_na sensor104_measure_logit_code sensor104_measure_prevalence_code sensor104_measure_lev_na sensor104_measure_lev_0 sensor88_measure_logit_code sensor88_measure_prevalence_code sensor88_measure_lev_0 sensor88_measure_lev_na sensor24_histogram_bin7_logit_code sensor24_histogram_bin7_prevalence_code sensor24_histogram_bin7_lev_0 sensor94_measure_logit_code sensor94_measure_prevalence_code sensor94_measure_lev_na sensor105_histogram_bin9_logit_code sensor5_measure_logit_code sensor5_measure_prevalence_code sensor5_measure_lev_0 sensor5_measure_lev_na sensor25_histogram_bin6_logit_code sensor61_measure_logit_code sensor106_measure_logit_code sensor106_measure_prevalence_code sensor106_measure_lev_0 sensor106_measure_lev_na sensor73_measure_logit_code sensor73_measure_prevalence_code sensor73_measure_lev_na sensor105_histogram_bin4_logit_code sensor58_measure_logit_code sensor58_measure_prevalence_code sensor58_measure_lev_0 sensor58_measure_lev_na sensor57_measure_logit_code sensor57_measure_prevalence_code sensor57_measure_lev_na sensor57_measure_lev_4 sensor57_measure_lev_2 sensor57_measure_lev_6 sensor57_measure_lev_8 sensor57_measure_lev_10 sensor105_histogram_bin1_logit_code sensor105_histogram_bin1_prevalence_code sensor24_histogram_bin6_logit_code sensor55_measure_logit_code sensor55_measure_prevalence_code sensor55_measure_lev_0 sensor55_measure_lev_na sensor83_measure_logit_code sensor83_measure_prevalence_code sensor83_measure_lev_0 sensor83_measure_lev_na sensor101_measure_logit_code sensor101_measure_prevalence_code sensor101_measure_lev_0 sensor101_measure_lev_na sensor7_histogram_bin1_logit_code sensor7_histogram_bin1_prevalence_code sensor7_histogram_bin1_lev_0 sensor60_measure_logit_code sensor60_measure_prevalence_code sensor60_measure_lev_0 sensor105_histogram_bin0_logit_code sensor105_histogram_bin0_prevalence_code sensor48_measure_logit_code sensor48_measure_prevalence_code sensor48_measure_lev_na sensor65_measure_logit_code sensor65_measure_prevalence_code sensor65_measure_lev_na sensor65_measure_lev_0 sensor65_measure_lev_2 sensor65_measure_lev_4 sensor65_measure_lev_6 sensor65_measure_lev_8 sensor65_measure_lev_10 sensor105_histogram_bin8_logit_code sensor105_histogram_bin8_prevalence_code sensor105_histogram_bin8_lev_0 sensor7_histogram_bin2_logit_code sensor7_histogram_bin2_prevalence_code sensor7_histogram_bin2_lev_0 sensor36_measure_logit_code sensor36_measure_prevalence_code sensor36_measure_lev_na sensor36_measure_lev_1310700 sensor3_measure_logit_code sensor3_measure_prevalence_code sensor3_measure_lev_2130706432 sensor3_measure_lev_na sensor3_measure_lev_2130706434 sensor12_measure_logit_code sensor12_measure_prevalence_code sensor12_measure_lev_0 sensor24_histogram_bin0_logit_code sensor24_histogram_bin0_prevalence_code sensor24_histogram_bin0_lev_0 sensor37_measure_logit_code sensor37_measure_prevalence_code sensor37_measure_lev_na sensor37_measure_lev_1310700 sensor25_histogram_bin1_logit_code sensor25_histogram_bin1_prevalence_code sensor64_histogram_bin9_logit_code sensor64_histogram_bin9_prevalence_code sensor64_histogram_bin9_lev_0
0 1.0 76698.0 0.0 0.000000 0.000017 -1.561384 0.015700 0.0 0.0 -2.819397 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 -2.862263 0.000550 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000033 0.0 0.000000 0.000017 0.0 -0.338243 0.781983 1.0 0.000000 0.000017 0.0 0.0 0.000000 0.0 -0.068207 0.969367 1.0 -1.854412 0.795667 1.0 0.0 0.0 0.0 0.000017 0.000000 -2.625343 0.000000 0.000017 0.0 0.000000 0.000017 -0.327989 0.924600 1.0 0.0 -0.601491 0.929483 1.0 0.0 -0.268193 0.889917 1.0 0.000000 -1.091151 0.781567 1.0 0.0 0.000017 2.220446e-16 0.000033 0.0 0.0 -2.634200 0.000050 0.0 0.000000 0.000017 0.0 0.0 -0.154489 0.957417 1.0 0.000000 -2.141312 0.288183 0.0 1.0 0.0 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.0 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.000033 -1.828714 0.821067 1.0 0.0 0.0 -1.087527 0.893133 1.0 -0.741663 0.760783 1.0 0.0 -0.798207 0.841067 1.0 -1.589039 0.508967 1.0 -0.710238 0.9155 1.0 0.0 -0.541668 0.955 1.0 -0.183142 0.003067 0.0 0.0 0.0 0.0 0.0 0.0 -0.506311 0.740067 1.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000e+00 0.000017 0.0 -0.398315 0.946567 1.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -2.800993 0.000083 0.0 0.000000 0.000000 0.000017 0.000000 0.000017 0.0 -2.741036 0.000083 0.0 0.0 0.0 -0.709907 0.907067 1.0 0.0 0.000000 -0.086368 0.98555 1.0 -1.839543 0.812033 1.0 0.0 0.0 0.000000 -2.873560 0.003883 0.0 0.0 -0.744542 0.915467 1.0 0.0 0.0000 0.000017 0.000000 0.000017 0.0 0.000000 -1.950494 0.013983 0.0 0.0 0.0 0.0 0.0 0.000017 0.000000 0.000017 0.0 0.0 0.000017 0.000000 -2.720736 0.000050 0.0 0.0 0.0 0.000000 0.000017 0.0 0.0 0.0 -2.266258 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.000000 0.000017 0.000000 0.000033 0.0 -0.647201 0.92045 1.0 0.0 -0.319136 0.6783 1.0 -0.035377 0.989167 1.0 0.000000 0.000017 0.0 0.0 0.000000e+00 0.0 -0.606895 0.929933 1.0 0.0 -0.918806 0.660317 1.0 0.000000 0.000017 -2.861114 0.000733 0.0 0.000000 0.000017 0.0 -2.813201 0.000117 0.0 0.000000 0.000017 0.0 0.0 -0.499184 0.659300 1.0 0.0 0.000000 0.0 0.000000 -0.052120 0.9885 1.0 0.000000 0.000017 -0.078673 0.967967 1.0 -2.874976 0.0 0.0 0.0 0.0 -0.609518 0.927917 1.0 0.0 0.000000 0.0 -0.401513 0.949733 1.0 0.0 0.0 0.000017 0.000000 0.0 0.420716 0.022717 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -0.068752 0.969867 1.0 -0.468310 0.760750 1.0 0.0 -2.872923 0.001350 0.0 -0.161382 0.952633 1.0 0.00000 0.000017 -1.371119 0.321600 1.0 0.0 0.0 0.0 -0.032502 0.988733 1.0 0.000000 0.000033 0.0 -0.203726 0.979817 1.0 0.000000 0.000017 0.0 0.000000 0.000017 0.0 -2.866200 0.001167 0.0 -1.741908 0.621583 1.0 -2.859067 0.000267 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000 0.0 0.000000 1.519030 0.000233 0.0 0.000000 0.000017 0.000000 0.000017 0.0 -0.787462 0.658483 1.0 0.0 0.00000 0.0 -1.096423 0.414233 1.0 -0.300058 0.9037 1.0 0.0 -0.503398 0.737783 1.0 0.0 -2.861985 0.000567 0.0 -0.461462 0.604483 1.0 0.0 -2.784521 0.000067 0.0 0.0 -0.600925 0.9289 1.0 0.0 0.000000 0.000017 0.0 0.0000 0.000017 0.0 -0.082320 -0.329068 0.925717 1.0 0.0 -2.744513 0.0 -0.399973 0.950350 1.0 0.0 0.000000 0.000017 0.0 0.000000 -0.690340 0.752150 1.0 0.0 -0.721552 0.005500 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000000 0.000017 0.0 0.0 -0.744947 0.8168 1.0 0.0 -0.396140 0.94225 1.0 0.0 -0.428144 0.97645 1.0 -0.198955 0.785367 1.0 0.0000 0.000017 0.0 0.000017 0.0 -2.850681 0.000450 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000033 0.0 -0.656768 0.93635 1.0 0.000000 0.000017 0.0 0.0 -2.876250 0.005450 0.0 0.0 0.0 -1.723730 0.62455 1.0 -0.070374 0.97935 1.0 0.000000 0.000017 0.0 0.0 -2.842959 0.000133 1.911896 0.000083 0.0
1 2.0 33058.0 0.0 -2.822465 0.000350 0.820590 0.230133 1.0 0.0 0.000000 0.000000 0.000017 0.0 0.000000 0.000033 0.000000 0.000017 0.358975 0.158950 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.276462 0.001950 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.0 -0.071369 0.969367 1.0 -1.836575 0.795667 1.0 0.0 0.0 0.0 0.000017 0.000000 0.000000 0.000000 0.000017 0.0 0.000000 0.000017 -0.355672 0.924600 1.0 0.0 -0.638830 0.929483 1.0 0.0 -0.276507 0.889917 1.0 0.000000 -1.098034 0.781567 1.0 0.0 0.000017 -4.982886e-01 0.403583 1.0 0.0 -2.813159 0.000133 0.0 0.000000 0.000017 0.0 0.0 -0.141623 0.957417 1.0 0.000000 0.931040 0.247683 0.0 0.0 1.0 0.0 0.000000 0.000017 0.820606 0.230133 1.0 0.0 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.000017 -1.802896 0.821067 1.0 0.0 0.0 -1.124500 0.893133 1.0 -0.770429 0.760783 1.0 0.0 -0.852468 0.841067 1.0 -2.843291 0.000300 0.0 -0.738139 0.9155 1.0 0.0 -0.558348 0.955 1.0 1.498436 0.000233 0.0 0.0 0.0 0.0 0.0 0.0 -0.531673 0.740067 1.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000e+00 0.000017 0.0 -0.421995 0.946567 1.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.000017 0.0 0.000000 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 0.0 -0.740759 0.907067 1.0 0.0 0.000000 -0.085646 0.98555 1.0 -1.813429 0.812033 1.0 0.0 0.0 0.000000 -0.312286 0.008800 0.0 0.0 -0.799287 0.915467 1.0 0.0 0.0000 0.000017 -2.655193 0.000067 0.0 0.000000 -0.801708 0.613117 1.0 0.0 0.0 0.0 0.0 0.000017 0.000000 0.000017 0.0 0.0 0.000017 0.000000 -1.845462 0.772217 1.0 0.0 0.0 -1.852659 0.733483 1.0 0.0 0.0 -2.860700 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000017 0.0 -0.720715 0.92045 1.0 0.0 -0.323573 0.6783 1.0 -0.032971 0.989167 1.0 0.000000 0.000017 0.0 0.0 0.000000e+00 0.0 -0.641519 0.929933 1.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.820658 0.230133 1.0 -2.347011 0.000100 0.0 0.514491 0.000700 0.0 -2.688702 0.000083 0.0 0.0 -0.522427 0.659300 1.0 0.0 0.000000 0.0 0.000000 -0.056099 0.9885 1.0 0.000000 0.000017 -0.087192 0.967967 1.0 -2.863663 0.0 0.0 0.0 0.0 -0.644545 0.927917 1.0 0.0 0.000000 0.0 -0.425025 0.949733 1.0 0.0 0.0 0.000017 0.000000 0.0 0.646619 0.012600 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -0.069092 0.969867 1.0 0.820586 0.230133 0.0 1.0 0.043216 0.169850 0.0 -0.171977 0.952633 1.0 0.00000 0.000017 -2.866350 0.002467 0.0 0.0 0.0 0.0 -0.031353 0.988733 1.0 -2.825204 0.000183 0.0 -0.208361 0.979817 1.0 0.820551 0.230133 1.0 0.820551 0.230133 1.0 0.930989 0.247683 1.0 -1.755372 0.621583 1.0 -2.863720 0.000683 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000 0.0 0.000000 0.039698 0.166800 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.820688 0.230133 0.0 1.0 0.00000 0.0 0.000000 0.000017 0.0 -0.279630 0.9037 1.0 0.0 -0.528848 0.737783 1.0 0.0 0.820671 0.230133 1.0 -0.487344 0.604483 1.0 0.0 -2.835788 0.000150 0.0 0.0 -0.638267 0.9289 1.0 0.0 0.000000 0.000017 0.0 0.0000 0.000017 0.0 -2.664955 -0.356791 0.925717 1.0 0.0 0.000000 0.0 -0.421981 0.950350 1.0 0.0 0.820582 0.230133 1.0 0.000000 0.930585 0.247683 0.0 1.0 0.931036 0.247683 1.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.454811 0.197417 1.0 0.0 -0.780365 0.8168 1.0 0.0 -0.419590 0.94225 1.0 0.0 -0.424087 0.97645 1.0 -0.172750 0.785367 1.0 0.0000 0.000017 0.0 0.000017 0.0 0.930981 0.247683 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.0 -0.668187 0.93635 1.0 0.000000 0.000033 0.0 0.0 -0.144307 0.145867 0.0 0.0 0.0 -1.736884 0.62455 1.0 -0.073132 0.97935 1.0 0.000000 0.000017 0.0 0.0 0.000000 0.000017 -2.866589 0.002867 0.0
2 3.0 41040.0 0.0 -2.785933 0.000217 0.007536 0.461917 0.0 0.0 -2.852509 0.101975 0.505633 1.0 0.000000 0.000017 0.000000 0.000017 -2.148230 0.078217 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.293348 0.000067 0.0 0.000000 0.000017 0.0 -0.360603 0.781983 1.0 -2.553662 0.000083 0.0 0.0 0.000000 0.0 -0.069856 0.969367 1.0 -1.894195 0.795667 1.0 0.0 0.0 0.0 0.000017 0.000000 -2.836381 0.000000 0.000017 0.0 0.000000 0.000017 -0.321799 0.924600 1.0 0.0 -0.605643 0.929483 1.0 0.0 -0.262589 0.889917 1.0 0.000000 -1.132849 0.781567 1.0 0.0 0.000017 -4.724949e-01 0.403583 1.0 0.0 -2.848704 0.000333 0.0 0.000000 0.000017 0.0 0.0 -0.148487 0.957417 1.0 -2.464713 -0.481867 0.357800 1.0 0.0 0.0 0.0 0.000000 0.000017 -2.294033 0.000067 0.0 0.0 0.0 0.000000 0.000017 0.0 0.0 -2.477691 0.000050 -1.865917 0.821067 1.0 0.0 0.0 -1.155116 0.893133 1.0 -0.734390 0.760783 1.0 0.0 -0.843964 0.841067 1.0 -1.603895 0.508967 1.0 -0.708438 0.9155 1.0 0.0 -0.564335 0.955 1.0 -0.496277 0.049050 0.0 0.0 1.0 0.0 0.0 0.0 -0.514691 0.740067 1.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000e+00 0.000017 0.0 -0.399492 0.946567 1.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -2.851489 0.000317 0.0 0.000000 0.000000 0.000017 -2.456410 0.000050 0.0 -1.885131 0.659150 1.0 0.0 0.0 -0.707931 0.907067 1.0 0.0 0.000000 -0.093507 0.98555 1.0 -1.877702 0.812033 1.0 0.0 0.0 -2.508498 -2.860732 0.001600 0.0 0.0 -0.753570 0.915467 1.0 0.0 0.0000 0.000033 0.000000 0.000033 0.0 0.000000 -0.783126 0.613117 1.0 0.0 0.0 0.0 0.0 0.000017 0.000000 0.000017 0.0 0.0 0.000017 0.000000 -1.916558 0.772217 1.0 0.0 0.0 -1.928168 0.733483 1.0 0.0 0.0 -2.864117 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000017 0.0 -0.675040 0.92045 1.0 0.0 -0.301437 0.6783 1.0 -0.035844 0.989167 1.0 0.000000 0.000033 0.0 0.0 -2.220446e-16 0.0 -0.608594 0.929933 1.0 0.0 -0.964834 0.660317 1.0 0.000000 0.000017 -2.861732 0.001317 0.0 -2.345585 0.000067 0.0 -1.150755 0.004500 0.0 -2.621279 0.000083 0.0 0.0 -0.503129 0.659300 1.0 0.0 0.000000 0.0 0.000000 -0.054064 0.9885 1.0 0.000000 0.000017 -0.080361 0.967967 1.0 0.598111 0.0 0.0 0.0 0.0 -0.609002 0.927917 1.0 0.0 0.000000 0.0 -0.402387 0.949733 1.0 0.0 0.0 0.000017 -2.825255 0.0 -0.534436 0.751567 1.0 0.000000 0.000017 -2.688185 0.000117 0.0 0.000000 0.000017 0.0 0.0 -0.067725 0.969867 1.0 -0.445525 0.760750 1.0 0.0 -2.865759 0.002933 0.0 -0.163449 0.952633 1.0 0.00000 0.000017 -2.865587 0.001733 0.0 0.0 0.0 0.0 -0.028011 0.988733 1.0 -2.786592 0.000133 0.0 -0.214212 0.979817 1.0 0.000000 0.000017 0.0 0.000000 0.000017 0.0 -0.632136 0.002400 0.0 -1.775959 0.621583 1.0 -0.496486 0.002283 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000017 -2.342589 0.0 0.000000 -2.861940 0.001000 0.0 0.000000 0.000033 -2.381753 0.000050 0.0 0.869800 0.002833 0.0 0.0 0.00000 0.0 -2.865963 0.002867 0.0 -0.302680 0.9037 1.0 0.0 -0.511960 0.737783 1.0 0.0 0.035314 0.001183 0.0 -0.467667 0.604483 1.0 0.0 -2.863797 0.000950 0.0 0.0 -0.605056 0.9289 1.0 0.0 0.000000 0.000017 0.0 0.0000 0.000017 0.0 -2.777264 -0.322920 0.925717 1.0 0.0 -0.513327 0.0 -0.401349 0.950350 1.0 0.0 0.000000 0.000017 0.0 0.000000 -0.676057 0.752150 1.0 0.0 -2.867184 0.003417 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000000 0.000017 0.0 0.0 -0.733703 0.8168 1.0 0.0 -0.397116 0.94225 1.0 0.0 -0.434971 0.97645 1.0 -0.185763 0.785367 1.0 0.0000 0.000017 0.0 0.000017 0.0 -1.502287 0.213217 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.0 -0.689125 0.93635 1.0 -2.500864 0.000067 0.0 0.0 -0.056900 0.001350 0.0 0.0 0.0 -1.767980 0.62455 1.0 -0.073272 0.97935 1.0 -2.505859 0.000050 0.0 0.0 -2.849829 0.000300 0.800449 0.003550 0.0
3 4.0 12.0 0.0 0.700734 0.000600 -0.026117 0.461917 0.0 0.0 -2.875415 0.111504 0.505633 1.0 -2.825761 0.000300 -2.278186 0.000083 -0.250957 0.004650 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.699935 0.000217 0.0 -2.771297 0.000200 0.0 -0.338243 0.781983 1.0 -2.867976 0.001850 0.0 0.0 -2.803874 0.0 -0.068207 0.969367 1.0 -1.854412 0.795667 1.0 0.0 0.0 0.0 0.000050 -2.853966 -2.869488 -1.279691 0.264583 1.0 -2.632452 0.000100 -2.857206 0.000767 0.0 0.0 -0.601491 0.929483 1.0 0.0 -0.268193 0.889917 1.0 -2.847009 -2.839403 0.000150 0.0 0.0 0.000067 -2.877305e+00 0.013650 0.0 0.0 -2.876116 0.004267 0.0 -2.842778 0.000617 0.0 0.0 -0.154489 0.957417 1.0 -2.868453 -2.141312 0.288183 0.0 1.0 0.0 0.0 -2.723246 0.000150 -2.875334 0.008400 0.0 0.0 0.0 -1.558104 0.357483 1.0 0.0 -2.853622 0.000467 -1.828714 0.821067 1.0 0.0 0.0 -1.087527 0.893133 1.0 -0.741663 0.760783 1.0 0.0 -2.849138 0.000167 0.0 -2.764853 0.000100 0.0 -0.710238 0.9155 1.0 0.0 -0.541668 0.955 1.0 -0.886850 0.421567 1.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000067 0.0 0.0 -2.871452 0.0 -2.303345 0.000083 -1.128711e+00 0.418183 1.0 -2.794820 0.000100 0.0 0.0 -2.874127 0.003367 0.0 -2.537328 0.000083 -2.633633 0.000100 0.0 -2.585025 0.000133 0.0 0.0 -2.876900 0.007550 0.0 -2.856515 -2.514356 0.000100 -1.114778 0.154817 1.0 -1.802752 0.659150 1.0 0.0 0.0 -0.709907 0.907067 1.0 0.0 -2.698127 -0.086368 0.98555 1.0 -1.839543 0.812033 1.0 0.0 0.0 -2.869100 0.645589 0.234050 1.0 0.0 -0.744542 0.915467 1.0 0.0 -2.6441 0.000100 -2.874957 0.004050 0.0 0.523896 -1.982863 0.043083 0.0 0.0 1.0 0.0 0.0 0.000067 -2.561316 0.000083 0.0 0.0 0.000067 -2.836314 -1.873882 0.772217 1.0 0.0 0.0 -1.851447 0.733483 1.0 0.0 0.0 -2.877296 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.824843 0.000317 -0.798235 0.131633 1.0 -0.647201 0.92045 1.0 0.0 -0.319136 0.6783 1.0 -0.035377 0.989167 1.0 -1.612023 0.221450 1.0 0.0 -2.830337e+00 0.0 -0.606895 0.929933 1.0 0.0 -0.918806 0.660317 1.0 -2.334259 0.000067 -2.871433 0.001617 0.0 1.026772 0.085117 1.0 -2.877104 0.008950 0.0 -1.581022 0.212500 1.0 0.0 0.000000 0.000017 0.0 0.0 -2.872561 0.0 -2.526104 -0.052120 0.9885 1.0 -2.875979 0.007867 -0.078673 0.967967 1.0 -1.166183 1.0 0.0 0.0 0.0 -0.609518 0.927917 1.0 0.0 -2.859378 0.0 -0.401513 0.949733 1.0 0.0 0.0 0.000067 -2.784844 0.0 -2.578310 0.000067 0.0 -2.536629 0.000083 -2.875857 0.007683 0.0 -2.650352 0.000150 0.0 0.0 -0.068752 0.969867 1.0 -0.468310 0.760750 1.0 0.0 -1.253793 0.015233 0.0 -0.161382 0.952633 1.0 -2.28849 0.000067 -2.294629 0.052617 0.0 1.0 0.0 0.0 -0.032502 0.988733 1.0 -1.391730 0.256450 1.0 -0.203726 0.979817 1.0 -2.638788 0.000117 0.0 -2.587239 0.000117 0.0 -0.920675 0.003317 0.0 -2.623905 0.000083 0.0 -1.977257 0.014533 0.0 0.0 0.0 -2.850194 0.0 -2.305781 0.000067 -2.871985 0.0 -2.672472 -2.875975 0.003783 0.0 -2.707615 0.000150 -1.083462 0.004033 0.0 -0.787462 0.658483 1.0 0.0 -2.86508 0.0 -1.096423 0.414233 1.0 -0.300058 0.9037 1.0 0.0 -2.822202 0.000250 0.0 0.0 -2.876314 0.006733 0.0 -2.872774 0.002350 0.0 0.0 -2.876964 0.006950 0.0 0.0 -0.600925 0.9289 1.0 0.0 -2.560329 0.000100 0.0 -2.8686 0.002017 0.0 -0.082320 -0.329068 0.925717 1.0 0.0 -2.744513 0.0 0.330543 0.000867 0.0 0.0 -2.867926 0.002383 0.0 -2.871193 -0.690340 0.752150 1.0 0.0 -1.945301 0.041033 0.0 0.0 0.0 1.0 0.0 0.0 -2.655725 0.000117 -0.047674 -2.843902 0.000833 0.0 0.0 -0.744947 0.8168 1.0 0.0 -0.396140 0.94225 1.0 0.0 -0.428144 0.97645 1.0 -0.198955 0.785367 1.0 -2.5472 0.000100 0.0 0.000033 0.0 -1.741227 0.069483 0.0 0.0 1.0 0.0 0.0 0.0 0.0 -1.328659 0.288000 1.0 -0.656768 0.93635 1.0 -1.491924 0.383900 1.0 0.0 -2.875065 0.002867 0.0 0.0 0.0 -2.857465 0.00030 0.0 -0.070374 0.97935 1.0 -1.577790 0.454617 1.0 0.0 -1.900561 0.012850 -0.914452 0.432967 1.0
4 5.0 60874.0 0.0 0.000000 0.000033 -0.020337 0.461917 0.0 0.0 -2.869590 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 0.498202 0.000683 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.0 -2.873611 0.000600 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.0 -0.072328 0.969367 1.0 -1.866152 0.795667 1.0 0.0 0.0 0.0 0.000017 0.000000 -2.853608 0.000000 0.000017 0.0 0.000000 0.000017 -0.338480 0.924600 1.0 0.0 -0.635567 0.929483 1.0 0.0 -0.273899 0.889917 1.0 0.000000 -1.083780 0.781567 1.0 0.0 0.000017 -2.867790e+00 0.000267 0.0 0.0 -2.860188 0.000350 0.0 0.000000 0.000017 0.0 0.0 -0.148499 0.957417 1.0 0.000000 -0.568682 0.357800 1.0 0.0 0.0 0.0 0.000000 0.000017 -2.293180 0.000050 0.0 0.0 0.0 0.000000 0.000017 0.0 0.0 -2.857528 0.000517 -1.840274 0.821067 1.0 0.0 0.0 -1.124954 0.893133 1.0 -0.756862 0.760783 1.0 0.0 -0.812694 0.841067 1.0 -2.823600 0.000167 0.0 -0.745941 0.9155 1.0 0.0 -0.529522 0.955 1.0 -0.875673 0.081133 0.0 1.0 0.0 0.0 0.0 0.0 -0.504559 0.740067 1.0 0.0 0.000000 0.0 0.000000 0.000017 -2.220446e-16 0.000033 0.0 -0.409232 0.946567 1.0 0.0 0.000000 0.000017 0.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -2.867478 0.000483 0.0 0.000000 0.000000 0.000017 -2.795793 0.000133 0.0 -2.633699 0.000050 0.0 0.0 0.0 -0.746217 0.907067 1.0 0.0 0.000000 -0.082427 0.98555 1.0 -1.860442 0.812033 1.0 0.0 0.0 0.000000 -2.873241 0.002117 0.0 0.0 -0.779569 0.915467 1.0 0.0 0.0000 0.000017 0.000000 0.000033 0.0 0.000000 -0.792576 0.613117 1.0 0.0 0.0 0.0 0.0 0.000017 0.000000 0.000017 0.0 0.0 0.000017 0.000000 -1.886461 0.772217 1.0 0.0 0.0 -1.894844 0.733483 1.0 0.0 0.0 -2.868188 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000017 0.0 -0.685939 0.92045 1.0 0.0 -0.326245 0.6783 1.0 -0.035504 0.989167 1.0 0.000000 0.000017 0.0 0.0 0.000000e+00 0.0 -0.638421 0.929933 1.0 0.0 -0.938922 0.660317 1.0 0.000000 0.000017 -2.821705 0.000183 0.0 -2.575079 0.000067 0.0 -2.872550 0.000517 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.000017 0.0 0.0 0.000000 0.0 0.000000 -0.054601 0.9885 1.0 0.000000 0.000017 -0.084042 0.967967 1.0 -2.864471 0.0 0.0 0.0 0.0 -0.641213 0.927917 1.0 0.0 0.000000 0.0 -0.412210 0.949733 1.0 0.0 0.0 0.000017 0.000000 0.0 -0.533904 0.751567 1.0 0.000000 0.000017 0.000000 0.000017 0.0 0.000000 0.000017 0.0 0.0 -0.070284 0.969867 1.0 -0.469255 0.760750 1.0 0.0 -2.854847 0.000300 0.0 0.000000 0.000017 0.0 0.00000 0.000017 -2.859523 0.000267 0.0 0.0 0.0 0.0 -0.026486 0.988733 1.0 -2.823705 0.000183 0.0 -0.201797 0.979817 1.0 0.000000 0.000017 0.0 0.000000 0.000017 0.0 -2.863684 0.000617 0.0 -1.801485 0.621583 1.0 -2.878364 0.002100 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000017 0.000000 0.0 0.000000 -2.848694 0.000200 0.0 0.000000 0.000017 0.000000 0.000017 0.0 -0.844140 0.658483 1.0 0.0 0.00000 0.0 0.000000 0.000017 0.0 -0.300306 0.9037 1.0 0.0 -0.501776 0.737783 1.0 0.0 -2.841146 0.000250 0.0 0.000000 0.000017 0.0 0.0 -2.865662 0.000283 0.0 0.0 -0.634981 0.9289 1.0 0.0 0.000000 0.000017 0.0 0.0000 0.000017 0.0 -2.634755 -0.339579 0.925717 1.0 0.0 -2.770591 0.0 -0.408978 0.950350 1.0 0.0 0.000000 0.000017 0.0 0.000000 -0.680524 0.752150 1.0 0.0 -2.878701 0.002933 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.000000 0.000000 0.000033 0.0 0.0 -0.760703 0.8168 1.0 0.0 -0.404887 0.94225 1.0 0.0 -0.407271 0.97645 1.0 -0.191253 0.785367 1.0 0.0000 0.000017 0.0 0.000017 0.0 -2.847315 0.000400 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000017 0.0 -0.638395 0.93635 1.0 0.000000 0.000033 0.0 0.0 -2.604010 0.000067 0.0 0.0 0.0 -1.782191 0.62455 1.0 -0.072536 0.97935 1.0 0.000000 0.000017 0.0 0.0 -2.873135 0.000600 -0.395110 0.001900 0.0

1 b) Base line

In [15]:
#Prepare 
X=d_prepared.drop(['id','target'],axis=1)
y=d_prepared.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
In [16]:
start=time.time()
rfc=RandomForestClassifier()
rfc.fit(X_train,y_train)

print(time.time()-start)

xgb=XGBClassifier()
xgb.fit(X_train.values,y_train)

print(time.time()-start)

lr=LogisticRegression()
lr.fit(X_train,y_train)

print(time.time()-start)
13.442559242248535
[15:32:45] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
76.86526250839233
79.33955693244934
In [17]:
rfc_score=f1_score(rfc.predict(X_test),y_test)
xgb_score=f1_score(xgb.predict(X_test.values),y_test)
lr_score=f1_score(lr.predict(X_test),y_test)
In [18]:
print(rfc_score,xgb_score,lr_score)
0.6971153846153847 0.7767857142857143 0.25266362252663627

2) a) Treating 'na' with MANUALLY and use xgboost, random forest, and logistic to create a baseline

In [19]:
df.head()
Out[19]:
id target sensor1_measure sensor2_measure sensor3_measure sensor4_measure sensor5_measure sensor6_measure sensor7_histogram_bin0 sensor7_histogram_bin1 sensor7_histogram_bin2 sensor7_histogram_bin3 sensor7_histogram_bin4 sensor7_histogram_bin5 sensor7_histogram_bin6 sensor7_histogram_bin7 sensor7_histogram_bin8 sensor7_histogram_bin9 sensor8_measure sensor9_measure sensor10_measure sensor11_measure sensor12_measure sensor13_measure sensor14_measure sensor15_measure sensor16_measure sensor17_measure sensor18_measure sensor19_measure sensor20_measure sensor21_measure sensor22_measure sensor23_measure sensor24_histogram_bin0 sensor24_histogram_bin1 sensor24_histogram_bin2 sensor24_histogram_bin3 sensor24_histogram_bin4 sensor24_histogram_bin5 sensor24_histogram_bin6 sensor24_histogram_bin7 sensor24_histogram_bin8 sensor24_histogram_bin9 sensor25_histogram_bin0 sensor25_histogram_bin1 sensor25_histogram_bin2 sensor25_histogram_bin3 sensor25_histogram_bin4 sensor25_histogram_bin5 sensor25_histogram_bin6 sensor25_histogram_bin7 sensor25_histogram_bin8 sensor25_histogram_bin9 sensor26_histogram_bin0 sensor26_histogram_bin1 sensor26_histogram_bin2 sensor26_histogram_bin3 sensor26_histogram_bin4 sensor26_histogram_bin5 sensor26_histogram_bin6 sensor26_histogram_bin7 sensor26_histogram_bin8 sensor26_histogram_bin9 sensor27_measure sensor28_measure sensor29_measure sensor30_measure sensor31_measure sensor32_measure sensor33_measure sensor34_measure sensor35_measure sensor36_measure sensor37_measure sensor38_measure sensor39_measure sensor40_measure sensor41_measure sensor42_measure sensor43_measure sensor44_measure sensor45_measure sensor46_measure sensor47_measure sensor48_measure sensor49_measure sensor50_measure sensor51_measure sensor52_measure sensor53_measure sensor54_measure sensor55_measure sensor56_measure sensor57_measure sensor58_measure sensor59_measure sensor60_measure sensor61_measure sensor62_measure sensor63_measure sensor64_histogram_bin0 sensor64_histogram_bin1 sensor64_histogram_bin2 sensor64_histogram_bin3 sensor64_histogram_bin4 sensor64_histogram_bin5 sensor64_histogram_bin6 sensor64_histogram_bin7 sensor64_histogram_bin8 sensor64_histogram_bin9 sensor65_measure sensor66_measure sensor67_measure sensor68_measure sensor69_histogram_bin0 sensor69_histogram_bin1 sensor69_histogram_bin2 sensor69_histogram_bin3 sensor69_histogram_bin4 sensor69_histogram_bin5 sensor69_histogram_bin6 sensor69_histogram_bin7 sensor69_histogram_bin8 sensor69_histogram_bin9 sensor70_measure sensor71_measure sensor72_measure sensor73_measure sensor74_measure sensor75_measure sensor76_measure sensor77_measure sensor78_measure sensor79_measure sensor80_measure sensor81_measure sensor82_measure sensor83_measure sensor84_measure sensor85_measure sensor86_measure sensor87_measure sensor88_measure sensor89_measure sensor90_measure sensor91_measure sensor92_measure sensor93_measure sensor94_measure sensor95_measure sensor96_measure sensor97_measure sensor98_measure sensor99_measure sensor100_measure sensor101_measure sensor102_measure sensor103_measure sensor104_measure sensor105_histogram_bin0 sensor105_histogram_bin1 sensor105_histogram_bin2 sensor105_histogram_bin3 sensor105_histogram_bin4 sensor105_histogram_bin5 sensor105_histogram_bin6 sensor105_histogram_bin7 sensor105_histogram_bin8 sensor105_histogram_bin9 sensor106_measure sensor107_measure
0 1 0 76698 na 2130706438 280 0 0 0 0 0 0 37250 1432864 3664156 1007684 25896 0 2551696 0 0 0 0 0 4933296 3655166 1766008 1132040 0 0 0 0 1012 268 0 0 0 0 0 469014 4239660 703300 755876 0 5374 2108 4114 12348 615248 5526276 2378 4 0 0 2328746 1022304 415432 287230 310246 681504 1118814 3574 0 0 6700214 0 10 108 50 2551696 97518 947550 799478 330760 353400 299160 305200 283680 na na na 178540 76698.08 6700214 6700214 6599892 43566 68656 54064 638360 6167850 1209600 246244 2 96 0 5245752 0 916567.68 6 1924 0 0 0 118196 1309472 3247182 1381362 98822 11208 1608 220 240 6700214 na 10476 1226 267998 521832 428776 4015854 895240 26330 118 0 532 734 4122704 51288 0 532572 0 18 5330690 4732 1126 0 0 0 0 0 0 0 0 62282 85908 32790 0 0 202710 37928 14745580 1876644 0 0 0 0 2801180 2445.8 2712 965866 1706908 1240520 493384 721044 469792 339156 157956 73224 0 0 0
1 2 0 33058 na 0 na 0 0 0 0 0 0 18254 653294 1720800 516724 31642 0 1393352 0 68 0 0 0 2560898 2127150 1084598 338544 0 0 0 0 0 0 0 0 0 0 0 71510 772720 1996924 99560 0 7336 7808 13776 13086 1010074 1873902 14726 6 0 0 1378576 447166 199512 154298 137280 138668 165908 229652 87082 4708 3646660 86 454 364 350 1393352 49028 688314 392208 341420 359780 366560 na na na na na 6700 33057.51 3646660 3646660 3582034 17733 260120 115626 6900 2942850 1209600 0 na na na 2291079.36 0 643536.96 0 0 0 0 38 98644 1179502 1286736 336388 36294 5192 56 na 0 3646660 na 6160 796 164860 350066 272956 1837600 301242 9148 22 0 na na na na na na na na na 3312 522 0 0 0 0 0 0 0 0 33736 36946 5936 0 0 103330 16254 4510080 868538 0 0 0 0 3477820 2211.76 2334 664504 824154 421400 178064 293306 245416 133654 81140 97576 1500 0 0
2 3 0 41040 na 228 100 0 0 0 0 0 0 1648 370592 1883374 292936 12016 0 1234132 0 0 0 0 0 2371990 2173634 300796 153698 0 0 0 0 358 110 0 0 0 0 0 0 870456 239798 1450312 0 1620 1156 1228 34250 1811606 710672 34 0 0 0 790690 672026 332340 254892 189596 135758 103552 81666 46 0 2673338 128 202 576 4 1234132 28804 160176 139730 137160 130640 na na na na na na 28000 41040.08 2673338 2673338 2678534 15439 7466 22436 248240 2560566 1209600 63328 0 124 0 2322692.16 0 236099.52 0 0 0 0 0 33276 1215280 1102798 196502 10260 2422 28 0 6 2673338 na 3584 500 56362 149726 100326 1744838 488302 16682 246 0 230 292 2180528 29188 22 20346 0 0 2341048 1494 152 0 0 0 0 0 0 0 0 13876 38182 8138 0 0 65772 10534 300240 48028 0 0 0 0 1040120 1018.64 1020 262032 453378 277378 159812 423992 409564 320746 158022 95128 514 0 0
3 4 0 12 0 70 66 0 10 0 0 0 318 2212 3232 1872 0 0 0 2668 0 0 0 642 3894 10184 7554 10764 1014 0 0 0 0 60 6 0 0 0 0 0 0 0 2038 5596 0 64 6 6 914 76 2478 2398 1692 0 0 6176 340 304 102 74 406 216 16 0 0 21614 2 12 0 0 2668 184 7632 3090 na na na na na na na na 10580 12.69 21614 21614 21772 32 50 1994 21400 7710 1209600 302 2 6 0 2135.04 0 4525.44 2 16 0 52 2544 1894 2170 822 152 0 0 0 2 2 21614 0 1032 6 24 656 692 4836 388 0 0 0 138 8 1666 72 0 12 0 0 2578 76 62 0 0 0 0 0 0 0 0 232 0 0 2014 370 48 18 15740 1822 20174 44 0 0 0 1.08 54 5670 1566 240 46 58 44 10 0 0 0 4 32
4 5 0 60874 na 1368 458 0 0 0 0 0 0 43752 1966618 1800340 131646 4588 0 1974038 0 226 0 0 0 3230626 2618878 1058136 551022 0 0 0 0 1788 642 0 0 0 0 42124 372236 2128914 819596 584074 0 1644 362 562 842 30194 3911734 1606 0 0 0 1348578 1035668 338762 236540 182278 151778 163248 470800 19292 0 4289260 448 556 642 2 1974038 86454 653692 399410 306780 282560 274180 na na na na na 189000 60874.03 4289260 4289260 4283332 24793 17052 61844 654700 3946944 1209600 135720 0 152 0 3565684.8 0 379111.68 0 746 0 0 356 378910 2497104 993000 64230 10482 2776 86 202 212 4289260 na 3942 520 80950 227322 186242 2288268 1137268 22228 204 0 1716 1664 3440288 215826 0 4262 0 0 3590004 2026 444 0 0 0 0 0 0 0 0 44946 62648 11506 0 0 149474 35154 457040 80482 98334 27588 0 0 21173050 1116.06 1176 404740 904230 622012 229790 405298 347188 286954 311560 433954 1218 0 0
In [20]:
#4 ways to replace nan
#First is drop na when na_ratio is > than the first threshold
#Second is replaced na by -1 if na_ratio between the 2 threshold
#Third is replaced na by mode if na_ratio < the lower threshold and the mode has to be greater than mode_threshold
#Lastly, replaced na by median

def na(df, thresholds=[0.7,0.4],mode_threshold=20000):
    for i in df.columns:
        na_ratio=round((df[i]=='na').sum()/df.shape[0],2)
        value_counts=df[i].value_counts()
        if na_ratio>thresholds[0]:
            df=df.drop(i,axis=1)  
        elif (na_ratio>=thresholds[1]) & (na_ratio<=thresholds[0]):
            df[i]=df[i].replace('na', -1)
        elif (na_ratio<thresholds[1]) & (value_counts.values[0]>mode_threshold) & (value_counts.index[0]!='na') :
            df[i]=df[i].replace('na',value_counts.index[0])
        else:
            median=np.median([float(j) for j in df[i] if j!='na'])
            df[i]=df[i].replace('na',median)
            
    return df
In [21]:
#Check if there is any 'na' left
df_new=na(df)
for i in df_new.columns:
    if (df_new[i]=='na').sum()>0:
        print(i)
In [22]:
df_new.head()
Out[22]:
id target sensor1_measure sensor3_measure sensor4_measure sensor5_measure sensor6_measure sensor7_histogram_bin0 sensor7_histogram_bin1 sensor7_histogram_bin2 sensor7_histogram_bin3 sensor7_histogram_bin4 sensor7_histogram_bin5 sensor7_histogram_bin6 sensor7_histogram_bin7 sensor7_histogram_bin8 sensor7_histogram_bin9 sensor8_measure sensor9_measure sensor10_measure sensor11_measure sensor12_measure sensor13_measure sensor14_measure sensor15_measure sensor16_measure sensor17_measure sensor18_measure sensor19_measure sensor20_measure sensor21_measure sensor22_measure sensor23_measure sensor24_histogram_bin0 sensor24_histogram_bin1 sensor24_histogram_bin2 sensor24_histogram_bin3 sensor24_histogram_bin4 sensor24_histogram_bin5 sensor24_histogram_bin6 sensor24_histogram_bin7 sensor24_histogram_bin8 sensor24_histogram_bin9 sensor25_histogram_bin0 sensor25_histogram_bin1 sensor25_histogram_bin2 sensor25_histogram_bin3 sensor25_histogram_bin4 sensor25_histogram_bin5 sensor25_histogram_bin6 sensor25_histogram_bin7 sensor25_histogram_bin8 sensor25_histogram_bin9 sensor26_histogram_bin0 sensor26_histogram_bin1 sensor26_histogram_bin2 sensor26_histogram_bin3 sensor26_histogram_bin4 sensor26_histogram_bin5 sensor26_histogram_bin6 sensor26_histogram_bin7 sensor26_histogram_bin8 sensor26_histogram_bin9 sensor27_measure sensor28_measure sensor29_measure sensor30_measure sensor31_measure sensor32_measure sensor33_measure sensor34_measure sensor35_measure sensor36_measure sensor37_measure sensor38_measure sensor44_measure sensor45_measure sensor46_measure sensor47_measure sensor48_measure sensor49_measure sensor50_measure sensor51_measure sensor52_measure sensor53_measure sensor54_measure sensor55_measure sensor56_measure sensor57_measure sensor58_measure sensor59_measure sensor60_measure sensor61_measure sensor62_measure sensor63_measure sensor64_histogram_bin0 sensor64_histogram_bin1 sensor64_histogram_bin2 sensor64_histogram_bin3 sensor64_histogram_bin4 sensor64_histogram_bin5 sensor64_histogram_bin6 sensor64_histogram_bin7 sensor64_histogram_bin8 sensor64_histogram_bin9 sensor65_measure sensor66_measure sensor67_measure sensor69_histogram_bin0 sensor69_histogram_bin1 sensor69_histogram_bin2 sensor69_histogram_bin3 sensor69_histogram_bin4 sensor69_histogram_bin5 sensor69_histogram_bin6 sensor69_histogram_bin7 sensor69_histogram_bin8 sensor69_histogram_bin9 sensor70_measure sensor71_measure sensor72_measure sensor73_measure sensor74_measure sensor75_measure sensor76_measure sensor77_measure sensor78_measure sensor79_measure sensor80_measure sensor81_measure sensor82_measure sensor83_measure sensor84_measure sensor85_measure sensor86_measure sensor87_measure sensor88_measure sensor89_measure sensor90_measure sensor91_measure sensor92_measure sensor93_measure sensor94_measure sensor95_measure sensor96_measure sensor97_measure sensor98_measure sensor99_measure sensor100_measure sensor101_measure sensor102_measure sensor103_measure sensor104_measure sensor105_histogram_bin0 sensor105_histogram_bin1 sensor105_histogram_bin2 sensor105_histogram_bin3 sensor105_histogram_bin4 sensor105_histogram_bin5 sensor105_histogram_bin6 sensor105_histogram_bin7 sensor105_histogram_bin8 sensor105_histogram_bin9 sensor106_measure sensor107_measure
0 1 0 76698 2130706438 280 0 0 0 0 0 0 37250 1432864 3664156 1007684 25896 0 2551696 0 0 0 0 0 4933296 3655166 1766008 1132040 0 0 0 0 1012 268 0 0 0 0 0 469014 4239660 703300 755876 0 5374 2108 4114 12348 615248 5526276 2378 4 0 0 2328746 1022304 415432 287230 310246 681504 1118814 3574 0 0 6700214 0 10 108 50 2551696 97518 947550 799478 330760 353400 299160 178540 76698.08 6700214 6700214 6599892 43566 68656 54064 638360 6167850 1209600 246244 2 96 0 5245752 0 916567.68 6 1924 0 0 0 118196 1309472 3247182 1381362 98822 11208 1608 220 240 6700214 10476 1226 267998 521832 428776 4015854 895240 26330 118 0 532 734 4122704 51288 0 532572 0 18 5330690 4732 1126 0 0 0 0 0 0 0 0 62282 85908 32790 0 0 202710 37928 14745580 1876644 0 0 0 0 2801180 2445.8 2712 965866 1706908 1240520 493384 721044 469792 339156 157956 73224 0 0 0
1 2 0 33058 0 126 0 0 0 0 0 0 18254 653294 1720800 516724 31642 0 1393352 0 68 0 0 0 2560898 2127150 1084598 338544 0 0 0 0 0 0 0 0 0 0 0 71510 772720 1996924 99560 0 7336 7808 13776 13086 1010074 1873902 14726 6 0 0 1378576 447166 199512 154298 137280 138668 165908 229652 87082 4708 3646660 86 454 364 350 1393352 49028 688314 392208 341420 359780 366560 6700 33057.51 3646660 3646660 3582034 17733 260120 115626 6900 2942850 1209600 0 0 46 0 2291079.36 0 643536.96 0 0 0 0 38 98644 1179502 1286736 336388 36294 5192 56 8 0 3646660 6160 796 164860 350066 272956 1837600 301242 9148 22 0 210 278 1.18112e+06 44465 0 202 0 0 1.73447e+06 3312 522 0 0 0 0 0 0 0 0 33736 36946 5936 0 0 103330 16254 4510080 868538 0 0 0 0 3477820 2211.76 2334 664504 824154 421400 178064 293306 245416 133654 81140 97576 1500 0 0
2 3 0 41040 228 100 0 0 0 0 0 0 1648 370592 1883374 292936 12016 0 1234132 0 0 0 0 0 2371990 2173634 300796 153698 0 0 0 0 358 110 0 0 0 0 0 0 870456 239798 1450312 0 1620 1156 1228 34250 1811606 710672 34 0 0 0 790690 672026 332340 254892 189596 135758 103552 81666 46 0 2673338 128 202 576 4 1234132 28804 160176 139730 137160 130640 -1 28000 41040.08 2673338 2673338 2678534 15439 7466 22436 248240 2560566 1209600 63328 0 124 0 2322692.16 0 236099.52 0 0 0 0 0 33276 1215280 1102798 196502 10260 2422 28 0 6 2673338 3584 500 56362 149726 100326 1744838 488302 16682 246 0 230 292 2180528 29188 22 20346 0 0 2341048 1494 152 0 0 0 0 0 0 0 0 13876 38182 8138 0 0 65772 10534 300240 48028 0 0 0 0 1040120 1018.64 1020 262032 453378 277378 159812 423992 409564 320746 158022 95128 514 0 0
3 4 0 12 70 66 0 10 0 0 0 318 2212 3232 1872 0 0 0 2668 0 0 0 642 3894 10184 7554 10764 1014 0 0 0 0 60 6 0 0 0 0 0 0 0 2038 5596 0 64 6 6 914 76 2478 2398 1692 0 0 6176 340 304 102 74 406 216 16 0 0 21614 2 12 0 0 2668 184 7632 3090 210660 -1 -1 10580 12.69 21614 21614 21772 32 50 1994 21400 7710 1209600 302 2 6 0 2135.04 0 4525.44 2 16 0 52 2544 1894 2170 822 152 0 0 0 2 2 21614 1032 6 24 656 692 4836 388 0 0 0 138 8 1666 72 0 12 0 0 2578 76 62 0 0 0 0 0 0 0 0 232 0 0 2014 370 48 18 15740 1822 20174 44 0 0 0 1.08 54 5670 1566 240 46 58 44 10 0 0 0 4 32
4 5 0 60874 1368 458 0 0 0 0 0 0 43752 1966618 1800340 131646 4588 0 1974038 0 226 0 0 0 3230626 2618878 1058136 551022 0 0 0 0 1788 642 0 0 0 0 42124 372236 2128914 819596 584074 0 1644 362 562 842 30194 3911734 1606 0 0 0 1348578 1035668 338762 236540 182278 151778 163248 470800 19292 0 4289260 448 556 642 2 1974038 86454 653692 399410 306780 282560 274180 189000 60874.03 4289260 4289260 4283332 24793 17052 61844 654700 3946944 1209600 135720 0 152 0 3565684.8 0 379111.68 0 746 0 0 356 378910 2497104 993000 64230 10482 2776 86 202 212 4289260 3942 520 80950 227322 186242 2288268 1137268 22228 204 0 1716 1664 3440288 215826 0 4262 0 0 3590004 2026 444 0 0 0 0 0 0 0 0 44946 62648 11506 0 0 149474 35154 457040 80482 98334 27588 0 0 21173050 1116.06 1176 404740 904230 622012 229790 405298 347188 286954 311560 433954 1218 0 0
In [23]:
#Baseline manually
X=df_new.drop(['id','target'],axis=1)
y=df_new.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

start=time.time()
rfc=RandomForestClassifier()
rfc.fit(X_train,y_train)

print(time.time()-start)

xgb=XGBClassifier()
xgb.fit(X_train.values,y_train)

print(time.time()-start)

lr=LogisticRegression()
lr.fit(X_train,y_train)

print(time.time()-start)

rfc_score=f1_score(rfc.predict(X_test),y_test)
xgb_score=f1_score(xgb.predict(X_test.values),y_test)
lr_score=f1_score(lr.predict(X_test),y_test)

print(rfc_score,xgb_score,lr_score)
41.07751226425171
[15:34:39] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
98.26411032676697
104.20749163627625
0.779510022271715 0.7896995708154507 0.6013363028953229

Conclusion:: manually treating na is better

Model

1) Use cross validation to choose the best model

In [24]:
#Evaluating with k-cross validation to see which model is the best 

rfc=RandomForestClassifier()
xgb=XGBClassifier()
lr=LogisticRegression()
rfc_score = cross_val_score(rfc, X, y, cv=3, scoring='f1')
xgb_score = cross_val_score(xgb, X.values, y, cv=3, scoring='f1')
lr_score = cross_val_score(lr, X, y, cv=3, scoring='f1')
[15:37:33] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[15:38:23] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[15:39:13] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
In [25]:
print(rfc_score.mean(),rfc_score.std())
print(xgb_score.mean(),xgb_score.std())
print(lr_score.mean(),lr_score.std())
0.7733890670331349 0.003081826629848163
0.8039073132529581 0.021048530424348653
0.6220600948899493 0.03130437440139462

Conclusion: Hence, xgboost is better than the other models.

2) Hypertuning the na function

In [26]:
#Tuning treating na
df_new=na(df,[0.5,0.4], 10000)
X=df_new.drop(['id','target'],axis=1)
y=df_new.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
xgb=XGBClassifier()
xgb.fit(X_train.values,y_train)
xgb_score=f1_score(xgb.predict(X_test.values),y_test)
print(xgb_score)
[15:40:27] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
0.8034557235421166
  • [0.7,0.1], 6000 -> 0.729
  • [0.7,0.1], 30000 -> 0.733
  • [0.7,0.4], 30000 -> 0.755
  • [0.7,0.5],30000 -> 0.763
  • [0.7,0.6],30000 -> 0.763
  • [0.7,0.7],30000 -> 0.75

  • [0.6,0.4],30000 -> 0.744

  • [0.5,0.4],30000 -> 0.7444
  • [0.5,0.4], 10000 -> 0.742

Conclusion: [0.7,0.5],30000 -> 0.763 give the best result

3) Hypertuning xgboost parameter with gridsearch

In [27]:
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")
In [28]:
#Hypertuning xgb
df_new=na(df,[0.7,0.5],30000)
X=df_new.drop(['id','target'],axis=1)
y=df_new.target

xgb=XGBClassifier()
param_dist = {"max_depth": [3,4],
              "n_estimators":[100,200],
              }
grid_search = GridSearchCV(xgb, param_grid=param_dist, cv=3, scoring='f1')
In [29]:
# grid_search.fit(X.values, y.values)

# print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
#       % (time.time() - start, len(grid_search.cv_results_['params'])))
# report(grid_search.cv_results_)

GridSearchCV took 3289.85 seconds for 4 candidate parameter settings.

Model with rank: 1 Mean validation score: 0.803 (std: 0.014) Parameters: {'max_depth': 4, 'n_estimators': 200}

Model with rank: 2 Mean validation score: 0.798 (std: 0.023) Parameters: {'max_depth': 3, 'n_estimators': 200}

Model with rank: 3 Mean validation score: 0.776 (std: 0.019) Parameters: {'max_depth': 4, 'n_estimators': 100}

Prediction

In [30]:
#Prepare X test
test=pd.read_csv('equipfails/equip_failures_test_set.csv')
df_train=na(df,[0.7,0.5],30000)
X=df_new.drop(['id','target'],axis=1)

id_=test['id']
df_test=test.drop(['id'],axis=1)
df_test=na(df_test,[0.7,0.5],30000)

df_test=df_test[X.columns] #Make sure that they have the same columns and the columns are arranged the same
In [31]:
# Training the model
y=df_new.target

xgb=XGBClassifier(max_depth= 4, n_estimators= 200)
xgb.fit(X.values,y)
[15:41:37] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
Out[31]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=200, n_jobs=8, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)
In [34]:
#Predict the test set and submit
prediction=xgb.predict(df_test.values)
In [35]:
len(prediction)
Out[35]:
16001