GFALDA DISMIX - heart dataset#
[1]:
#disable warnings
from warnings import simplefilter, filterwarnings
#ignore FutureWarning specifically
simplefilter(action='ignore', category=FutureWarning)
#blanket filter: with no category given it matches every Warning subclass,
#so it also covers the FutureWarning filter installed on the previous line
filterwarnings("ignore")
heart dataset#
[2]:
#heart dataset
from discrimintools.datasets import load_heart
D = load_heart()
#column overview: names, non-null counts and dtypes
D.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209 entries, 0 to 208
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 disease 209 non-null object
1 age 209 non-null int64
2 chest_pain 209 non-null object
3 restbpress 209 non-null int64
4 blood_sugar 209 non-null object
5 restecg 209 non-null object
6 max_hrate 209 non-null int64
7 exercice_angina 209 non-null object
dtypes: int64(3), object(5)
memory usage: 13.2+ KB
[3]:
#separate the predictors (X) from the target column "disease" (y)
X = D.drop(columns=["disease"])
y = D["disease"]
Instantiation and training#
[4]:
#instantiation and training
from discrimintools import GFALDA
#request 5 components
clf = GFALDA(n_components=5)
#fit on the predictors X and the target y; being the last expression,
#the value returned by fit() is what the cell displays
clf.fit(X,y)
[4]:
GFALDA(n_components=5)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
| n_components | 5 | |
| priors | None | |
| classes | False |
Canonical coefficients#
[5]:
#canonical coefficients
cancoef = clf.cancoef_
#show the names of the available fields of the result
cancoef._fields
[5]:
('standardized', 'raw', 'projection')
Standardized canonical coefficients#
[6]:
#standardized canonical coefficients
#printed as a table with one column per component (Can1..Can5)
print(cancoef.standardized)
Can1 Can2 Can3 Can4 Can5
age 0.416595 0.316321 -0.012072 0.003635 0.174323
restbpress 0.294879 0.240781 0.231359 0.428392 0.283278
max_hrate -0.506240 0.067098 0.089431 0.025805 0.008298
asympt 0.303267 -0.246131 -0.109045 -0.220034 -0.148435
atyp_angina -0.241907 -0.031289 0.250964 0.458097 -0.296698
non_anginal -0.144716 0.393626 -0.414103 -0.036533 0.580514
typ_angina -0.099707 0.153625 0.637921 -0.511068 0.166605
f -0.048493 -0.157945 -0.076855 0.043480 0.096906
t 0.168421 0.548560 0.266927 -0.151012 -0.336564
left_vent_hyper -0.081625 0.485598 -0.338725 0.106522 -0.487535
normal -0.062769 -0.043861 -0.058415 -0.205626 0.019543
st_t_wave_abnormality 0.184490 -0.092614 0.278966 0.451725 0.151970
no -0.279328 0.093156 0.064043 0.065959 0.102750
yes 0.385308 -0.128501 -0.088342 -0.090985 -0.141735
Projection canonical coefficients#
[7]:
#projection canonical coefficients
#printed as a table with one column per component (Can1..Can5)
print(cancoef.projection)
Can1 Can2 Can3 Can4 Can5
age 0.138865 0.105440 -0.004024 0.001212 0.058108
restbpress 0.098293 0.080260 0.077120 0.142797 0.094426
max_hrate -0.168747 0.022366 0.029810 0.008602 0.002766
asympt 0.167249 -0.094941 -0.040254 -0.079693 -0.052592
atyp_angina -0.167121 -0.015119 0.116052 0.207840 -0.131685
non_anginal -0.134340 0.255575 -0.257310 -0.022272 0.346211
typ_angina -0.226720 0.244328 0.970936 -0.763190 0.243383
f -0.019442 -0.044291 -0.020625 0.011448 0.024960
t 0.234518 0.534257 0.248790 -0.138096 -0.301084
left_vent_hyper -0.203319 0.846014 -0.564758 0.174255 -0.780191
normal -0.026504 -0.012954 -0.016510 -0.057021 0.005301
st_t_wave_abnormality 0.187609 -0.065872 0.189885 0.301677 0.099283
no -0.132921 0.031005 0.020399 0.020613 0.031412
yes 0.252919 -0.058996 -0.038815 -0.039222 -0.059771
Raw canonical coefficients#
[8]:
#raw canonical coefficients
#printed as a table with one column per component (Can1..Can5);
#unlike the standardized/projection tables it also has a "Constant" row
print(cancoef.raw)
Can1 Can2 Can3 Can4 Can5
Constant -1.829417 -4.126436 -2.222573 -3.463150 -3.265746
age 0.051849 0.039369 -0.001502 0.000452 0.021696
restbpress 0.016956 0.013845 0.013303 0.024633 0.016289
max_hrate -0.021253 0.002817 0.003755 0.001083 0.000348
asympt 0.434108 -0.352322 -0.156091 -0.314966 -0.212476
atyp_angina -0.433775 -0.056105 0.450016 0.821436 -0.532024
non_anginal -0.348690 0.948430 -0.997768 -0.088026 1.398731
typ_angina -0.588469 0.906694 3.764993 -3.016313 0.983297
f -0.050463 -0.164362 -0.079978 0.045247 0.100842
t 0.608707 1.982611 0.964731 -0.545788 -1.216412
left_vent_hyper -0.527730 3.139534 -2.189958 0.688697 -3.152059
normal -0.068793 -0.048070 -0.064021 -0.225359 0.021418
st_t_wave_abnormality 0.486952 -0.244448 0.736315 1.192302 0.401116
no -0.345006 0.115060 0.079102 0.081468 0.126910
yes 0.656470 -0.218934 -0.150513 -0.155016 -0.241481
Coefficients#
[9]:
#coefficients
coef = clf.coef_
#show the names of the available fields of the result
coef._fields
[9]:
('standardized', 'raw', 'projection')
Standardized coefficients#
[10]:
#standardized coefficients
#printed as a table with one column per class of the target (negative, positive)
print(coef.standardized)
negative positive
Constant -0.877854 -1.302011
age -0.144160 0.183333
restbpress 0.118793 -0.151073
max_hrate 0.309326 -0.393381
asympt -0.341506 0.434306
atyp_angina 0.296718 -0.377348
non_anginal 0.233696 -0.297200
typ_angina -0.140991 0.179304
f 0.031468 -0.040019
t -0.109291 0.138990
left_vent_hyper 0.129256 -0.164380
normal -0.065758 0.083627
st_t_wave_abnormality 0.105597 -0.134292
no 0.219821 -0.279555
yes -0.303223 0.385621
Projection coefficients#
[11]:
#projection coefficients
#printed as a table with one column per class of the target (negative, positive)
print(coef.projection)
negative positive
Constant -0.877854 -1.302011
age -0.048053 0.061111
restbpress 0.039598 -0.050358
max_hrate 0.103109 -0.131127
asympt -0.156916 0.199557
atyp_angina 0.167046 -0.212439
non_anginal 0.170482 -0.216809
typ_angina -0.164976 0.209807
f 0.011424 -0.014529
t -0.137807 0.175255
left_vent_hyper 0.263634 -0.335273
normal -0.013292 0.016904
st_t_wave_abnormality 0.033153 -0.042162
no 0.094602 -0.120309
yes -0.180006 0.228921
Raw coefficients#
[12]:
#raw coefficients
#printed as a table with one column per class of the target (negative, positive)
print(coef.raw)
negative positive
Constant -2.716792 1.036638
age -0.017942 0.022817
restbpress 0.006831 -0.008687
max_hrate 0.012986 -0.016515
asympt -0.488845 0.621683
atyp_angina 0.532060 -0.676641
non_anginal 0.563084 -0.716096
typ_angina -0.832126 1.058248
f 0.032746 -0.041645
t -0.395002 0.502339
left_vent_hyper 0.835676 -1.062762
normal -0.072069 0.091652
st_t_wave_abnormality 0.278718 -0.354457
no 0.271507 -0.345286
yes -0.516618 0.657003
Summary#
[13]:
#summary
from discrimintools import summaryGFALDA
#print the text report for the fitted model; with detailed=True the report shown
#for this call covers class information, component importance, coefficient tables,
#MANOVA statistics and the classification results on the calibration data
summaryGFALDA(clf,detailed=True)
General Factor Analysis Linear Discriminant Analysis - Results
Class Level Information:
Frequency Proportion Prior Probability
negative 117 0.5598 0.5598
positive 92 0.4402 0.4402
Importance of components:
Eigenvalue Difference Proportion (%) Cumulative (%)
Can1 2.3749 1.2131 23.7495 23.7495
Can2 1.1618 0.0978 11.6184 35.3678
Can3 1.0641 0.0398 10.6408 46.0086
Can4 1.0243 0.0441 10.2431 56.2517
Can5 0.9802 0.0131 9.8024 66.0541
Raw Canonical Coefficients:
Can1 Can2 Can3 Can4 Can5
Constant -1.8294 -4.1264 -2.2226 -3.4631 -3.2657
age 0.0518 0.0394 -0.0015 0.0005 0.0217
restbpress 0.0170 0.0138 0.0133 0.0246 0.0163
max_hrate -0.0213 0.0028 0.0038 0.0011 0.0003
asympt 0.4341 -0.3523 -0.1561 -0.3150 -0.2125
atyp_angina -0.4338 -0.0561 0.4500 0.8214 -0.5320
non_anginal -0.3487 0.9484 -0.9978 -0.0880 1.3987
typ_angina -0.5885 0.9067 3.7650 -3.0163 0.9833
f -0.0505 -0.1644 -0.0800 0.0452 0.1008
t 0.6087 1.9826 0.9647 -0.5458 -1.2164
left_vent_hyper -0.5277 3.1395 -2.1900 0.6887 -3.1521
normal -0.0688 -0.0481 -0.0640 -0.2254 0.0214
st_t_wave_abnormality 0.4870 -0.2444 0.7363 1.1923 0.4011
no -0.3450 0.1151 0.0791 0.0815 0.1269
yes 0.6565 -0.2189 -0.1505 -0.1550 -0.2415
Projection functions coefficients:
Can1 Can2 Can3 Can4 Can5
age 0.1389 0.1054 -0.0040 0.0012 0.0581
restbpress 0.0983 0.0803 0.0771 0.1428 0.0944
max_hrate -0.1687 0.0224 0.0298 0.0086 0.0028
asympt 0.1672 -0.0949 -0.0403 -0.0797 -0.0526
atyp_angina -0.1671 -0.0151 0.1161 0.2078 -0.1317
non_anginal -0.1343 0.2556 -0.2573 -0.0223 0.3462
typ_angina -0.2267 0.2443 0.9709 -0.7632 0.2434
f -0.0194 -0.0443 -0.0206 0.0114 0.0250
t 0.2345 0.5343 0.2488 -0.1381 -0.3011
left_vent_hyper -0.2033 0.8460 -0.5648 0.1743 -0.7802
normal -0.0265 -0.0130 -0.0165 -0.0570 0.0053
st_t_wave_abnormality 0.1876 -0.0659 0.1899 0.3017 0.0993
no -0.1329 0.0310 0.0204 0.0206 0.0314
yes 0.2529 -0.0590 -0.0388 -0.0392 -0.0598
Multivariate Analysis of Variance (MANOVA) Summary:
Statistic Value p-value
0 Wilks' Lambda 0.5667 NaN
1 Bartlett -- C(5) 116.1280 0.0
2 Rao -- F(5,203) 31.0384 0.0
LDA Classification functions & Statistical Evaluation:
negative positive Wilks' Lambda Partial R-Square F Value \
Constant -0.8779 -1.3020 NaN NaN NaN
Can1 -0.5618 0.7145 0.8789 0.6448 111.8153
Can2 0.1992 -0.2534 0.5859 0.9672 6.8795
Can3 -0.0193 0.0246 0.5669 0.9997 0.0593
Can4 0.4679 -0.5950 0.6601 0.8585 33.4456
Can5 0.1431 -0.1819 0.5751 0.9855 2.9922
Num DF Den DF Pr>F
Constant NaN NaN NaN
Can1 1.0 203.0 0.0000
Can2 1.0 203.0 0.0094
Can3 1.0 203.0 0.8078
Can4 1.0 203.0 0.0000
Can5 1.0 203.0 0.0852
Classification Summary for Calibration Data:
Observation Profile:
Read Used
Number of Observations 209 209
Number of Observations Classified into disease:
prediction negative positive Total
disease
negative 96 21 117
positive 21 71 92
Total 117 92 209
Percent Classified into disease:
prediction negative positive Total
disease
negative 82.0513 17.9487 100.0
positive 22.8261 77.1739 100.0
Total 55.9809 44.0191 100.0
Priors 0.5598 0.4402 NaN
Error Count Estimates for disease:
negative positive Total
Rate 0.1795 0.2283 0.201
Priors 0.5598 0.4402 NaN
Classification Report for disease:
precision recall f1-score support
negative 0.8205 0.8205 0.8205 117.000
positive 0.7717 0.7717 0.7717 92.000
accuracy 0.7990 0.7990 0.7990 0.799
macro avg 0.7961 0.7961 0.7961 209.000
weighted avg 0.7990 0.7990 0.7990 209.000