{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GFALDA DISMIX - heart dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#disable warnings\n", "from warnings import simplefilter, filterwarnings\n", "simplefilter(action='ignore', category=FutureWarning)\n", "filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### heart dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 209 entries, 0 to 208\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 disease 209 non-null object\n", " 1 age 209 non-null int64 \n", " 2 chest_pain 209 non-null object\n", " 3 restbpress 209 non-null int64 \n", " 4 blood_sugar 209 non-null object\n", " 5 restecg 209 non-null object\n", " 6 max_hrate 209 non-null int64 \n", " 7 exercice_angina 209 non-null object\n", "dtypes: int64(3), object(5)\n", "memory usage: 13.2+ KB\n" ] } ], "source": [ "#heart dataset\n", "from discrimintools.datasets import load_heart\n", "D = load_heart()\n", "D.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#split into X and y\n", "y, X = D[\"disease\"], D.drop(columns=[\"disease\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiation and training" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
GFALDA(n_components=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GFALDA(n_components=5)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#instantiation and training\n", "from discrimintools import GFALDA\n", "clf = GFALDA(n_components=5)\n", "clf.fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Canonical coefficients" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#canonical coefficients\n", "cancoef = clf.cancoef_\n", "cancoef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized canonical coefficients" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2 Can3 Can4 Can5\n", "age 0.416595 0.316321 -0.012072 0.003635 0.174323\n", "restbpress 0.294879 0.240781 0.231359 0.428392 0.283278\n", "max_hrate -0.506240 0.067098 0.089431 0.025805 0.008298\n", "asympt 0.303267 -0.246131 -0.109045 -0.220034 -0.148435\n", "atyp_angina -0.241907 -0.031289 0.250964 0.458097 -0.296698\n", "non_anginal -0.144716 0.393626 -0.414103 -0.036533 0.580514\n", "typ_angina -0.099707 0.153625 0.637921 -0.511068 0.166605\n", "f -0.048493 -0.157945 -0.076855 0.043480 0.096906\n", "t 0.168421 0.548560 0.266927 -0.151012 -0.336564\n", "left_vent_hyper -0.081625 0.485598 -0.338725 0.106522 -0.487535\n", "normal -0.062769 -0.043861 -0.058415 -0.205626 0.019543\n", "st_t_wave_abnormality 0.184490 -0.092614 0.278966 0.451725 0.151970\n", "no -0.279328 0.093156 0.064043 0.065959 0.102750\n", "yes 0.385308 -0.128501 -0.088342 -0.090985 -0.141735\n" ] } ], "source": [ "#standardized canonical coefficients\n", "print(cancoef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection canonical coefficients" ] }, { "cell_type": 
"code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2 Can3 Can4 Can5\n", "age 0.138865 0.105440 -0.004024 0.001212 0.058108\n", "restbpress 0.098293 0.080260 0.077120 0.142797 0.094426\n", "max_hrate -0.168747 0.022366 0.029810 0.008602 0.002766\n", "asympt 0.167249 -0.094941 -0.040254 -0.079693 -0.052592\n", "atyp_angina -0.167121 -0.015119 0.116052 0.207840 -0.131685\n", "non_anginal -0.134340 0.255575 -0.257310 -0.022272 0.346211\n", "typ_angina -0.226720 0.244328 0.970936 -0.763190 0.243383\n", "f -0.019442 -0.044291 -0.020625 0.011448 0.024960\n", "t 0.234518 0.534257 0.248790 -0.138096 -0.301084\n", "left_vent_hyper -0.203319 0.846014 -0.564758 0.174255 -0.780191\n", "normal -0.026504 -0.012954 -0.016510 -0.057021 0.005301\n", "st_t_wave_abnormality 0.187609 -0.065872 0.189885 0.301677 0.099283\n", "no -0.132921 0.031005 0.020399 0.020613 0.031412\n", "yes 0.252919 -0.058996 -0.038815 -0.039222 -0.059771\n" ] } ], "source": [ "#projection canonical coefficients\n", "print(cancoef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw canonical coefficients" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2 Can3 Can4 Can5\n", "Constant -1.829417 -4.126436 -2.222573 -3.463150 -3.265746\n", "age 0.051849 0.039369 -0.001502 0.000452 0.021696\n", "restbpress 0.016956 0.013845 0.013303 0.024633 0.016289\n", "max_hrate -0.021253 0.002817 0.003755 0.001083 0.000348\n", "asympt 0.434108 -0.352322 -0.156091 -0.314966 -0.212476\n", "atyp_angina -0.433775 -0.056105 0.450016 0.821436 -0.532024\n", "non_anginal -0.348690 0.948430 -0.997768 -0.088026 1.398731\n", "typ_angina -0.588469 0.906694 3.764993 -3.016313 0.983297\n", "f -0.050463 -0.164362 -0.079978 0.045247 0.100842\n", "t 0.608707 1.982611 0.964731 -0.545788 -1.216412\n", "left_vent_hyper -0.527730 3.139534 -2.189958 
0.688697 -3.152059\n", "normal -0.068793 -0.048070 -0.064021 -0.225359 0.021418\n", "st_t_wave_abnormality 0.486952 -0.244448 0.736315 1.192302 0.401116\n", "no -0.345006 0.115060 0.079102 0.081468 0.126910\n", "yes 0.656470 -0.218934 -0.150513 -0.155016 -0.241481\n" ] } ], "source": [ "#raw canonical coefficients\n", "print(cancoef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Coefficients" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#coefficients\n", "coef = clf.coef_\n", "coef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized coefficients" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " negative positive\n", "Constant -0.877854 -1.302011\n", "age -0.144160 0.183333\n", "restbpress 0.118793 -0.151073\n", "max_hrate 0.309326 -0.393381\n", "asympt -0.341506 0.434306\n", "atyp_angina 0.296718 -0.377348\n", "non_anginal 0.233696 -0.297200\n", "typ_angina -0.140991 0.179304\n", "f 0.031468 -0.040019\n", "t -0.109291 0.138990\n", "left_vent_hyper 0.129256 -0.164380\n", "normal -0.065758 0.083627\n", "st_t_wave_abnormality 0.105597 -0.134292\n", "no 0.219821 -0.279555\n", "yes -0.303223 0.385621\n" ] } ], "source": [ "#standardized coefficients\n", "print(coef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection coefficients" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " negative positive\n", "Constant -0.877854 -1.302011\n", "age -0.048053 0.061111\n", "restbpress 0.039598 -0.050358\n", "max_hrate 0.103109 -0.131127\n", "asympt -0.156916 0.199557\n", "atyp_angina 0.167046 -0.212439\n", "non_anginal 
0.170482 -0.216809\n", "typ_angina -0.164976 0.209807\n", "f 0.011424 -0.014529\n", "t -0.137807 0.175255\n", "left_vent_hyper 0.263634 -0.335273\n", "normal -0.013292 0.016904\n", "st_t_wave_abnormality 0.033153 -0.042162\n", "no 0.094602 -0.120309\n", "yes -0.180006 0.228921\n" ] } ], "source": [ "#projection coefficients\n", "print(coef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw coefficients" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " negative positive\n", "Constant -2.716792 1.036638\n", "age -0.017942 0.022817\n", "restbpress 0.006831 -0.008687\n", "max_hrate 0.012986 -0.016515\n", "asympt -0.488845 0.621683\n", "atyp_angina 0.532060 -0.676641\n", "non_anginal 0.563084 -0.716096\n", "typ_angina -0.832126 1.058248\n", "f 0.032746 -0.041645\n", "t -0.395002 0.502339\n", "left_vent_hyper 0.835676 -1.062762\n", "normal -0.072069 0.091652\n", "st_t_wave_abnormality 0.278718 -0.354457\n", "no 0.271507 -0.345286\n", "yes -0.516618 0.657003\n" ] } ], "source": [ "#raw coefficients\n", "print(coef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " General Factor Analysis Linear Discriminant Analysis - Results \n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "negative 117 0.5598 0.5598\n", "positive 92 0.4402 0.4402\n", "\n", "Importance of components:\n", " Eigenvalue Difference Proportion (%) Cumulative (%)\n", "Can1 2.3749 1.2131 23.7495 23.7495\n", "Can2 1.1618 0.0978 11.6184 35.3678\n", "Can3 1.0641 0.0398 10.6408 46.0086\n", "Can4 1.0243 0.0441 10.2431 56.2517\n", "Can5 0.9802 0.0131 9.8024 66.0541\n", "\n", "Raw Canonical Coefficients:\n", " Can1 Can2 Can3 Can4 Can5\n", "Constant -1.8294 -4.1264 -2.2226 -3.4631 -3.2657\n", "age 
0.0518 0.0394 -0.0015 0.0005 0.0217\n", "restbpress 0.0170 0.0138 0.0133 0.0246 0.0163\n", "max_hrate -0.0213 0.0028 0.0038 0.0011 0.0003\n", "asympt 0.4341 -0.3523 -0.1561 -0.3150 -0.2125\n", "atyp_angina -0.4338 -0.0561 0.4500 0.8214 -0.5320\n", "non_anginal -0.3487 0.9484 -0.9978 -0.0880 1.3987\n", "typ_angina -0.5885 0.9067 3.7650 -3.0163 0.9833\n", "f -0.0505 -0.1644 -0.0800 0.0452 0.1008\n", "t 0.6087 1.9826 0.9647 -0.5458 -1.2164\n", "left_vent_hyper -0.5277 3.1395 -2.1900 0.6887 -3.1521\n", "normal -0.0688 -0.0481 -0.0640 -0.2254 0.0214\n", "st_t_wave_abnormality 0.4870 -0.2444 0.7363 1.1923 0.4011\n", "no -0.3450 0.1151 0.0791 0.0815 0.1269\n", "yes 0.6565 -0.2189 -0.1505 -0.1550 -0.2415\n", "\n", "Projection functions coefficients:\n", " Can1 Can2 Can3 Can4 Can5\n", "age 0.1389 0.1054 -0.0040 0.0012 0.0581\n", "restbpress 0.0983 0.0803 0.0771 0.1428 0.0944\n", "max_hrate -0.1687 0.0224 0.0298 0.0086 0.0028\n", "asympt 0.1672 -0.0949 -0.0403 -0.0797 -0.0526\n", "atyp_angina -0.1671 -0.0151 0.1161 0.2078 -0.1317\n", "non_anginal -0.1343 0.2556 -0.2573 -0.0223 0.3462\n", "typ_angina -0.2267 0.2443 0.9709 -0.7632 0.2434\n", "f -0.0194 -0.0443 -0.0206 0.0114 0.0250\n", "t 0.2345 0.5343 0.2488 -0.1381 -0.3011\n", "left_vent_hyper -0.2033 0.8460 -0.5648 0.1743 -0.7802\n", "normal -0.0265 -0.0130 -0.0165 -0.0570 0.0053\n", "st_t_wave_abnormality 0.1876 -0.0659 0.1899 0.3017 0.0993\n", "no -0.1329 0.0310 0.0204 0.0206 0.0314\n", "yes 0.2529 -0.0590 -0.0388 -0.0392 -0.0598\n", "\n", "Multivariate Analysis of Variance (MANOVA) Summary:\n", " Statistic Value p-value\n", "0 Wilks' Lambda 0.5667 NaN\n", "1 Bartlett -- C(5) 116.1280 0.0\n", "2 Rao -- F(5,203) 31.0384 0.0\n", "\n", "LDA Classification functions & Statistical Evaluation:\n", " negative positive Wilks' Lambda Partial R-Square F Value \\\n", "Constant -0.8779 -1.3020 NaN NaN NaN \n", "Can1 -0.5618 0.7145 0.8789 0.6448 111.8153 \n", "Can2 0.1992 -0.2534 0.5859 0.9672 6.8795 \n", "Can3 -0.0193 0.0246 0.5669 
0.9997 0.0593 \n", "Can4 0.4679 -0.5950 0.6601 0.8585 33.4456 \n", "Can5 0.1431 -0.1819 0.5751 0.9855 2.9922 \n", "\n", " Num DF Den DF Pr>F \n", "Constant NaN NaN NaN \n", "Can1 1.0 203.0 0.0000 \n", "Can2 1.0 203.0 0.0094 \n", "Can3 1.0 203.0 0.8078 \n", "Can4 1.0 203.0 0.0000 \n", "Can5 1.0 203.0 0.0852 \n", "\n", "Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 209 209\n", "\n", "Number of Observations Classified into disease:\n", "prediction negative positive Total\n", "disease \n", "negative 96 21 117\n", "positive 21 71 92\n", "Total 117 92 209\n", "\n", "Percent Classified into disease:\n", "prediction negative positive Total\n", "disease \n", "negative 82.0513 17.9487 100.0\n", "positive 22.8261 77.1739 100.0\n", "Total 55.9809 44.0191 100.0\n", "Priors 0.5598 0.4402 NaN\n", "\n", "Error Count Estimates for disease:\n", " negative positive Total\n", "Rate 0.1795 0.2283 0.201\n", "Priors 0.5598 0.4402 NaN\n", "\n", "Classification Report for disease:\n", " precision recall f1-score support\n", "negative 0.8205 0.8205 0.8205 117.000\n", "positive 0.7717 0.7717 0.7717 92.000\n", "accuracy 0.7990 0.7990 0.7990 0.799\n", "macro avg 0.7961 0.7961 0.7961 209.000\n", "weighted avg 0.7990 0.7990 0.7990 209.000\n" ] } ], "source": [ "#summary\n", "from discrimintools import summaryGFALDA\n", "summaryGFALDA(clf,detailed=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }