{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## STEPDISC LDA - heart dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#disable warnings\n", "from warnings import simplefilter, filterwarnings\n", "simplefilter(action='ignore', category=FutureWarning)\n", "filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### heart dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 150 entries, 0 to 149\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 disease 150 non-null object \n", " 1 age 150 non-null int64 \n", " 2 sex 150 non-null object \n", " 3 chestpain 150 non-null object \n", " 4 restbpress 150 non-null int64 \n", " 5 cholesteral 150 non-null int64 \n", " 6 sugar 150 non-null object \n", " 7 electro 150 non-null object \n", " 8 maxHeartRate 150 non-null int64 \n", " 9 ExerciseAngina 150 non-null object \n", " 10 oldpeak 150 non-null float64\n", " 11 slope 150 non-null object \n", " 12 vesselsColored 150 non-null int64 \n", " 13 thal 150 non-null object \n", "dtypes: float64(1), int64(5), object(8)\n", "memory usage: 17.6+ KB\n", "None\n" ] } ], "source": [ "#heart dataset\n", "from discrimintools.datasets import load_heart\n", "D = load_heart(\"train\")\n", "print(D.info())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### instantiation and training" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Categorical features have been encoded into binary variables.\n", "\n", "\n", "====================== Step 1 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "age 0.390537 0.000287 0.037556 1 \n", "sexmale 0.406288 0.039044 
5.322618 1 \n", "chestpainatypicalAngina 0.410195 0.048197 6.633471 1 \n", "chestpainnonAnginal 0.407635 0.042220 5.774636 1 \n", "chestpaintypicalAngina 0.435333 0.103158 15.068044 1 \n", "restbpress 0.395178 0.012029 1.595002 1 \n", "cholesteral 0.391386 0.002455 0.322413 1 \n", "sugarlow 0.397899 0.018784 2.507857 1 \n", "electrosttAbnormality 0.392351 0.004911 0.646455 1 \n", "electroventricHypertrophy 0.393479 0.007763 1.024876 1 \n", "maxHeartRate 0.400177 0.024369 3.272082 1 \n", "ExerciseAnginayes 0.396484 0.015283 2.033088 1 \n", "oldpeak 0.404146 0.033951 4.603833 1 \n", "slopeflat 0.393651 0.008197 1.082665 1 \n", "slopeupsloping 0.390878 0.001159 0.151970 1 \n", "vesselsColored 0.442917 0.118515 17.612864 1 \n", "thalnormal 0.392200 0.004525 0.595495 1 \n", "thalreversableEffect 0.393379 0.007510 0.991197 1 \n", "\n", " Den DF Pr>F \n", "age 131 0.846637 \n", "sexmale 131 0.022619 \n", "chestpainatypicalAngina 131 0.011118 \n", "chestpainnonAnginal 131 0.017660 \n", "chestpaintypicalAngina 131 0.000164 \n", "restbpress 131 0.208856 \n", "cholesteral 131 0.571133 \n", "sugarlow 131 0.115691 \n", "electrosttAbnormality 131 0.422839 \n", "electroventricHypertrophy 131 0.313231 \n", "maxHeartRate 131 0.072761 \n", "ExerciseAnginayes 131 0.156286 \n", "oldpeak 131 0.033745 \n", "slopeflat 131 0.300019 \n", "slopeupsloping 131 0.697293 \n", "vesselsColored 131 0.000050 \n", "thalnormal 131 0.441692 \n", "thalreversableEffect 131 0.321286 \n", "\n", "Variable age will be removed\n", "\n", "\n", "====================== Step 2 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.406288 0.038769 5.323891 1 \n", "chestpainatypicalAngina 0.410504 0.048642 6.749045 1 \n", "chestpainnonAnginal 0.407779 0.042283 5.827703 1 \n", "chestpaintypicalAngina 0.435344 0.102923 15.144637 1 \n", "restbpress 0.396076 0.013985 1.872180 1 \n", "cholesteral 0.391407 0.002224 0.294157 1 \n", "sugarlow 0.397905 
0.018518 2.490549 1 \n", "electrosttAbnormality 0.392399 0.004745 0.629361 1 \n", "electroventricHypertrophy 0.393649 0.007906 1.051882 1 \n", "maxHeartRate 0.402640 0.030059 4.090787 1 \n", "ExerciseAnginayes 0.396512 0.015070 2.019730 1 \n", "oldpeak 0.404147 0.033676 4.600093 1 \n", "slopeflat 0.393696 0.008026 1.067946 1 \n", "slopeupsloping 0.390980 0.001135 0.149973 1 \n", "vesselsColored 0.449061 0.130325 19.780910 1 \n", "thalnormal 0.392377 0.004689 0.621919 1 \n", "thalreversableEffect 0.393405 0.007290 0.969355 1 \n", "\n", " Den DF Pr>F \n", "sexmale 132 0.022591 \n", "chestpainatypicalAngina 132 0.010445 \n", "chestpainnonAnginal 132 0.017147 \n", "chestpaintypicalAngina 132 0.000157 \n", "restbpress 132 0.173550 \n", "cholesteral 132 0.588483 \n", "sugarlow 132 0.116925 \n", "electrosttAbnormality 132 0.429013 \n", "electroventricHypertrophy 132 0.306950 \n", "maxHeartRate 132 0.045138 \n", "ExerciseAnginayes 132 0.157625 \n", "oldpeak 132 0.033802 \n", "slopeflat 132 0.303300 \n", "slopeupsloping 132 0.699185 \n", "vesselsColored 132 0.000018 \n", "thalnormal 132 0.431749 \n", "thalreversableEffect 132 0.326642 \n", "\n", "Variable slopeupsloping will be removed\n", "\n", "\n", "====================== Step 3 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.407255 0.039961 5.536054 1 \n", "chestpainatypicalAngina 0.410717 0.048053 6.713645 1 \n", "chestpainnonAnginal 0.408154 0.042077 5.842021 1 \n", "chestpaintypicalAngina 0.436303 0.103879 15.417452 1 \n", "restbpress 0.396268 0.013342 1.798522 1 \n", "cholesteral 0.391857 0.002236 0.298057 1 \n", "sugarlow 0.399178 0.020536 2.788604 1 \n", "electrosttAbnormality 0.393135 0.005481 0.732954 1 \n", "electroventricHypertrophy 0.393932 0.007493 1.004102 1 \n", "maxHeartRate 0.402704 0.029113 3.988168 1 \n", "ExerciseAnginayes 0.396680 0.014368 1.938852 1 \n", "oldpeak 0.405450 0.035689 4.922248 1 \n", "slopeflat 0.396077 0.012867 
1.733586 1 \n", "vesselsColored 0.456018 0.142620 22.123724 1 \n", "thalnormal 0.392612 0.004157 0.555181 1 \n", "thalreversableEffect 0.394134 0.008001 1.072658 1 \n", "\n", " Den DF Pr>F \n", "sexmale 133 0.020095 \n", "chestpainatypicalAngina 133 0.010636 \n", "chestpainnonAnginal 133 0.017004 \n", "chestpaintypicalAngina 133 0.000138 \n", "restbpress 133 0.182177 \n", "cholesteral 133 0.586018 \n", "sugarlow 133 0.097289 \n", "electrosttAbnormality 133 0.393467 \n", "electroventricHypertrophy 133 0.318140 \n", "maxHeartRate 133 0.047862 \n", "ExerciseAnginayes 133 0.166117 \n", "oldpeak 133 0.028209 \n", "slopeflat 133 0.190218 \n", "vesselsColored 133 0.000006 \n", "thalnormal 133 0.457523 \n", "thalreversableEffect 133 0.302225 \n", "\n", "Variable cholesteral will be removed\n", "\n", "\n", "====================== Step 4 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.409528 0.043151 6.043007 1 \n", "chestpainatypicalAngina 0.411805 0.048441 6.821489 1 \n", "chestpainnonAnginal 0.408653 0.041101 5.743665 1 \n", "chestpaintypicalAngina 0.436743 0.102776 15.349524 1 \n", "restbpress 0.396703 0.012217 1.657322 1 \n", "sugarlow 0.400138 0.020696 2.831918 1 \n", "electrosttAbnormality 0.393716 0.004723 0.635860 1 \n", "electroventricHypertrophy 0.394296 0.006188 0.834316 1 \n", "maxHeartRate 0.403514 0.028891 3.986532 1 \n", "ExerciseAnginayes 0.397982 0.015392 2.094745 1 \n", "oldpeak 0.406739 0.036590 5.089300 1 \n", "slopeflat 0.396831 0.012535 1.701067 1 \n", "vesselsColored 0.456268 0.141170 22.026289 1 \n", "thalnormal 0.393948 0.005309 0.715220 1 \n", "thalreversableEffect 0.394533 0.006784 0.915272 1 \n", "\n", " Den DF Pr>F \n", "sexmale 134 0.015238 \n", "chestpainatypicalAngina 134 0.010034 \n", "chestpainnonAnginal 134 0.017929 \n", "chestpaintypicalAngina 134 0.000142 \n", "restbpress 134 0.200184 \n", "sugarlow 134 0.094737 \n", "electrosttAbnormality 134 0.426625 \n", 
"electroventricHypertrophy 134 0.362668 \n", "maxHeartRate 134 0.047892 \n", "ExerciseAnginayes 134 0.150142 \n", "oldpeak 134 0.025694 \n", "slopeflat 134 0.194384 \n", "vesselsColored 134 0.000007 \n", "thalnormal 134 0.399224 \n", "thalreversableEffect 134 0.340442 \n", "\n", "Variable electrosttAbnormality will be removed\n", "\n", "\n", "====================== Step 5 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.410873 0.041757 5.882798 1 \n", "chestpainatypicalAngina 0.414253 0.049575 7.041772 1 \n", "chestpainnonAnginal 0.411211 0.042545 5.998803 1 \n", "chestpaintypicalAngina 0.440501 0.106208 16.041826 1 \n", "restbpress 0.400086 0.015922 2.184188 1 \n", "sugarlow 0.402213 0.021126 2.913628 1 \n", "electroventricHypertrophy 0.395665 0.004925 0.668218 1 \n", "maxHeartRate 0.405997 0.030248 4.210902 1 \n", "ExerciseAnginayes 0.400247 0.016317 2.239323 1 \n", "oldpeak 0.409876 0.039425 5.540870 1 \n", "slopeflat 0.399037 0.013335 1.824583 1 \n", "vesselsColored 0.456813 0.138125 21.635246 1 \n", "thalnormal 0.395655 0.004900 0.664812 1 \n", "thalreversableEffect 0.396233 0.006351 0.862875 1 \n", "\n", " Den DF Pr>F \n", "sexmale 135 0.016611 \n", "chestpainatypicalAngina 135 0.008919 \n", "chestpainnonAnginal 135 0.015597 \n", "chestpaintypicalAngina 135 0.000102 \n", "restbpress 135 0.141764 \n", "sugarlow 135 0.090133 \n", "electroventricHypertrophy 135 0.415114 \n", "maxHeartRate 135 0.042098 \n", "ExerciseAnginayes 135 0.136874 \n", "oldpeak 135 0.020020 \n", "slopeflat 135 0.179028 \n", "vesselsColored 135 0.000008 \n", "thalnormal 135 0.416302 \n", "thalreversableEffect 135 0.354593 \n", "\n", "Variable thalnormal will be removed\n", "\n", "\n", "====================== Step 6 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.417444 0.052196 7.489525 1 \n", "chestpainatypicalAngina 0.415723 0.048272 6.897966 1 
\n", "chestpainnonAnginal 0.413922 0.044132 6.279052 1 \n", "chestpaintypicalAngina 0.440939 0.102699 15.565699 1 \n", "restbpress 0.403027 0.018292 2.534024 1 \n", "sugarlow 0.403748 0.020045 2.781833 1 \n", "electroventricHypertrophy 0.397602 0.004898 0.669371 1 \n", "maxHeartRate 0.408842 0.032255 4.532833 1 \n", "ExerciseAnginayes 0.401628 0.014873 2.053222 1 \n", "oldpeak 0.411139 0.037661 5.322282 1 \n", "slopeflat 0.402652 0.017377 2.405049 1 \n", "vesselsColored 0.460870 0.141503 22.416487 1 \n", "thalreversableEffect 0.423809 0.066430 9.677416 1 \n", "\n", " Den DF Pr>F \n", "sexmale 136 0.007035 \n", "chestpainatypicalAngina 136 0.009619 \n", "chestpainnonAnginal 136 0.013397 \n", "chestpaintypicalAngina 136 0.000127 \n", "restbpress 136 0.113737 \n", "sugarlow 136 0.097640 \n", "electroventricHypertrophy 136 0.414702 \n", "maxHeartRate 136 0.035053 \n", "ExerciseAnginayes 136 0.154179 \n", "oldpeak 136 0.022565 \n", "slopeflat 136 0.123268 \n", "vesselsColored 136 0.000005 \n", "thalreversableEffect 136 0.002273 \n", "\n", "Variable electroventricHypertrophy will be removed\n", "\n", "\n", "====================== Step 7 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.420956 0.055478 8.046977 1 \n", "chestpainatypicalAngina 0.418392 0.049691 7.163594 1 \n", "chestpainnonAnginal 0.416175 0.044627 6.399484 1 \n", "chestpaintypicalAngina 0.444648 0.105805 16.210398 1 \n", "restbpress 0.405184 0.018713 2.612513 1 \n", "sugarlow 0.405372 0.019166 2.677050 1 \n", "maxHeartRate 0.410360 0.031089 4.395911 1 \n", "ExerciseAnginayes 0.404261 0.016471 2.294300 1 \n", "oldpeak 0.415631 0.043377 6.212088 1 \n", "slopeflat 0.404662 0.017446 2.432584 1 \n", "vesselsColored 0.467272 0.149099 24.005791 1 \n", "thalreversableEffect 0.424401 0.063146 9.234051 1 \n", "\n", " Den DF Pr>F \n", "sexmale 137 0.005250 \n", "chestpainatypicalAngina 137 0.008348 \n", "chestpainnonAnginal 137 0.012547 \n", 
"chestpaintypicalAngina 137 0.000093 \n", "restbpress 137 0.108324 \n", "sugarlow 137 0.104099 \n", "maxHeartRate 137 0.037864 \n", "ExerciseAnginayes 137 0.132153 \n", "oldpeak 137 0.013880 \n", "slopeflat 137 0.121144 \n", "vesselsColored 137 0.000003 \n", "thalreversableEffect 137 0.002846 \n", "\n", "Variable ExerciseAnginayes will be removed\n", "\n", "\n", "====================== Step 8 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.430245 0.060394 8.870105 1 \n", "chestpainatypicalAngina 0.430059 0.059988 8.806591 1 \n", "chestpainnonAnginal 0.429872 0.059579 8.742837 1 \n", "chestpaintypicalAngina 0.457101 0.115598 18.037639 1 \n", "restbpress 0.414100 0.023760 3.358669 1 \n", "sugarlow 0.412059 0.018926 2.662180 1 \n", "maxHeartRate 0.421814 0.041614 5.992152 1 \n", "oldpeak 0.425421 0.049739 7.223204 1 \n", "slopeflat 0.412181 0.019215 2.703597 1 \n", "vesselsColored 0.470765 0.141268 22.701974 1 \n", "thalreversableEffect 0.432783 0.065904 9.736476 1 \n", "\n", " Den DF Pr>F \n", "sexmale 138 0.003425 \n", "chestpainatypicalAngina 138 0.003539 \n", "chestpainnonAnginal 138 0.003657 \n", "chestpaintypicalAngina 138 0.000040 \n", "restbpress 138 0.069008 \n", "sugarlow 138 0.105040 \n", "maxHeartRate 138 0.015625 \n", "oldpeak 138 0.008082 \n", "slopeflat 138 0.102398 \n", "vesselsColored 138 0.000005 \n", "thalreversableEffect 138 0.002200 \n", "\n", "Variable sugarlow will be removed\n", "\n", "\n", "====================== Step 9 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.437069 0.057221 8.436494 1 \n", "chestpainatypicalAngina 0.437514 0.058180 8.586570 1 \n", "chestpainnonAnginal 0.445484 0.075031 11.275258 1 \n", "chestpaintypicalAngina 0.471064 0.125259 19.904149 1 \n", "restbpress 0.419220 0.017080 2.415438 1 \n", "maxHeartRate 0.427785 0.036761 5.304726 1 \n", "oldpeak 0.435439 0.053692 7.886679 1 
\n", "slopeflat 0.420956 0.021135 3.001165 1 \n", "vesselsColored 0.472897 0.128649 20.522313 1 \n", "thalreversableEffect 0.437975 0.059171 8.742009 1 \n", "\n", " Den DF Pr>F \n", "sexmale 139 0.004280 \n", "chestpainatypicalAngina 139 0.003960 \n", "chestpainnonAnginal 139 0.001014 \n", "chestpaintypicalAngina 139 0.000017 \n", "restbpress 139 0.122418 \n", "maxHeartRate 139 0.022752 \n", "oldpeak 139 0.005697 \n", "slopeflat 139 0.085423 \n", "vesselsColored 139 0.000013 \n", "thalreversableEffect 139 0.003655 \n", "\n", "Variable restbpress will be removed\n", "\n", "\n", "====================== Step 10 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.439368 0.045856 6.728378 1 \n", "chestpainatypicalAngina 0.442844 0.053346 7.889247 1 \n", "chestpainnonAnginal 0.452659 0.073873 11.167170 1 \n", "chestpaintypicalAngina 0.473989 0.115549 18.290368 1 \n", "maxHeartRate 0.433855 0.033732 4.887314 1 \n", "oldpeak 0.450985 0.070436 10.608222 1 \n", "slopeflat 0.427790 0.020034 2.862142 1 \n", "vesselsColored 0.483113 0.132252 21.337160 1 \n", "thalreversableEffect 0.453183 0.074943 11.342002 1 \n", "\n", " Den DF Pr>F \n", "sexmale 140 0.010498 \n", "chestpainatypicalAngina 140 0.005684 \n", "chestpainnonAnginal 140 0.001068 \n", "chestpaintypicalAngina 140 0.000035 \n", "maxHeartRate 140 0.028678 \n", "oldpeak 140 0.001413 \n", "slopeflat 140 0.092911 \n", "vesselsColored 140 0.000009 \n", "thalreversableEffect 140 0.000979 \n", "\n", "Variable slopeflat will be removed\n", "\n", "\n", "====================== Step 11 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.447931 0.044965 6.638500 1 \n", "chestpainatypicalAngina 0.452985 0.055620 8.304242 1 \n", "chestpainnonAnginal 0.463630 0.077302 11.812728 1 \n", "chestpaintypicalAngina 0.484038 0.116205 18.539203 1 \n", "maxHeartRate 0.447156 0.043307 6.382762 1 \n", "oldpeak 
0.465282 0.080579 12.357399 1 \n", "vesselsColored 0.490839 0.128451 20.780993 1 \n", "thalreversableEffect 0.462909 0.075864 11.575027 1 \n", "\n", " Den DF Pr>F \n", "sexmale 141 0.011008 \n", "chestpainatypicalAngina 141 0.004574 \n", "chestpainnonAnginal 141 0.000773 \n", "chestpaintypicalAngina 141 0.000031 \n", "maxHeartRate 141 0.012628 \n", "oldpeak 141 0.000591 \n", "vesselsColored 141 0.000011 \n", "thalreversableEffect 141 0.000870 \n", "\n", "Variable maxHeartRate will be removed\n", "\n", "\n", "====================== Step 12 backward selection results =======================\n", " Wilks' Lambda Partial R-Square F Value Num DF \\\n", "sexmale 0.471424 0.051478 7.706666 1 \n", "chestpainatypicalAngina 0.481815 0.071935 11.006537 1 \n", "chestpainnonAnginal 0.496517 0.099416 15.675469 1 \n", "chestpaintypicalAngina 0.517758 0.136362 22.420771 1 \n", "oldpeak 0.508254 0.120212 19.402486 1 \n", "vesselsColored 0.517777 0.136394 22.426798 1 \n", "thalreversableEffect 0.479144 0.066762 10.158350 1 \n", "\n", " Den DF Pr>F \n", "sexmale 142 0.006244 \n", "chestpainatypicalAngina 142 0.001153 \n", "chestpainnonAnginal 142 0.000118 \n", "chestpaintypicalAngina 142 0.000005 \n", "oldpeak 142 0.000021 \n", "vesselsColored 142 0.000005 \n", "thalreversableEffect 142 0.001767 \n", "\n", "No variable can be removed\n", "\n" ] }, { "data": { "text/html": [ "
STEPDISC(method='backward')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "STEPDISC(method='backward')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from discrimintools import DISCRIM, STEPDISC\n", "#split into X and y\n", "y, X = D[\"disease\"], D.drop(columns=[\"disease\"])\n", "clf = DISCRIM().fit(X,y)\n", "clf2 = STEPDISC(method=\"backward\",alpha=0.01,verbose=True)\n", "clf2.fit(clf)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Selected variables" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['sexmale', 'chestpainatypicalAngina', 'chestpainnonAnginal', 'chestpaintypicalAngina', 'oldpeak', 'vesselsColored', 'thalreversableEffect']\n" ] } ], "source": [ "#selected variables\n", "print(clf2.summary_.selected)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### summary" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Stepwise Discriminant Analysis - Results \n", "\n", "====================== Before backward selection =======================\n", "\n", " Discriminant Analysis - Results \n", "\n", "Summary Information:\n", " Infos Value DF DF value\n", "0 Total Sample Size 150 DF Total 149\n", "1 Variables 18 DF Within Classes 148\n", "2 Classes 2 DF Between Classes 1\n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "absence 82 0.5467 0.5467\n", "presence 68 0.4533 0.4533\n", "\n", "Pooled Covariance Matrix Information:\n", " Rank Natural Log of the Determinant\n", "Pooled 18 -5.6739\n", "\n", "Linear Discriminant Function for disease:\n", " absence presence\n", "Constant -124.3546 -127.3863\n", "age 1.1836 1.1914\n", "sexmale 14.2659 15.9123\n", "chestpainatypicalAngina 0.5668 -1.8839\n", "chestpainnonAnginal 3.4872 1.6652\n", "chestpaintypicalAngina -2.8081 -7.4223\n", "restbpress 0.3460 0.3690\n", "cholesteral 0.0407 
0.0373\n", "sugarlow 10.4057 11.7146\n", "electrosttAbnormality -15.8118 -12.8213\n", "electroventricHypertrophy -1.8553 -1.2411\n", "maxHeartRate 0.4856 0.4579\n", "ExerciseAnginayes 4.9170 5.9612\n", "oldpeak 3.0652 3.7751\n", "slopeflat 18.6429 19.8215\n", "slopeupsloping 14.7467 15.2526\n", "vesselsColored -2.5087 -1.0308\n", "thalnormal 21.1525 20.1211\n", "thalreversableEffect 14.8540 16.1636\n", "\n", "Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 150 150\n", "\n", "Number of Observations Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 75 7 82\n", "presence 12 56 68\n", "Total 87 63 150\n", "\n", "Percent Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 91.4634 8.5366 100.0\n", "presence 17.6471 82.3529 100.0\n", "Total 58.0000 42.0000 100.0\n", "Priors 0.5467 0.4533 NaN\n", "\n", "Error Count Estimates for disease:\n", " absence presence Total\n", "Rate 0.0854 0.1765 0.1267\n", "Priors 0.5467 0.4533 NaN\n", "\n", "Classification Report for disease:\n", " precision recall f1-score support\n", "absence 0.8621 0.9146 0.8876 82.0000\n", "presence 0.8889 0.8235 0.8550 68.0000\n", "accuracy 0.8733 0.8733 0.8733 0.8733\n", "macro avg 0.8755 0.8691 0.8713 150.0000\n", "weighted avg 0.8742 0.8733 0.8728 150.0000\n", "\n", "====================== After backward selection =======================\n", "\n", " Discriminant Analysis - Results \n", "\n", "Summary Information:\n", " Infos Value DF DF value\n", "0 Total Sample Size 150 DF Total 149\n", "1 Variables 7 DF Within Classes 148\n", "2 Classes 2 DF Between Classes 1\n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "absence 82 0.5467 0.5467\n", "presence 68 0.4533 0.4533\n", "\n", "Pooled Covariance Matrix Information:\n", " Rank Natural Log of the Determinant\n", "Pooled 7 -10.5735\n", "\n", "Linear Discriminant Function 
for disease:\n", " absence presence\n", "Constant -3.5002 -6.2648\n", "sexmale 3.0078 4.6211\n", "chestpainatypicalAngina 4.6553 1.9352\n", "chestpainnonAnginal 4.6453 2.1583\n", "chestpaintypicalAngina 2.6881 -2.0794\n", "oldpeak 0.7868 1.8112\n", "vesselsColored 0.7138 2.0291\n", "thalreversableEffect 0.6085 2.4439\n", "\n", "Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 150 150\n", "\n", "Number of Observations Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 72 10 82\n", "presence 12 56 68\n", "Total 84 66 150\n", "\n", "Percent Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 87.8049 12.1951 100.0\n", "presence 17.6471 82.3529 100.0\n", "Total 56.0000 44.0000 100.0\n", "Priors 0.5467 0.4533 NaN\n", "\n", "Error Count Estimates for disease:\n", " absence presence Total\n", "Rate 0.1220 0.1765 0.1467\n", "Priors 0.5467 0.4533 NaN\n", "\n", "Classification Report for disease:\n", " precision recall f1-score support\n", "absence 0.8571 0.8780 0.8675 82.0000\n", "presence 0.8485 0.8235 0.8358 68.0000\n", "accuracy 0.8533 0.8533 0.8533 0.8533\n", "macro avg 0.8528 0.8508 0.8516 150.0000\n", "weighted avg 0.8532 0.8533 0.8531 150.0000\n" ] } ], "source": [ "from discrimintools import summarySTEPDISC\n", "summarySTEPDISC(clf2,detailed=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Evaluation of prediction on testing dataset\n", "\n", "#### Testing data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 120 entries, 150 to 269\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 disease 120 non-null object \n", " 1 age 120 non-null int64 \n", " 2 sex 120 non-null object \n", " 3 chestpain 120 non-null object \n", " 4 
restbpress 120 non-null int64 \n", " 5 cholesteral 120 non-null int64 \n", " 6 sugar 120 non-null object \n", " 7 electro 120 non-null object \n", " 8 maxHeartRate 120 non-null int64 \n", " 9 ExerciseAngina 120 non-null object \n", " 10 oldpeak 120 non-null float64\n", " 11 slope 120 non-null object \n", " 12 vesselsColored 120 non-null int64 \n", " 13 thal 120 non-null object \n", "dtypes: float64(1), int64(5), object(8)\n", "memory usage: 14.1+ KB\n" ] } ], "source": [ "#testing data\n", "DTest = load_heart(\"test\")\n", "DTest.info()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Observation Profile:\n", " Read Used\n", "Number of Observations 120 120\n", "\n", "Number of Observations Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 59 9 68\n", "presence 11 41 52\n", "Total 70 50 120\n", "\n", "Percent Classified into disease:\n", "prediction absence presence Total\n", "disease \n", "absence 86.764706 13.235294 100.0\n", "presence 21.153846 78.846154 100.0\n", "Total 58.333333 41.666667 100.0\n", "Priors 0.546667 0.453333 NaN\n", "\n", "Error Count Estimates for disease:\n", " absence presence Total\n", "Rate 0.132353 0.211538 0.16825\n", "Priors 0.546667 0.453333 NaN\n", "\n", "Classification Report for disease:\n", " precision recall f1-score support\n", "absence 0.842857 0.867647 0.855072 68.000000\n", "presence 0.820000 0.788462 0.803922 52.000000\n", "accuracy 0.833333 0.833333 0.833333 0.833333\n", "macro avg 0.831429 0.828054 0.829497 120.000000\n", "weighted avg 0.832952 0.833333 0.832907 120.000000\n" ] } ], "source": [ "#split into X and y\n", "yTest, XTest = DTest[\"disease\"], DTest.drop(columns=[\"disease\"])\n", "eval_test = clf2.eval_predict(XTest,yTest,verbose=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { 
"name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }