{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GFALDA DISQUAL - vote dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#disable warnings\n", "from warnings import simplefilter, filterwarnings\n", "simplefilter(action='ignore', category=FutureWarning)\n", "filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### vote dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 435 entries, 0 to 434\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 handicapped_infants 435 non-null object\n", " 1 water_project_cost_sharin 435 non-null object\n", " 2 adoption_of_the_budget_res 435 non-null object\n", " 3 physician_fee_freeze 435 non-null object\n", " 4 group 435 non-null object\n", "dtypes: object(5)\n", "memory usage: 20.4+ KB\n" ] } ], "source": [ "#vote dataset\n", "from discrimintools.datasets import load_vote\n", "D = load_vote(\"subset\")\n", "D.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#split into X and y\n", "y, X = D[\"group\"], D.drop(columns=[\"group\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiation and training" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
GFALDA()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GFALDA()" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#instantiation and training\n", "from discrimintools import GFALDA\n", "clf = GFALDA(n_components=2)\n", "clf.fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Canonical coefficients" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#canonical coefficients\n", "cancoef = clf.cancoef_\n", "cancoef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized canonical coefficients" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "handicapped_infants_n -0.527352 0.710502\n", "handicapped_infants_other 5.522611 2.600607\n", "handicapped_infants_y 0.311144 -1.063560\n", "water_project_cost_sharin_n -0.240703 -0.294945\n", "water_project_cost_sharin_other 1.881943 1.074250\n", "water_project_cost_sharin_y -0.226247 0.025977\n", "adoption_of_the_budget_res_n -0.877486 1.243859\n", "adoption_of_the_budget_res_other 5.926929 3.335334\n", "adoption_of_the_budget_res_y 0.335391 -0.985725\n", "physician_fee_freeze_n 0.344976 -1.018166\n", "physician_fee_freeze_other 5.987091 2.959171\n", "physician_fee_freeze_y -0.853487 1.236927\n" ] } ], "source": [ "#standardized canonical coefficients\n", "print(cancoef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection canonical coefficients" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "handicapped_infants_n -0.131838 0.177625\n", "handicapped_infants_other 1.380653 0.650152\n", "handicapped_infants_y 0.077786 -0.265890\n", 
"water_project_cost_sharin_n -0.060176 -0.073736\n", "water_project_cost_sharin_other 0.470486 0.268562\n", "water_project_cost_sharin_y -0.056562 0.006494\n", "adoption_of_the_budget_res_n -0.219371 0.310965\n", "adoption_of_the_budget_res_other 1.481732 0.833834\n", "adoption_of_the_budget_res_y 0.083848 -0.246431\n", "physician_fee_freeze_n 0.086244 -0.254541\n", "physician_fee_freeze_other 1.496773 0.739793\n", "physician_fee_freeze_y -0.213372 0.309232\n" ] } ], "source": [ "#projection canonical coefficients\n", "print(cancoef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw canonical coefficients" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "Constant -17.584809 -9.824230\n", "handicapped_infants_n -0.972027 1.309611\n", "handicapped_infants_other 200.194660 94.272001\n", "handicapped_infants_y 0.723783 -2.474056\n", "water_project_cost_sharin_n -0.545343 -0.668236\n", "water_project_cost_sharin_other 17.055104 9.735387\n", "water_project_cost_sharin_y -0.504706 0.057949\n", "adoption_of_the_budget_res_n -2.232200 3.164202\n", "adoption_of_the_budget_res_other 234.383111 131.897318\n", "adoption_of_the_budget_res_y 0.576660 -1.694824\n", "physician_fee_freeze_n 0.607550 -1.793126\n", "physician_fee_freeze_other 236.762232 117.021769\n", "physician_fee_freeze_y -2.097553 3.039905\n" ] } ], "source": [ "#raw canonical coefficients\n", "print(cancoef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Coefficients" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#coefficients\n", "coef = clf.coef_\n", "coef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized coefficients" ] }, { 
"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " democrat republican\n", "Constant -1.304482 -3.013431\n", "handicapped_infants_n -2.959729 4.703855\n", "handicapped_infants_other 1.185292 -1.883767\n", "handicapped_infants_y 3.659211 -5.815532\n", "water_project_cost_sharin_n 0.487460 -0.774713\n", "water_project_cost_sharin_other -0.154333 0.245279\n", "water_project_cost_sharin_y -0.441971 0.702418\n", "adoption_of_the_budget_res_n -5.107770 8.117707\n", "adoption_of_the_budget_res_other -0.343928 0.546600\n", "adoption_of_the_budget_res_y 3.467241 -5.510436\n", "physician_fee_freeze_n 3.579007 -5.688065\n", "physician_fee_freeze_other 0.869829 -1.382407\n", "physician_fee_freeze_y -5.048491 8.023495\n" ] } ], "source": [ "#standardized coefficients\n", "print(coef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection coefficients" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " democrat republican\n", "Constant -1.304482 -3.013431\n", "handicapped_infants_n -0.739932 1.175964\n", "handicapped_infants_other 0.296323 -0.470942\n", "handicapped_infants_y 0.914803 -1.453883\n", "water_project_cost_sharin_n 0.121865 -0.193678\n", "water_project_cost_sharin_other -0.038583 0.061320\n", "water_project_cost_sharin_y -0.110493 0.175604\n", "adoption_of_the_budget_res_n -1.276943 2.029427\n", "adoption_of_the_budget_res_other -0.085982 0.136650\n", "adoption_of_the_budget_res_y 0.866810 -1.377609\n", "physician_fee_freeze_n 0.894752 -1.422016\n", "physician_fee_freeze_other 0.217457 -0.345602\n", "physician_fee_freeze_y -1.262123 2.005874\n" ] } ], "source": [ "#projection coefficients\n", "print(coef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw coefficients" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " democrat republican\n", "Constant -0.496301 -4.297863\n", "handicapped_infants_n -5.455432 8.670241\n", "handicapped_infants_other 42.966824 -68.286560\n", "handicapped_infants_y 8.512069 -13.528109\n", "water_project_cost_sharin_n 1.104401 -1.755208\n", "water_project_cost_sharin_other -1.398640 2.222839\n", "water_project_cost_sharin_y -0.985935 1.566932\n", "adoption_of_the_budget_res_n -12.993451 20.650306\n", "adoption_of_the_budget_res_other -13.600791 21.615544\n", "adoption_of_the_budget_res_y 5.961462 -9.474466\n", "physician_fee_freeze_n 6.303110 -10.017443\n", "physician_fee_freeze_other 34.397788 -54.667912\n", "physician_fee_freeze_y -12.407308 19.718758\n" ] } ], "source": [ "#raw coefficients\n", "print(coef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " General Factor Analysis Linear Discriminant Analysis - Results \n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "democrat 267 0.6138 0.6138\n", "republican 168 0.3862 0.3862\n", "\n", "Importance of components:\n", " Eigenvalue Difference Proportion (%) Cumulative (%)\n", "Can1 0.5323 0.0242 26.6143 26.6143\n", "Can2 0.5081 0.2534 25.4039 52.0182\n", "\n", "Raw Canonical Coefficients:\n", " Can1 Can2\n", "Constant -17.5848 -9.8242\n", "handicapped_infants_n -0.9720 1.3096\n", "handicapped_infants_other 200.1947 94.2720\n", "handicapped_infants_y 0.7238 -2.4741\n", "water_project_cost_sharin_n -0.5453 -0.6682\n", "water_project_cost_sharin_other 17.0551 9.7354\n", "water_project_cost_sharin_y -0.5047 0.0579\n", "adoption_of_the_budget_res_n -2.2322 3.1642\n", "adoption_of_the_budget_res_other 234.3831 131.8973\n", "adoption_of_the_budget_res_y 0.5767 -1.6948\n", "physician_fee_freeze_n 0.6075 -1.7931\n", 
"physician_fee_freeze_other 236.7622 117.0218\n", "physician_fee_freeze_y -2.0976 3.0399\n", "\n", "Projection functions coefficients:\n", " Can1 Can2\n", "handicapped_infants_n -0.1318 0.1776\n", "handicapped_infants_other 1.3807 0.6502\n", "handicapped_infants_y 0.0778 -0.2659\n", "water_project_cost_sharin_n -0.0602 -0.0737\n", "water_project_cost_sharin_other 0.4705 0.2686\n", "water_project_cost_sharin_y -0.0566 0.0065\n", "adoption_of_the_budget_res_n -0.2194 0.3110\n", "adoption_of_the_budget_res_other 1.4817 0.8338\n", "adoption_of_the_budget_res_y 0.0838 -0.2464\n", "physician_fee_freeze_n 0.0862 -0.2545\n", "physician_fee_freeze_other 1.4968 0.7398\n", "physician_fee_freeze_y -0.2134 0.3092\n", "\n", "Multivariate Analysis of Variance (MANOVA) Summary:\n", " Statistic Value p-value\n", "0 Wilks' Lambda 0.2772 NaN\n", "1 Bartlett -- C(2) 554.1935 0.0\n", "2 Rao -- F(2,432) 563.0956 0.0\n", "\n", "LDA Classification functions & Statistical Evaluation:\n", " democrat republican Wilks' Lambda Partial R-Square F Value \\\n", "Constant -1.3045 -3.0134 NaN NaN NaN \n", "Can1 1.6126 -2.5629 0.4479 0.6190 265.9263 \n", "Can2 -2.9688 4.7182 0.8293 0.3343 860.2649 \n", "\n", " Num DF Den DF Pr>F \n", "Constant NaN NaN NaN \n", "Can1 1.0 432.0 0.0 \n", "Can2 1.0 432.0 0.0 \n", "\n", "Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 435 435\n", "\n", "Number of Observations Classified into group:\n", "prediction democrat republican Total\n", "group \n", "democrat 244 23 267\n", "republican 14 154 168\n", "Total 258 177 435\n", "\n", "Percent Classified into group:\n", "prediction democrat republican Total\n", "group \n", "democrat 91.3858 8.6142 100.0\n", "republican 8.3333 91.6667 100.0\n", "Total 59.3103 40.6897 100.0\n", "Priors 0.6138 0.3862 NaN\n", "\n", "Error Count Estimates for group:\n", " democrat republican Total\n", "Rate 0.0861 0.0833 0.0851\n", "Priors 0.6138 0.3862 NaN\n", "\n", 
"Classification Report for group:\n", " precision recall f1-score support\n", "democrat 0.9457 0.9139 0.9295 267.0000\n", "republican 0.8701 0.9167 0.8928 168.0000\n", "accuracy 0.9149 0.9149 0.9149 0.9149\n", "macro avg 0.9079 0.9153 0.9111 435.0000\n", "weighted avg 0.9165 0.9149 0.9153 435.0000\n" ] } ], "source": [ "#summary\n", "from discrimintools import summaryGFALDA\n", "summaryGFALDA(clf,detailed=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }