{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GFALDA DISQUAL - mushroom dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#disable warnings\n", "from warnings import simplefilter, filterwarnings\n", "simplefilter(action='ignore', category=FutureWarning)\n", "filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mushroom dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 52 entries, 0 to 51\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 TYPE 52 non-null object \n", " 1 MEOH 52 non-null float64\n", " 2 ACET 52 non-null float64\n", " 3 BU1 52 non-null float64\n", " 4 BU2 52 non-null float64\n", " 5 ISOP 52 non-null int64 \n", " 6 MEPR 52 non-null float64\n", " 7 PRO1 52 non-null float64\n", " 8 ACAL 52 non-null float64\n", "dtypes: float64(7), int64(1), object(1)\n", "memory usage: 3.8+ KB\n" ] } ], "source": [ "#mushroom dataset\n", "from discrimintools.datasets import load_mushroom\n", "D = load_mushroom()\n", "D.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#split into X and y\n", "y, X = D[\"classe\"], D.drop(columns=[\"classe\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiation and training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#instantiation and training\n", "from discrimintools import GFALDA\n", "clf = GFALDA(n_components=5)\n", "clf.fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Canonical coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 6, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "#canonical coefficients\n", "cancoef = clf.cancoef_\n", "cancoef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized canonical coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "MEOH 0.520673 0.045841\n", "ACET 0.060796 0.347537\n", "BU1 0.456099 -0.026945\n", "BU2 -0.029491 0.576889\n", "ISOP 0.459790 0.009933\n", "MEPR 0.482844 0.087627\n", "PRO1 -0.195016 0.645653\n", "ACAL 0.183659 0.344884\n" ] } ], "source": [ "#standardized canonical coefficients\n", "print(cancoef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection canonical coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "MEOH 0.065084 0.005730\n", "ACET 0.007599 0.043442\n", "BU1 0.057012 -0.003368\n", "BU2 -0.003686 0.072111\n", "ISOP 0.057474 0.001242\n", "MEPR 0.060356 0.010953\n", "PRO1 -0.024377 0.080707\n", "ACAL 0.022957 0.043110\n" ] } ], "source": [ "#projection canonical coefficients\n", "print(cancoef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw canonical coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "Constant -3.927165 -2.192304\n", "MEOH 0.001422 0.000125\n", "ACET 0.000503 0.002873\n", "BU1 0.041763 -0.002467\n", "BU2 -0.000547 0.010691\n", "ISOP 0.009610 0.000208\n", "MEPR 0.026395 0.004790\n", "PRO1 -0.000314 0.001041\n", "ACAL 0.023025 0.043237\n" ] } ], "source": [ "#raw canonical coefficients\n", "print(cancoef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#coefficients\n", "coef = clf.coef_\n", "coef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant -2.199936 -1.311627 -1.560770\n", "MEOH -0.658592 0.081985 0.498314\n", "ACET -0.038280 -0.071141 0.085894\n", "BU1 -0.584486 0.087645 0.431080\n", "BU2 0.102704 -0.141331 0.018700\n", "ISOP -0.585029 0.079604 0.437572\n", "MEPR -0.605651 0.065386 0.465763\n", "PRO1 0.321480 -0.187052 -0.132969\n", "ACAL -0.195208 -0.048617 0.202390\n" ] } ], "source": [ "#standardized coefficients\n", "print(coef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant -2.199936 -1.311627 -1.560770\n", "MEOH -0.082324 0.010248 0.062289\n", "ACET -0.004785 -0.008893 0.010737\n", "BU1 -0.073061 0.010956 0.053885\n", "BU2 0.012838 -0.017666 0.002337\n", "ISOP -0.073129 0.009950 0.054696\n", "MEPR -0.075706 0.008173 0.058220\n", "PRO1 0.040185 -0.023381 -0.016621\n", "ACAL -0.024401 -0.006077 0.025299\n" ] } ], "source": [ "#projection coefficients\n", "print(coef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw coefficients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant 2.559072 -1.494437 -5.468820\n", "MEOH -0.001798 0.000224 0.001361\n", "ACET -0.000316 -0.000588 0.000710\n", "BU1 -0.053519 0.008025 0.039472\n", "BU2 0.001903 -0.002619 
0.000347\n", "ISOP -0.012228 0.001664 0.009146\n", "MEPR -0.033108 0.003574 0.025461\n", "PRO1 0.000518 -0.000302 -0.000214\n", "ACAL -0.024473 -0.006095 0.025373\n" ] } ], "source": [ "#raw coefficients\n", "print(coef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " General Factor Analysis Linear Discriminant Analysis - Results \n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "KIRSCH 17 0.3269 0.3269\n", "MIRAB 15 0.2885 0.2885\n", "POIRE 20 0.3846 0.3846\n", "\n", "Importance of components:\n", " Eigenvalue Difference Proportion (%) Cumulative (%)\n", "Can1 2.7988 1.0799 34.9848 34.9848\n", "Can2 1.7188 0.3154 21.4856 56.4703\n", "\n", "Raw Canonical Coefficients:\n", " Can1 Can2\n", "Constant -3.9272 -2.1923\n", "MEOH 0.0014 0.0001\n", "ACET 0.0005 0.0029\n", "BU1 0.0418 -0.0025\n", "BU2 -0.0005 0.0107\n", "ISOP 0.0096 0.0002\n", "MEPR 0.0264 0.0048\n", "PRO1 -0.0003 0.0010\n", "ACAL 0.0230 0.0432\n", "\n", "Projection functions coefficients:\n", " Can1 Can2\n", "MEOH 0.0651 0.0057\n", "ACET 0.0076 0.0434\n", "BU1 0.0570 -0.0034\n", "BU2 -0.0037 0.0721\n", "ISOP 0.0575 0.0012\n", "MEPR 0.0604 0.0110\n", "PRO1 -0.0244 0.0807\n", "ACAL 0.0230 0.0431\n", "\n", "Multivariate Analysis of Variance (MANOVA) Summary:\n", " Statistic Value p-value\n", "0 Wilks' Lambda 0.4279 NaN\n", "1 Bartlett -- C(4) 41.1720 0.0\n", "2 Rao -- F(4,96) 12.6901 0.0\n", "\n", "LDA Classification functions & Statistical Evaluation:\n", " KIRSCH MIRAB POIRE Wilks' Lambda Partial R-Square F Value \\\n", "Constant -2.1999 -1.3116 -1.5608 NaN NaN NaN \n", "Can1 -1.2748 0.1782 0.9499 0.9608 0.4454 29.8899 \n", "Can2 0.1129 -0.2359 0.0810 0.4455 0.9604 0.9908 \n", "\n", " Num DF Den DF Pr>F \n", "Constant NaN NaN NaN \n", "Can1 2.0 48.0 0.0000 \n", "Can2 2.0 48.0 0.3787 \n", "\n", 
"Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 52 52\n", "\n", "Number of Observations Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 17 0 0 17\n", "MIRAB 1 9 5 15\n", "POIRE 2 2 16 20\n", "Total 20 11 21 52\n", "\n", "Percent Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 100.0000 0.0000 0.0000 100.0\n", "MIRAB 6.6667 60.0000 33.3333 100.0\n", "POIRE 10.0000 10.0000 80.0000 100.0\n", "Total 38.4615 21.1538 40.3846 100.0\n", "Priors 0.3269 0.2885 0.3846 NaN\n", "\n", "Error Count Estimates for TYPE:\n", " KIRSCH MIRAB POIRE Total\n", "Rate 0.0000 0.4000 0.2000 0.1923\n", "Priors 0.3269 0.2885 0.3846 NaN\n", "\n", "Classification Report for TYPE:\n", " precision recall f1-score support\n", "KIRSCH 0.8500 1.0000 0.9189 17.0000\n", "MIRAB 0.8182 0.6000 0.6923 15.0000\n", "POIRE 0.7619 0.8000 0.7805 20.0000\n", "accuracy 0.8077 0.8077 0.8077 0.8077\n", "macro avg 0.8100 0.8000 0.7972 52.0000\n", "weighted avg 0.8069 0.8077 0.8003 52.0000\n" ] } ], "source": [ "#summary\n", "from discrimintools import summaryGFALDA\n", "summaryGFALDA(clf,detailed=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }