{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## GFALDA PCADA - alcools dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#disable warnings\n", "from warnings import simplefilter, filterwarnings\n", "simplefilter(action='ignore', category=FutureWarning)\n", "filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### alcools dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 52 entries, 0 to 51\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 TYPE 52 non-null object \n", " 1 MEOH 52 non-null float64\n", " 2 ACET 52 non-null float64\n", " 3 BU1 52 non-null float64\n", " 4 BU2 52 non-null float64\n", " 5 ISOP 52 non-null int64 \n", " 6 MEPR 52 non-null float64\n", " 7 PRO1 52 non-null float64\n", " 8 ACAL 52 non-null float64\n", "dtypes: float64(7), int64(1), object(1)\n", "memory usage: 3.8+ KB\n" ] } ], "source": [ "#alcools dataset\n", "from discrimintools.datasets import load_alcools\n", "D = load_alcools()\n", "D.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#split into X and y\n", "y, X = D[\"TYPE\"], D.drop(columns=[\"TYPE\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiation and training" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#instanciation and training\n", "from discrimintools import GFALDA\n", "clf = GFALDA(n_components=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### `fit` function" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
GFALDA()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GFALDA()" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#fit function\n", "clf.fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Canonical coefficients" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#canonical coefficients\n", "cancoef = clf.cancoef_\n", "cancoef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized canonical coefficients" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "MEOH 0.520673 0.045841\n", "ACET 0.060796 0.347537\n", "BU1 0.456099 -0.026945\n", "BU2 -0.029491 0.576889\n", "ISOP 0.459790 0.009933\n", "MEPR 0.482844 0.087627\n", "PRO1 -0.195016 0.645653\n", "ACAL 0.183659 0.344884\n" ] } ], "source": [ "#standardized canonical coefficients\n", "print(cancoef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Pojection canonical coefficients" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "MEOH 0.065084 0.005730\n", "ACET 0.007599 0.043442\n", "BU1 0.057012 -0.003368\n", "BU2 -0.003686 0.072111\n", "ISOP 0.057474 0.001242\n", "MEPR 0.060356 0.010953\n", "PRO1 -0.024377 0.080707\n", "ACAL 0.022957 0.043110\n" ] } ], "source": [ "#projection canonical coefficients\n", "print(cancoef.projection)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw canonical coefficients" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Can1 Can2\n", "Constant -3.927165 -2.192304\n", "MEOH 0.001422 0.000125\n", 
"ACET 0.000503 0.002873\n", "BU1 0.041763 -0.002467\n", "BU2 -0.000547 0.010691\n", "ISOP 0.009610 0.000208\n", "MEPR 0.026395 0.004790\n", "PRO1 -0.000314 0.001041\n", "ACAL 0.023025 0.043237\n" ] } ], "source": [ "#raw canonical coefficients\n", "print(cancoef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Coefficients" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('standardized', 'raw', 'projection')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#coefficients\n", "coef = clf.coef_\n", "coef._fields" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standardized coefficients" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant -2.199936 -1.311627 -1.560770\n", "MEOH -0.658592 0.081985 0.498314\n", "ACET -0.038280 -0.071141 0.085894\n", "BU1 -0.584486 0.087645 0.431080\n", "BU2 0.102704 -0.141331 0.018700\n", "ISOP -0.585029 0.079604 0.437572\n", "MEPR -0.605651 0.065386 0.465763\n", "PRO1 0.321480 -0.187052 -0.132969\n", "ACAL -0.195208 -0.048617 0.202390\n" ] } ], "source": [ "#standardized coefficients\n", "print(coef.standardized)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Projection coefficients" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant -2.199936 -1.311627 -1.560770\n", "MEOH -0.082324 0.010248 0.062289\n", "ACET -0.004785 -0.008893 0.010737\n", "BU1 -0.073061 0.010956 0.053885\n", "BU2 0.012838 -0.017666 0.002337\n", "ISOP -0.073129 0.009950 0.054696\n", "MEPR -0.075706 0.008173 0.058220\n", "PRO1 0.040185 -0.023381 -0.016621\n", "ACAL -0.024401 -0.006077 0.025299\n" ] } ], "source": [ "#projection coefficients\n", "print(coef.projection)" ] 
}, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Raw coefficients" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " KIRSCH MIRAB POIRE\n", "Constant 2.559072 -1.494437 -5.468820\n", "MEOH -0.001798 0.000224 0.001361\n", "ACET -0.000316 -0.000588 0.000710\n", "BU1 -0.053519 0.008025 0.039472\n", "BU2 0.001903 -0.002619 0.000347\n", "ISOP -0.012228 0.001664 0.009146\n", "MEPR -0.033108 0.003574 0.025461\n", "PRO1 0.000518 -0.000302 -0.000214\n", "ACAL -0.024473 -0.006095 0.025373\n" ] } ], "source": [ "#raw coefficients\n", "print(coef.raw)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " General Factor Analysis Linear Discriminant Analysis - Results \n", "\n", "Class Level Information:\n", " Frequency Proportion Prior Probability\n", "KIRSCH 17 0.3269 0.3269\n", "MIRAB 15 0.2885 0.2885\n", "POIRE 20 0.3846 0.3846\n", "\n", "Importance of components:\n", " Eigenvalue Difference Proportion (%) Cumulative (%)\n", "Can1 2.7988 1.0799 34.9848 34.9848\n", "Can2 1.7188 0.3154 21.4856 56.4703\n", "\n", "Raw Canonical Coefficients:\n", " Can1 Can2\n", "Constant -3.9272 -2.1923\n", "MEOH 0.0014 0.0001\n", "ACET 0.0005 0.0029\n", "BU1 0.0418 -0.0025\n", "BU2 -0.0005 0.0107\n", "ISOP 0.0096 0.0002\n", "MEPR 0.0264 0.0048\n", "PRO1 -0.0003 0.0010\n", "ACAL 0.0230 0.0432\n", "\n", "Projection functions coefficients:\n", " Can1 Can2\n", "MEOH 0.0651 0.0057\n", "ACET 0.0076 0.0434\n", "BU1 0.0570 -0.0034\n", "BU2 -0.0037 0.0721\n", "ISOP 0.0575 0.0012\n", "MEPR 0.0604 0.0110\n", "PRO1 -0.0244 0.0807\n", "ACAL 0.0230 0.0431\n", "\n", "Multivariate Analysis of Variance (MANOVA) Summary:\n", " Statistic Value p-value\n", "0 Wilks' Lambda 0.4279 NaN\n", "1 Bartlett -- C(4) 41.1720 0.0\n", "2 Rao -- F(4,96) 12.6901 0.0\n", 
"\n", "LDA Classification functions & Statistical Evaluation:\n", " KIRSCH MIRAB POIRE Wilks' Lambda Partial R-Square F Value \\\n", "Constant -2.1999 -1.3116 -1.5608 NaN NaN NaN \n", "Can1 -1.2748 0.1782 0.9499 0.9608 0.4454 29.8899 \n", "Can2 0.1129 -0.2359 0.0810 0.4455 0.9604 0.9908 \n", "\n", " Num DF Den DF Pr>F \n", "Constant NaN NaN NaN \n", "Can1 2.0 48.0 0.0000 \n", "Can2 2.0 48.0 0.3787 \n", "\n", "Classification Summary for Calibration Data:\n", "\n", "Observation Profile:\n", " Read Used\n", "Number of Observations 52 52\n", "\n", "Number of Observations Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 17 0 0 17\n", "MIRAB 1 9 5 15\n", "POIRE 2 2 16 20\n", "Total 20 11 21 52\n", "\n", "Percent Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 100.0000 0.0000 0.0000 100.0\n", "MIRAB 6.6667 60.0000 33.3333 100.0\n", "POIRE 10.0000 10.0000 80.0000 100.0\n", "Total 38.4615 21.1538 40.3846 100.0\n", "Priors 0.3269 0.2885 0.3846 NaN\n", "\n", "Error Count Estimates for TYPE:\n", " KIRSCH MIRAB POIRE Total\n", "Rate 0.0000 0.4000 0.2000 0.1923\n", "Priors 0.3269 0.2885 0.3846 NaN\n", "\n", "Classification Report for TYPE:\n", " precision recall f1-score support\n", "KIRSCH 0.8500 1.0000 0.9189 17.0000\n", "MIRAB 0.8182 0.6000 0.6923 15.0000\n", "POIRE 0.7619 0.8000 0.7805 20.0000\n", "accuracy 0.8077 0.8077 0.8077 0.8077\n", "macro avg 0.8100 0.8000 0.7972 52.0000\n", "weighted avg 0.8069 0.8077 0.8003 52.0000\n" ] } ], "source": [ "#summary\n", "from discrimintools import summaryGFALDA\n", "summaryGFALDA(clf,detailed=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Evaluation of prediction on testing dataset" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 50 entries, 0 to 49\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- 
------ -------------- ----- \n", " 0 TYPE 50 non-null object \n", " 1 MEOH 50 non-null int64 \n", " 2 ACET 50 non-null int64 \n", " 3 BU1 50 non-null float64\n", " 4 BU2 50 non-null float64\n", " 5 ISOP 50 non-null int64 \n", " 6 MEPR 50 non-null int64 \n", " 7 PRO1 50 non-null int64 \n", " 8 ACAL 50 non-null float64\n", "dtypes: float64(3), int64(5), object(1)\n", "memory usage: 3.6+ KB\n", "None\n" ] } ], "source": [ "#testing data\n", "DTest = load_alcools(\"test\")\n", "print(DTest.info())" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Observation Profile:\n", " Read Used\n", "Number of Observations 50 50\n", "\n", "Number of Observations Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 11 2 1 14\n", "MIRAB 2 10 5 17\n", "POIRE 2 3 14 19\n", "Total 15 15 20 50\n", "\n", "Percent Classified into TYPE:\n", "prediction KIRSCH MIRAB POIRE Total\n", "TYPE \n", "KIRSCH 78.571429 14.285714 7.142857 100.0\n", "MIRAB 11.764706 58.823529 29.411765 100.0\n", "POIRE 10.526316 15.789474 73.684211 100.0\n", "Total 30.000000 30.000000 40.000000 100.0\n", "Priors 0.326923 0.288462 0.384615 NaN\n", "\n", "Error Count Estimates for TYPE:\n", " KIRSCH MIRAB POIRE Total\n", "Rate 0.214286 0.411765 0.263158 0.290048\n", "Priors 0.326923 0.288462 0.384615 NaN\n", "\n", "Classification Report for TYPE:\n", " precision recall f1-score support\n", "KIRSCH 0.733333 0.785714 0.758621 14.0\n", "MIRAB 0.666667 0.588235 0.625000 17.0\n", "POIRE 0.700000 0.736842 0.717949 19.0\n", "accuracy 0.700000 0.700000 0.700000 0.7\n", "macro avg 0.700000 0.703597 0.700523 50.0\n", "weighted avg 0.698000 0.700000 0.697734 50.0\n" ] } ], "source": [ "#split into X and y\n", "yTest, XTest = DTest[\"TYPE\"], DTest.drop(columns=[\"TYPE\"])\n", "#evaluation on testing data\n", "evl_test = clf.eval_predict(XTest,yTest,verbose=True)" ] } ], "metadata": { "kernelspec": { 
"display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }