Source code for discrimintools.datasets.load_vote
# -*- coding: utf-8 -*-
from pandas import read_excel
from pathlib import Path
# Directory containing the dataset files: the "data" folder located next to this module
DATASETS_DIR = Path(__file__).parent / "data"
[docs]
def load_vote(element="train"):
    """
    Load the Congressional Voting Records dataset.

    The data is read from the ``vote.xlsx`` workbook located in
    ``DATASETS_DIR``; each accepted value of `element` maps to one sheet
    of that workbook.

    Parameters
    ----------
    element : str, default = 'train'
        The part of the dataset to load. Possible values are:

        - 'train' for the training dataset (sheet ``Feuil1``).
        - 'test' for the testing dataset (sheet ``Feuil2``).
        - 'subset' for a subset of the whole dataset, with few columns
          (sheet ``Feuil3``).

    Returns
    -------
    vote : DataFrame of shape (n_samples, n_columns)
        The congressional voting records dataset. The first row of the
        sheet is used as column labels and the first column as the index.

    Raises
    ------
    ValueError
        If `element` is not equal to 'train', 'test' or 'subset'.

    References
    ----------
    [1] Ricco Rakotomalala (2020), « `Pratique de l'Analyse Discriminante Linéaire <https://hal.science/hal-04868585v1/file/Pratique_Analyse_Discriminante_Lineaire.pdf>`_ », Version 1.0, Université Lumière Lyon 2.

    Examples
    --------
    >>> from discrimintools.datasets import load_vote
    >>> from discrimintools import GFALDA
    >>> D = load_vote("subset") # load subset data
    >>> y, X = D["group"], D.drop(columns=["group"]) # split into X and y
    >>> clf = GFALDA()
    >>> clf.fit(X,y)
    GFALDA()
    """
    # (element value, worksheet name) pairs, checked in order with ``==``
    # so that any non-matching value falls through to the ValueError below.
    sheet_table = (
        ("train", "Feuil1"),
        ("test", "Feuil2"),
        ("subset", "Feuil3"),
    )
    for choice, sheet in sheet_table:
        if element == choice:
            workbook = DATASETS_DIR / "vote.xlsx"
            vote = read_excel(workbook, sheet_name=sheet, header=0, index_col=0)
            break
    else:
        # no entry of the table matched the requested element
        raise ValueError("'element' should be one of 'train', 'test', 'subset'")

    # attach a short description to the returned object
    vote.__doc__ = "\nCongressional Voting Records dataset\n"
    return vote