# Source code for ineqpy.statistics

"""Descriptive statistics.

This module provides the main descriptive statistics, such as the mean and the variance.

"""

from . import _statistics as stat
from . import utils


def c_moment(
    variable=None, weights=None, data=None, order=2, param=None, ddof=0
):
    """Calculate the central moment.

    Compute the central moment of order `order` of `variable` with respect
    to `param`, given the weights `weights`.

    Parameters
    ----------
    variable : 1d-array
        Variable.
    weights : 1d-array
        Weights.
    data : pandas.DataFrame
        Contains all variables needed.
    order : int, optional
        Moment order, 2 by default (variance).
    param : int or array, optional
        Parameter for which the moment is calculated. The default, None,
        implies using the mean.
    ddof : int, optional
        Degree of freedom, zero by default.

    Returns
    -------
    central_moment : float

    Notes
    -----
    - The central moment of order 1 is 0.
    - The central moment of order 2 is the variance.

    Source : https://en.wikipedia.org/wiki/Moment_(mathematics)

    Todo
    ----
    Implement: https://en.wikipedia.org/wiki/L-moment#cite_note-wang:96-6

    """
    # Resolve the inputs through the shared helper, then delegate the actual
    # computation to the backend module.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.c_moment(values, wts, order, param, ddof)
def percentile(
    variable=None, weights=None, data=None, p=50, interpolate="lower"
):
    """Calculate the value of a percentile given a variable and its weights.

    Parameters
    ----------
    variable : str or array
    weights : str or array
    data : pd.DataFrame, optional
        pd.DataFrame that contains all variables needed.
    p : float
        Percentile level, 50 by default.
        NOTE(review): presumably expressed on a 0-100 scale, so that 50 is
        the median -- confirm against ``_statistics.percentile``.
    interpolate : str
        Interpolation mode forwarded unchanged to the backend, ``"lower"``
        by default.
        NOTE(review): the accepted values are defined by
        ``_statistics.percentile`` -- confirm there.

    Returns
    -------
    percentile : float or pd.Series

    """
    # Resolve the inputs through the shared helper, then delegate the actual
    # computation to the backend module.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.percentile(values, wts, p, interpolate)
def std_moment(
    variable=None, weights=None, data=None, param=None, order=3, ddof=0
):
    """Calculate the standardized moment.

    Compute the standardized moment of order `order` of `variable` with
    respect to `param`.

    Parameters
    ----------
    variable : 1d-array
        Random variable.
    weights : 1d-array, optional
        Weights or probability.
    data : pd.DataFrame, optional
        pd.DataFrame that contains all variables needed.
    order : int, optional
        Order of the moment, three by default.
    param : int or float or array, optional
        Central trend, default is the mean.
    ddof : int, optional
        Degree of freedom, zero by default.

    Returns
    -------
    std_moment : float
        The standardized moment of order `order`.

    References
    ----------
    - https://en.wikipedia.org/wiki/Moment_(mathematics)
    - https://en.wikipedia.org/wiki/Standardized_moment

    Todo
    ----
    It is the general case of the raw and central moments. Review
    implementation.

    """
    # Note the backend takes `param` before `order`, matching this wrapper's
    # own parameter order.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.std_moment(values, wts, param, order, ddof)
def mean(variable=None, weights=None, data=None):
    """Calculate the mean of `variable` given `weights`.

    Parameters
    ----------
    variable : array-like or str
        Variable on which the mean is estimated.
    weights : array-like or str
        Weights of `variable`.
    data : pandas.DataFrame
        It is possible to pass a DataFrame holding the variable and the
        weights; in that case `variable` and `weights` must be the column
        names stored in `data`.

    Returns
    -------
    mean : array-like or float

    """
    # Only when a DataFrame is supplied do the inputs need to be separated
    # out of it; otherwise they are used as given.
    if data is not None:
        variable, weights = utils.extract_values(data, variable, weights)

    # The weights always go through `not_empty_weights` before averaging
    # (presumably to supply defaults when none were given -- confirm in
    # ``utils``).
    effective_weights = utils.not_empty_weights(weights, variable)
    return stat.mean(variable, effective_weights)
def density(variable=None, weights=None, groups=None, data=None):
    """Density in percentage.

    Calculates density in percentage. This divides the variable, inferring
    the width of each group as max - min.

    Parameters
    ----------
    variable : numpy.array or str
        Main variable.
    weights : numpy.array or str
        Weights of main variable.
    groups : numpy.array or str
        Label that shows which group each element belongs to. When `data`
        is given, this is the name of the column in `data` that holds those
        labels; without `data` it is used as passed.
    data : pd.DataFrame, optional
        Object that contains all variables needed.

    Returns
    -------
    density : array-like

    References
    ----------
    Histogram. (2017, May 9). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:47, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Histogram&oldid=779516918

    """
    variable, weights = utils.extract_values(data, variable, weights)

    # Look the group labels up in `data` only when a DataFrame was actually
    # supplied. Indexing unconditionally raised TypeError ('NoneType' object
    # is not subscriptable) whenever `groups` was passed directly as an array
    # with ``data=None``.
    if groups is not None and data is not None:
        groups = data[groups].values

    return stat.density(variable, weights, groups)
def var(variable=None, weights=None, data=None, ddof=0):
    """Calculate the variance.

    Calculate the population variance of `variable` given `weights`.

    Parameters
    ----------
    variable : 1d-array or pd.Series or pd.DataFrame
        Variable on which the quasivariance is estimated.
    weights : 1d-array or pd.Series or pd.DataFrame
        Weights of the `variable`.
    data : pd.DataFrame, optional
        Object that contains all variables needed.
    ddof : int
        Degree of freedom, zero by default.

    Returns
    -------
    variance : 1d-array or pd.Series or float
        Estimation of the quasivariance of `variable`.

    References
    ----------
    Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:40, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)

    Notes
    -----
    For a stratified sample, each stratum must be passed separately
    (e.g. via groupby).

    """
    # Resolve the inputs through the shared helper, then delegate the actual
    # computation to the backend module.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.var(values, wts, ddof)
def coef_variation(variable=None, weights=None, data=None):
    """Calculate the coefficient of variation.

    Calculate the coefficient of variation of a `variable` given weights.
    The coefficient of variation is the square root of the variance of the
    incomes divided by the mean income. It has the advantages of being
    mathematically tractable and subgroup decomposable, but it is not
    bounded from above.

    Parameters
    ----------
    variable : array-like or str
    weights : array-like or str
    data : pandas.DataFrame

    Returns
    -------
    coefficient_variation : float

    References
    ----------
    Coefficient of variation. (2017, May 5). In Wikipedia, The Free
    Encyclopedia. Retrieved 15:03, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Coefficient_of_variation

    """
    # TODO complete docstring
    values, wts = utils.extract_values(data, variable, weights)
    return stat.coef_variation(values, wts)
def kurt(variable=None, weights=None, data=None):
    """Calculate the kurtosis coefficient.

    Parameters
    ----------
    variable : 1d-array
    weights : 1d-array
    data : pandas.DataFrame
        Object which stores ``variable`` and ``weights``.

    Returns
    -------
    kurt : float
        Kurtosis coefficient.

    References
    ----------
    Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:40, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)

    Notes
    -----
    It is an alias of the standardized fourth-order moment.

    """
    # Resolve the inputs through the shared helper, then delegate the actual
    # computation to the backend module.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.kurt(values, wts)
def skew(variable=None, weights=None, data=None):
    """Return the asymmetry coefficient of a sample.

    Parameters
    ----------
    variable : array-like, str
    weights : array-like, str
    data : pandas.DataFrame
        Object which stores ``variable`` and ``weights``.

    Returns
    -------
    skew : float

    References
    ----------
    Moment (mathematics). (2017, May 6). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:40, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)

    Notes
    -----
    It is an alias of the standardized third-order moment.

    """
    # Resolve the inputs through the shared helper, then delegate the actual
    # computation to the backend module.
    values, wts = utils.extract_values(data, variable, weights)
    return stat.skew(values, wts)