Source code for ineqpy.grouped.stats

"""Stats' module."""
from .. import utils
from .._statistics import c_moment, mean, std_moment


def variance_hat_group(data=None, variable="x", weights="w", group="h"):
    """Compute a per-stratum variance term from weighted survey data.

    The rows of `data` are split by `group` and one value is produced for
    each stratum `h`::

        N_h ** 2 * fpc_h * s2_h / n_h

    where `N_h` is the sum of the stratum's weights, `n_h` its number of
    rows, ``fpc_h = 1 - n_h / N_h`` and `s2_h` the second-order central
    moment returned by ``c_moment`` (called without weights, with
    ``ddof=1`` when the stratum has more than one row and ``ddof=0``
    otherwise).

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame holding the columns named below. When omitted, `variable`,
        `weights` and `group` are handed to ``utils._to_df`` under the keys
        ``x``, ``weights`` and ``group`` and the resulting frame is used.
    variable : str
        Name of the column with the variable `x`.
    weights : str
        Name of the column with the weights `w`.
    group : str
        Name of the column with the stratum identifier `h`.

    Returns
    -------
    vhat_h : pandas.Series
        One value per stratum `h`.

    Todo
    ----
    Review improvements.

    Examples
    --------
    Variance of the mean, built from the per-stratum terms:

    >>> data = pd.DataFrame(data=[renta, peso, estrato],
    ...                     columns=["renta", "peso", "estrato"])
    >>> v = variance_hat_group(data)
    >>> v_total = v.sum() / peso.sum() ** 2
    """
    if data is None:
        data = utils._to_df(x=variable, weights=weights, group=group)
        variable, weights, group = "x", "weights", "group"

    def _stratum_term(stratum):
        r"""Return the variance term of a single stratum `h`.

        Parameters
        ----------
        stratum : pandas.DataFrame
            Rows belonging to one stratum.

        Returns
        -------
        vhat : float
            Value of the term for this stratum.

        Notes
        -----
        .. math:: N_h^2 \cdot fpc \cdot \frac{\hat{S}_h^2}{n_h}
        """
        n_rows = len(stratum)
        column = stratum[variable].copy()
        values = column.values
        total_weight = stratum[weights].sum()
        correction = 1 - (n_rows / total_weight)
        # A single-row stratum cannot lose a degree of freedom.
        if n_rows > 1:
            ddof = 1
        else:
            ddof = 0
        s2 = c_moment(variable=values, order=2, ddof=ddof)
        scaled = (total_weight ** 2) * correction
        return scaled * s2 / n_rows

    by_stratum = data.groupby(group)
    return by_stratum.apply(_stratum_term)
def moment_group(data=None, variable="x", weights="w", group="h", order=2):
    """Compute a scaled standardized moment of `variable` for each stratum.

    For every stratum `h` the value is::

        N_h ** order * fpc_h * m_h / n_h

    where `N_h` is the sum of the stratum's weights, `n_h` its number of
    rows, ``fpc_h = 1 - n_h / N_h`` and `m_h` the result of
    ``std_moment`` for the requested `order` (``ddof=1`` when the stratum
    has more than one row, ``ddof=0`` otherwise).

    .. warning::
        Experimental. The original author flagged this calculation as
        unverified; it is an attempt to generalize ``variance_hat_group``
        to any estimator. Review before relying on the results.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame holding the columns named below. When omitted, `variable`,
        `weights` and `group` are handed to ``utils._to_df`` under the keys
        ``x``, ``weights`` and ``group`` and the resulting frame is used.
    variable : array or str
        Column name in `data`, or the values themselves if `data` is None.
    weights : array or str
        Column name in `data`, or the values themselves if `data` is None.
    group : array or str
        Column name in `data`, or the values themselves if `data` is None.
    order : int, optional
        Order of the moment. Defaults to 2.

    Returns
    -------
    moment_of_order : pandas.Series
        One value per stratum `h`.

    TODO
    ----
    Review calculations, they do not appear to be correct.
    """
    if data is None:
        data = utils._to_df(x=variable, weights=weights, group=group)
        variable = "x"
        weights = "weights"
        group = "group"

    def mh(df):
        """Return the scaled standardized moment of one stratum."""
        x = df[variable].copy().values
        # std_moment works on values, so resolve the weights column here
        # instead of forwarding the column label.
        w = df[weights].copy().values
        Nh = df[weights].sum()
        fpc = 1 - (len(df) / Nh)
        # A single-row stratum cannot lose a degree of freedom.
        ddof = 1 if len(df) > 1 else 0
        stdm = std_moment(variable=x, weights=w, order=order, ddof=ddof)
        return (Nh ** order) * fpc * stdm / len(df)

    return data.groupby(group).apply(mh)
def quasivariance_hat_group(
    data=None, variable=None, weights=None, group=None
):
    """Calculate the quasivariance of `variable` within each group.

    Each group's values are passed to ``c_moment`` (default order and
    ``ddof``), centred on the mean of that same group.

    Parameters
    ----------
    data : pd.DataFrame, optional
        DataFrame that contains all the variables needed.
    variable : array or str
        Variable `x` the statistic is applied to. If `data` is None this
        must be an array, otherwise the name of a column in `data`.
    weights : array or str, optional
        Weights of each element of `x` (frequency, probability or
        density). If `data` is None this must be an array, otherwise the
        name of a column in `data`. When None, no weights are forwarded.
    group : array or str
        Categorical variable defining the groups. If `data` is None this
        must be an array, otherwise the name of a column in `data`.

    Returns
    -------
    shat2_group : array or pd.Series
        One value per group.

    References
    ----------
    Moment (mathematics). (2017, May 6). In Wikipedia, The Free
    Encyclopedia. Retrieved 14:40, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Moment_(mathematics)&oldid=778996402

    Notes
    -----
    This function is useful to calculate the variance of the mean.

    TODO
    ----
    Review function.
    """
    if data is None:
        data = utils._to_df(x=variable, weights=weights)
        variable = "x"
        # Only point at the generated column when weights were supplied,
        # so that "no weights" keeps reaching c_moment as None.
        if weights is not None:
            weights = "weights"

    def sd(df):
        """Return the central moment of one group about its own mean."""
        # Work on this group's values; the column labels alone carry no data.
        x = df[variable].values
        if weights is None:
            w = None
            center = mean(x)
        else:
            w = df[weights].values
            # NOTE(review): assumes mean() accepts `weights=` like c_moment
            # and std_moment do - confirm against _statistics.
            center = mean(x, weights=w)
        return c_moment(variable=x, weights=w, param=center)

    return data.groupby(group).apply(sd)