# Source code for ineqpy.grouped.inequality

"""inequality module."""
import pandas as pd
from typing import Union
import numpy as np

from .. import inequality as ineq
from .. import _statistics as stats
from .. import utils


def atkinson_group(
    data: pd.DataFrame = None,
    income: Union[str, pd.DataFrame, np.ndarray] = None,
    weights: Union[str, pd.DataFrame, np.ndarray] = None,
    group: Union[str, pd.DataFrame, np.ndarray] = None,
    e: float = 0.5,
):
    r"""Calculate the Atkinson index decomposed by group.

    The Atkinson index (also known as the Atkinson measure or Atkinson
    inequality measure) is a measure of income inequality developed by
    British economist Anthony Barnes Atkinson. The measure is useful in
    determining which end of the distribution contributed most to the
    observed inequality. The index is subgroup decomposable: overall
    inequality in the population can be computed as the sum of the
    corresponding Atkinson indices within each group, and the Atkinson
    index of the group mean incomes.

    This function adds up the Atkinson index of every group, each scaled
    by that group's share of rows in the data, plus the Atkinson index of
    the per-group weighted mean incomes.

    Parameters
    ----------
    data : pd.DataFrame, optional
        DataFrame that contains the income, weights and group columns. When
        omitted, a frame is built from the array-like arguments.
    income : str or np.array
        Income variable; name of a column in `data` or array-like.
    weights : str or np.array, optional
        Probability or sampling weights; name of a column in `data` or
        array-like. When `data` is given and `weights` is omitted, every row
        gets a weight of 1.
    group : str or np.array
        Stratum; name of a column in `data` or array-like.
    e : float, optional
        Value of the epsilon (inequality aversion) parameter. It is applied
        to both the within-group and the between-group components.

    Returns
    -------
    atkinson_by_group : float

    Reference
    ---------
    Atkinson index. (2017, March 12). In Wikipedia, The Free Encyclopedia.
    Retrieved 14:52, May 15, 2017, from
    https://en.wikipedia.org/w/index.php?title=Atkinson_index&oldid=769991852

    TODO
    ----
    - Review function, has different results with stata.
    """
    if (weights is None) and (data is None):
        weights = utils.not_empty_weights(weights, income)

    if data is None:
        data = utils._to_df(income=income, weights=weights, group=group)
        income = "income"
        weights = "weights"
        group = "group"

    # Work on a copy so the caller's frame is never modified.
    data = data.copy()

    if weights is None:
        # A frame was supplied without a weights column: previously this
        # failed with ``KeyError: None``. Fall back to equal weights, using
        # a column name that cannot clash with an existing one.
        weights = "_unit_weights"
        while weights in data.columns:
            weights = "_" + weights
        data[weights] = 1.0

    n_rows = data.shape[0]

    def _scaled_atkinson(df):
        """Atkinson index of one group scaled by its share of all rows."""
        atk = ineq.atkinson(
            income=df[income].values, weights=df[weights].values, e=e
        )
        return atk * (len(df) / n_rows)

    grouped = data.groupby(group)
    atk_by_group = grouped.apply(_scaled_atkinson)
    mu_by_group = grouped.apply(lambda d: stats.mean(d[income], d[weights]))

    # The between-group component must use the caller's epsilon as well;
    # without ``e=e`` it silently fell back to the callee's default.
    between = ineq.atkinson(income=mu_by_group.values, e=e)
    return atk_by_group.sum() + between