# Source code for access.fca

import warnings
import numpy as np
import pandas as pd

from .weights import step_fn


def weighted_catchment(
    loc_df,
    cost_df,
    max_cost=None,
    cost_source="origin",
    cost_dest="dest",
    cost_cost="cost",
    loc_index="geoid",
    loc_value=None,
    weight_fn=None,
    three_stage_weight=None,
):
    """
    Calculation of the floating catchment (buffered) accessibility
    sum, from DataFrames with computed distances.
    This catchment may be either a simple buffer -- with cost below
    a single threshold -- or an additional weight may be applied
    as a function of the access cost.

    Parameters
    ----------

    loc_df      : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                  Holds the location IDs and, optionally, the value to sum at each location.
    cost_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                  Holds the precomputed costs between pairs of locations.
    max_cost    : float
                  Only rows whose cost is strictly below this value are summed;
                  note that it applies _along with_ the weight function.
                  If `None`, no threshold is applied.
    cost_source : str
                  The column of `cost_df` that is matched against the location IDs of `loc_df`.
    cost_dest   : str
                  The column of `cost_df` that the sum is grouped by;
                  its values become the index of the returned series.
    cost_cost   : str
                  The name of the cost column of `cost_df`.
    loc_index   : {bool, str}
                  The name of the column (or index level) of `loc_df` that holds the location IDs.
                  If it is `True`, the index of `loc_df` is used.
    loc_value   : str
                  The column of `loc_df` to sum.
                  If this value is `None`, every matched location counts as 1,
                  so the result is a (possibly weighted) count.
                  Use this, for instance, to count restaurants, instead of total doctors in a practice.
    weight_fn   : function
                  This function will weight the value of resources/facilities,
                  as a function of the raw cost.
    three_stage_weight : object
                  If not `None` (and `weight_fn` is given), the weight is not computed
                  from `weight_fn` but read from the columns `W3` and `G` of `cost_df`.

    Returns
    -------
    resources  : pandas.Series
                 A -- potentially weighted -- sum of resources, facilities, or consumers,
                 indexed by `cost_dest` and named `loc_value`.
    """
    # attach the location values to every cost row whose source matches a location
    if loc_index is True:
        merged = pd.merge(cost_df, loc_df, left_on=cost_source, right_index=True)
    else:
        merged = pd.merge(cost_df, loc_df, left_on=cost_source, right_on=loc_index)

    # constrain by max cost (strict inequality)
    if max_cost is not None:
        merged = merged[merged[cost_cost] < max_cost]

    # without a value column every matched row contributes 1, i.e. a count
    if loc_value is None:
        values = pd.Series(1, index=merged.index)
    else:
        values = merged[loc_value]

    # apply a weight if requested -- either enhanced two stage or three stage
    if weight_fn:
        if three_stage_weight is not None:
            values = values * merged["W3"] * merged["G"]
        else:
            values = values * merged[cost_cost].apply(weight_fn)
        # arithmetic between differently named series drops the name; restore it
        values = values.rename(loc_value)

    return values.groupby(merged[cost_dest]).sum()


def fca_ratio(
    demand_df,
    supply_df,
    demand_cost_df,
    supply_cost_df,
    max_cost,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    demand_cost_origin="origin",
    demand_cost_dest="dest",
    demand_cost_name="cost",
    supply_cost_origin="origin",
    supply_cost_dest="dest",
    supply_cost_name="cost",
    weight_fn=None,
    normalize=False,
    noise="quiet",
):
    """Calculation of the floating catchment accessibility
    ratio, from DataFrames with precomputed distances.
    This is accomplished through two calls of the :meth:`Access.access.weighted_catchment` method:
    one for the demand and one for the supply within each catchment.

    Parameters
    ----------

    demand_df          : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                         The demand dataframe, containing a location index and a total demand.
    supply_df          : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                         The supply dataframe, containing a location index and a level of supply.
    demand_cost_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                         This dataframe contains a link between neighboring demand locations, and a cost between them.
    supply_cost_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                         This dataframe contains a link between neighboring supply locations, and a cost between them.
    max_cost           : float
                         This is the maximum cost to consider in the weighted sum;
                         note that it applies *along with* the weight function.
    demand_index       : {bool, str}
                         is the name of the column (or index level) of `demand_df` that holds the IDs;
                         `True` means the IDs are the index of `demand_df`.
    demand_name        : str
                         is the name of the column of `demand_df` that holds the aggregate demand at a location.
    supply_index       : {bool, str}
                         is the name of the column (or index level) of `supply_df` that holds the IDs;
                         `True` means the IDs are the index of `supply_df`.
    supply_name        : str
                         is the name of the column of `supply_df` that holds the aggregate supply at a location.
    demand_cost_origin : str
                         The column of `demand_cost_df` that the demand sum is grouped by;
                         its values index the result.
    demand_cost_dest   : str
                         The column of `demand_cost_df` that is matched against the demand location IDs.
    demand_cost_name   : str
                         The column name of the travel cost in `demand_cost_df`.
    supply_cost_origin : str
                         The column of `supply_cost_df` that the supply sum is grouped by.
    supply_cost_dest   : str
                         The column of `supply_cost_df` that is matched against the supply location IDs.
    supply_cost_name   : str
                         The column name of the travel cost in `supply_cost_df`.
    weight_fn          : function
                         This function will weight the value of resources/facilities,
                         as a function of the raw cost.
    normalize          : bool
                         Accepted for interface compatibility; this function does not use it
                         and always returns the unnormalized ratio.
    noise              : str
                         Default 'quiet', otherwise prints the locations whose ratio is undefined (NaN).

    Returns
    -------
    access     : pandas.Series
                 A -- potentially-weighted -- access ratio, named ``"FCA"``.
    """

    # the demand IDs live either in a column named by demand_index or on the index
    if demand_index is not True and demand_index in demand_df.columns:
        demand_ids = demand_df[demand_index]
    else:
        demand_ids = demand_df.index

    # if there is a discrepancy between the demand and supply cost dataframe locations, warn about it
    unaccounted = set(demand_ids.tolist()) - set(
        supply_cost_df[supply_cost_dest].unique()
    )
    if unaccounted:
        # stacklevel=2 attributes the warning to the caller rather than to this module
        warnings.warn("some tracts may be unaccounted for in supply_cost", stacklevel=2)

    # get a series of the total demand within the buffer zone
    total_demand_series = weighted_catchment(
        demand_df,
        demand_cost_df,
        max_cost,
        cost_source=demand_cost_dest,
        cost_dest=demand_cost_origin,
        cost_cost=demand_cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
    )
    # get a series of the total supply within the buffer zone
    total_supply_series = weighted_catchment(
        supply_df,
        supply_cost_df,
        max_cost,
        cost_source=supply_cost_dest,
        cost_dest=supply_cost_origin,
        cost_cost=supply_cost_name,
        loc_index=supply_index,
        loc_value=supply_name,
        weight_fn=weight_fn,
    )

    # join the aggregate demand and the aggregate supply into one dataframe;
    # the right join keeps every location that has demand, and missing supply becomes 0
    combined = (
        total_supply_series.to_frame(name="supply")
        .join(total_demand_series.to_frame(name="demand"), how="right")
        .fillna(0)
    )

    # calculate the floating catchment area, or supply divided by demand
    base_FCA_series = (combined["supply"] / combined["demand"]).rename("FCA")

    if noise != "quiet":
        # depending on the version history of the census tract data you use,
        # this will print out the tracts that have undefined FCA values
        print(base_FCA_series[pd.isna(base_FCA_series)])

    return base_FCA_series


def two_stage_fca(
    demand_df,
    supply_df,
    cost_df,
    max_cost=None,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    cost_origin="origin",
    cost_dest="dest",
    cost_name="cost",
    weight_fn=None,
    normalize=False,
):
    """
    Calculation of the two-stage floating catchment accessibility
    ratio, from DataFrames with precomputed distances.
    A first call of the `access.weighted_catchment` method
    retrieves the patients using each provider.
    The ratio of providers per patient is then calculated at each care destination,
    and a second call weights and sums that ratio at each corresponding demand site.
    This is based on the original paper by Luo and Wang :cite:`2002_luo_spatial_accessibility_chicago`,
    as extended by Luo and Qi :cite:`2009_luo_qi_E2SFCA`
    and McGrail and Humphreys :cite:`2009_mcgrail_improved_2SFCA`.

    Parameters
    ----------

    demand_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    The demand dataframe, containing a location index and a total demand.
    supply_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    The supply dataframe, containing a location index and a level of supply.
    cost_df       : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    This dataframe contains a link between demand and supply locations, and a cost between them.
    max_cost      : float
                    This is the maximum cost to consider in the weighted sum;
                    note that it applies _along with_ the weight function.
    demand_index  : {bool, str}
                    is the name of the column (or index level) of `demand_df` that holds the location ID;
                    `True` means the IDs are the index of `demand_df`.
    demand_name   : str
                    is the name of the column of `demand_df` that holds the aggregate demand at a location.
    supply_index  : {bool, str}
                    is the name of the column of `supply_df` that holds the location ID.
                    If no such column exists (or it is `True`), the index of `supply_df` is used.
    supply_name   : str
                    is the name of the column of `supply_df` that holds the aggregate supply at a location.
    cost_origin   : str
                    The column name of the locations of users or consumers.
    cost_dest     : str
                    The column name of the supply or resource locations.
    cost_name     : str
                    The column name of the travel cost between origins and destinations
    weight_fn     : function
                    This function will weight the value of resources/facilities,
                    as a function of the raw cost.
    normalize     : bool
                    Accepted for interface compatibility; this function does not use it
                    and always returns the unnormalized series.

    Returns
    -------
    access     : pandas.Series
                 A -- potentially-weighted -- two-stage access ratio.
    """
    # stage one: the (weighted) demand that can reach each supply location
    total_demand_series = weighted_catchment(
        demand_df,
        cost_df,
        max_cost,
        cost_source=cost_origin,
        cost_dest=cost_dest,
        cost_cost=cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
    )
    weighted_demand_name = demand_name + "_W"
    total_demand_series = total_demand_series.rename(weighted_demand_name)

    # the join below aligns on the index, so move the supply IDs there if they are in a column
    if supply_index is not True and supply_index in supply_df.columns:
        supply = supply_df.set_index(supply_index)
    else:
        supply = supply_df

    # hold the supply and aggregate demand at each location
    temp = supply.join(total_demand_series, how="right")

    # there may be NA values due to a shorter supply dataframe than the demand dataframe.
    # in this case, replace any potential NA values (which correspond to locations with no supply) with 0.
    # assign the result back: an in-place fillna on a selected column does not
    # reliably update the parent frame.
    temp[supply_name] = temp[supply_name].fillna(0)

    # the fractional ratio of supply to aggregate demand at each location, or Rl
    supply_to_total_demand_frame = (
        (temp[supply_name] / temp[weighted_demand_name])
        .to_frame("Rl")
        .rename_axis("geoid")
    )

    # stage two: sum, into a series, the supply to total demand ratios reachable from each origin
    two_stage_fca_series = weighted_catchment(
        supply_to_total_demand_frame,
        cost_df,
        max_cost,
        cost_source=cost_dest,
        cost_dest=cost_origin,
        cost_cost=cost_name,
        loc_index="geoid",
        loc_value="Rl",
        weight_fn=weight_fn,
    )

    return two_stage_fca_series


def three_stage_fca(
    demand_df,
    supply_df,
    cost_df,
    max_cost,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    cost_origin="origin",
    cost_dest="dest",
    cost_name="cost",
    weight_fn=None,
    normalize=False,
):
    """Calculation of the three-stage floating catchment accessibility
    ratio, from DataFrames with precomputed distances.
    A first call of the :meth:`access.access.weighted_catchment` method
    retrieves the patients using each provider.
    The ratio of providers per patient is then calculated at each care destination,
    and a second call weights and sums that ratio at each corresponding demand site.
    The only difference with respect to the 2SFCA method is that,
    in addition to a distance-dependent weight (`weight_fn`),
    a preference weight *G* is calculated: for every row of `cost_df`,
    *G* is the row's weight divided by the sum of the weights of all rows
    sharing the same origin.
    See the original paper by Wan, Zou, and Sternberg. :cite:`2012_wan_3SFCA`

    Parameters
    ----------

    demand_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    The demand dataframe, containing a location index and a total demand.
    supply_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    The supply dataframe, containing a location index and a level of supply.
    cost_df       : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                    This dataframe contains a link between demand and supply locations, and a cost between them.
                    It is not modified.
    max_cost      : float
                    This is the maximum cost to consider in the weighted sum;
                    note that it applies *along with* the weight function.
                    The preference weight *G* is computed before this threshold is applied.
    demand_index  : {bool, str}
                    is the name of the column (or index level) of `demand_df` that holds the location ID;
                    `True` means the IDs are the index of `demand_df`.
    demand_name   : str
                    is the name of the column of `demand_df` that holds the aggregate demand at a location.
    supply_index  : {bool, str}
                    is the name of the column of `supply_df` that holds the location ID.
                    If no such column exists (or it is `True`), the index of `supply_df` is used.
    supply_name   : str
                    is the name of the column of `supply_df` that holds the aggregate supply at a location.
    cost_origin   : str
                    The column name of the locations of users or consumers.
    cost_dest     : str
                    The column name of the supply or resource locations.
    cost_name     : str
                    The column name of the travel cost between origins and destinations
    weight_fn     : function
                    This function will weight the value of resources/facilities,
                    as a function of the raw cost.  It is required for this method.
    normalize     : bool
                    Accepted for interface compatibility; this function does not use it
                    and always returns the unnormalized series.

    Returns
    -------
    access     : pandas.Series
                 A -- potentially-weighted -- three-stage access ratio.

    Raises
    ------
    TypeError
        If `weight_fn` is `None`.
    """
    if weight_fn is None:
        raise TypeError(
            "three_stage_fca requires a callable weight_fn to compute the preference weights"
        )

    # build the distance weight W3 and the preference weight G on a copy,
    # so that the caller's cost dataframe is left untouched
    weighted_costs = cost_df.assign(W3=cost_df[cost_name].apply(weight_fn))
    w3_sums = (
        weighted_costs.groupby(cost_origin)["W3"].sum().rename("W3sum").reset_index()
    )
    weighted_costs = pd.merge(weighted_costs, w3_sums, on=cost_origin)
    weighted_costs["G"] = weighted_costs["W3"] / weighted_costs["W3sum"]

    # stage one: the (preference- and distance-weighted) demand reaching each supply location
    total_demand_series = weighted_catchment(
        demand_df,
        weighted_costs,
        max_cost,
        cost_source=cost_origin,
        cost_dest=cost_dest,
        cost_cost=cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
        three_stage_weight=True,
    )
    weighted_demand_name = demand_name + "_W"
    total_demand_series = total_demand_series.rename(weighted_demand_name)

    # the join below aligns on the index, so move the supply IDs there if they are in a column
    if supply_index is not True and supply_index in supply_df.columns:
        supply = supply_df.set_index(supply_index)
    else:
        supply = supply_df

    # hold the supply and aggregate demand at each location
    temp = supply.join(total_demand_series, how="right")

    # there may be NA values due to a shorter supply dataframe than the demand dataframe.
    # in this case, replace any potential NA values (which correspond to locations with no supply) with 0.
    # assign the result back: an in-place fillna on a selected column does not
    # reliably update the parent frame.
    temp[supply_name] = temp[supply_name].fillna(0)

    # the fractional ratio of supply to aggregate demand at each location, or Rl
    supply_to_total_demand_frame = (
        (temp[supply_name] / temp[weighted_demand_name])
        .to_frame("Rl")
        .rename_axis("geoid")
    )

    # stage two: sum, into a series, the supply to total demand ratios reachable from each origin
    three_stage_fca_series = weighted_catchment(
        supply_to_total_demand_frame,
        weighted_costs,
        max_cost,
        cost_source=cost_dest,
        cost_dest=cost_origin,
        cost_cost=cost_name,
        loc_index="geoid",
        loc_value="Rl",
        weight_fn=weight_fn,
        three_stage_weight=True,
    )

    return three_stage_fca_series