Source code for access.fca

import warnings
import numpy as np
import pandas as pd

from .weights import step_fn


def weighted_catchment(
    loc_df,
    cost_df,
    max_cost=None,
    cost_source="origin",
    cost_dest="dest",
    cost_cost="cost",
    loc_index="geoid",
    loc_value=None,
    weight_fn=None,
    three_stage_weight=None,
):
    """
    Calculation of the floating catchment (buffered) accessibility
    sum, from DataFrames with computed distances.

    The catchment may be a simple buffer -- every location whose cost is
    below a single threshold -- or an additional weight may be applied as a
    function of the access cost.

    Parameters
    ----------
    loc_df : pandas.DataFrame
        The locations whose values are summed. It is merged onto `cost_df`
        by matching `loc_index` against the `cost_source` column.
    cost_df : pandas.DataFrame
        Precomputed costs between pairs of locations.
    max_cost : float, optional
        Only rows with a cost strictly below this value are kept. It applies
        *along with* the weight function. ``None`` disables the cut-off.
    cost_source : str
        Column of `cost_df` that is matched against the locations in `loc_df`.
    cost_dest : str
        Column of `cost_df` that the result is grouped by (it becomes the
        index of the returned series).
    cost_cost : str
        Column of `cost_df` holding the cost.
    loc_index : {bool, str}
        Name of the `loc_df` column (or index level) holding the location
        IDs. If it is ``True``, the IDs are taken from the index of `loc_df`.
    loc_value : str, optional
        Column of `loc_df` holding the value to sum. If ``None``, every
        matched location contributes 1, so the result is a (possibly
        weighted) count named ``"count"``.
    weight_fn : function, optional
        Weights each value as a function of the raw cost.
    three_stage_weight : object, optional
        When this is not ``None`` *and* `weight_fn` is given, `weight_fn`
        is not called; each value is instead multiplied by the ``W3`` and
        ``G`` columns that must already be present in `cost_df`.

    Returns
    -------
    resources : pandas.Series
        A -- potentially weighted -- sum of resources, facilities, or
        consumers, indexed by the values of `cost_dest`.
    """
    # Attach each location's attributes to every cost row that refers to it.
    if loc_index is True:
        merged = pd.merge(cost_df, loc_df, left_on=cost_source, right_index=True)
    else:
        merged = pd.merge(cost_df, loc_df, left_on=cost_source, right_on=loc_index)

    # Constrain by max cost (strict inequality).
    if max_cost is not None:
        merged = merged[merged[cost_cost] < max_cost]

    # Without a value column, fall back to counting: one unit per matched row.
    if loc_value is None:
        values = pd.Series(1, index=merged.index)
        result_name = "count"
    else:
        values = merged[loc_value]
        result_name = loc_value

    # Apply a weight if requested -- enhanced two-stage or three-stage.
    if weight_fn:
        if three_stage_weight is not None:
            values = values * merged["W3"] * merged["G"]
        else:
            values = values * merged[cost_cost].apply(weight_fn)

    # Multiplying series with different names drops the name, so restore it
    # before aggregating; callers rely on the result carrying `loc_value`.
    return values.rename(result_name).groupby(merged[cost_dest]).sum()
def fca_ratio(
    demand_df,
    supply_df,
    demand_cost_df,
    supply_cost_df,
    max_cost,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    demand_cost_origin="origin",
    demand_cost_dest="dest",
    demand_cost_name="cost",
    supply_cost_origin="origin",
    supply_cost_dest="dest",
    supply_cost_name="cost",
    weight_fn=None,
    normalize=False,
    noise="quiet",
):
    """Calculation of the floating catchment accessibility ratio, from
    DataFrames with precomputed distances.

    Two calls to `weighted_catchment` produce the demand and the supply
    reachable from each location; the ratio is supply divided by demand.

    Parameters
    ----------
    demand_df : pandas.DataFrame
        Contains a location index and a total demand.
    supply_df : pandas.DataFrame
        Contains a location index and a level of supply.
    demand_cost_df : pandas.DataFrame
        Costs between pairs of locations, used for the demand catchment.
    supply_cost_df : pandas.DataFrame
        Costs between pairs of locations, used for the supply catchment.
    max_cost : float
        The maximum cost to consider in the weighted sums; it applies
        *along with* the weight function.
    demand_index : str
        Name of the column of `demand_df` that holds the IDs.
    demand_name : str
        Name of the column of `demand_df` that holds the aggregate demand.
    supply_index : str
        Name of the column of `supply_df` that holds the IDs.
    supply_name : str
        Name of the column of `supply_df` that holds the aggregate supply.
    demand_cost_origin : str
        Column of `demand_cost_df` that the demand sum is grouped by.
    demand_cost_dest : str
        Column of `demand_cost_df` matched against the demand locations.
    demand_cost_name : str
        Column of `demand_cost_df` holding the travel cost.
    supply_cost_origin : str
        Column of `supply_cost_df` that the supply sum is grouped by.
    supply_cost_dest : str
        Column of `supply_cost_df` matched against the supply locations.
    supply_cost_name : str
        Column of `supply_cost_df` holding the travel cost.
    weight_fn : function
        Weights the value of resources/facilities as a function of the
        raw cost.
    normalize : bool
        Accepted for interface compatibility; this function does not read it.
    noise : str
        Default 'quiet'; any other value prints the locations whose ratio
        is undefined.

    Returns
    -------
    access : pandas.Series
        A -- potentially-weighted -- access ratio, named ``"FCA"``.
    """
    # Warn when some demand locations never appear as a supply-cost destination.
    demand_ids = set(demand_df.index.tolist())
    covered_ids = set(supply_cost_df[supply_cost_dest].unique())
    if demand_ids - covered_ids:
        warnings.warn("some tracts may be unaccounted for in supply_cost", stacklevel=1)

    # Total demand inside the buffer zone of each location.
    demand_in_reach = weighted_catchment(
        loc_df=demand_df,
        cost_df=demand_cost_df,
        max_cost=max_cost,
        cost_source=demand_cost_dest,
        cost_dest=demand_cost_origin,
        cost_cost=demand_cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
    )

    # Total supply inside the buffer zone of each location.
    supply_in_reach = weighted_catchment(
        loc_df=supply_df,
        cost_df=supply_cost_df,
        max_cost=max_cost,
        cost_source=supply_cost_dest,
        cost_dest=supply_cost_origin,
        cost_cost=supply_cost_name,
        loc_index=supply_index,
        loc_value=supply_name,
        weight_fn=weight_fn,
    )

    # Keep every location that has demand; locations without supply get 0.
    supply_frame = supply_in_reach.to_frame(name="supply")
    demand_frame = demand_in_reach.to_frame(name="demand")
    combined = supply_frame.join(demand_frame, how="right").fillna(0)

    # The floating catchment area ratio: supply divided by demand.
    ratio = (combined["supply"] / combined["demand"]).rename("FCA")

    if noise != "quiet":
        # Depending on the version history of the census tract data in use,
        # this lists the tracts that have undefined FCA values.
        print(ratio[ratio.isna()])

    return ratio
def two_stage_fca(
    demand_df,
    supply_df,
    cost_df,
    max_cost=None,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    cost_origin="origin",
    cost_dest="dest",
    cost_name="cost",
    weight_fn=None,
    normalize=False,
):
    """
    Calculation of the two-stage floating catchment accessibility
    ratio, from DataFrames with precomputed distances.

    A first call to `weighted_catchment` retrieves the demand using each
    supply location. The ratio of supply to that demand is then calculated
    at each supply location, and a second call weights and sums those
    ratios at each corresponding demand site.

    This is based on the original paper by Luo and Wang
    :cite:`2002_luo_spatial_accessibility_chicago`, as extended by Luo and Qi
    :cite:`2009_luo_qi_E2SFCA` and McGrail and Humphreys
    :cite:`2009_mcgrail_improved_2SFCA`.

    Parameters
    ----------
    demand_df : pandas.DataFrame
        Contains a location index and a total demand.
    supply_df : pandas.DataFrame
        Contains a level of supply; it is joined on its index.
    cost_df : pandas.DataFrame
        Costs between demand (origin) and supply (destination) locations.
    max_cost : float
        The maximum cost to consider in the weighted sum; it applies
        *along with* the weight function.
    demand_index : str
        Name of the column of `demand_df` that holds the location IDs.
    demand_name : str
        Name of the column of `demand_df` that holds the aggregate demand.
    supply_index : str
        Accepted for interface compatibility; this function does not read it.
    supply_name : str
        Name of the column of `supply_df` that holds the aggregate supply.
    cost_origin : str
        The column name of the locations of users or consumers.
    cost_dest : str
        The column name of the supply or resource locations.
    cost_name : str
        The column name of the travel cost between origins and destinations.
    weight_fn : function
        Weights the value of resources/facilities as a function of the
        raw cost.
    normalize : bool
        Accepted for interface compatibility; this function does not read it.

    Returns
    -------
    access : pandas.Series
        A -- potentially-weighted -- two-stage access ratio.
    """
    weighted_demand_name = demand_name + "_W"

    # Stage 1: total (weighted) demand reaching each supply location.
    total_demand_series = weighted_catchment(
        demand_df,
        cost_df,
        max_cost,
        cost_source=cost_origin,
        cost_dest=cost_dest,
        cost_cost=cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
    )

    # Hold the supply and the aggregate demand side by side per location.
    temp = supply_df.join(total_demand_series.rename(weighted_demand_name), how="right")

    # Locations that receive demand but are absent from supply_df come out of
    # the right join as NaN; treat them as having no supply. Assign the result
    # back: an in-place fillna on a column selection does not update the frame
    # under pandas Copy-on-Write.
    temp[supply_name] = temp[supply_name].fillna(0)

    # Rl: the ratio of supply to aggregate demand at each location.
    supply_to_total_demand_frame = pd.DataFrame(
        data={"Rl": temp[supply_name] / temp[weighted_demand_name]}
    )
    supply_to_total_demand_frame.index.name = "geoid"

    # Stage 2: sum the (weighted) ratios reachable from each demand location.
    return weighted_catchment(
        supply_to_total_demand_frame,
        cost_df,
        max_cost,
        cost_source=cost_dest,
        cost_dest=cost_origin,
        cost_cost=cost_name,
        loc_index="geoid",
        loc_value="Rl",
        weight_fn=weight_fn,
    )
def three_stage_fca(
    demand_df,
    supply_df,
    cost_df,
    max_cost,
    demand_index="geoid",
    demand_name="demand",
    supply_index="geoid",
    supply_name="supply",
    cost_origin="origin",
    cost_dest="dest",
    cost_name="cost",
    weight_fn=None,
    normalize=False,
):
    """Calculation of the three-stage floating catchment accessibility
    ratio, from DataFrames with precomputed distances.

    As in the two-stage method, one call to `weighted_catchment` retrieves
    the demand using each supply location, the ratio of supply to demand is
    calculated there, and a second call weights and sums that ratio at each
    demand site. The difference with respect to the 2SFCA method is that, in
    addition to the distance-dependent weight ``W3`` (`weight_fn` applied to
    the cost), a preference weight ``G`` is used: ``W3`` divided by the sum
    of ``W3`` over all rows sharing the same origin.

    See the original paper by Wan, Zou, and Sternberg.
    :cite:`2012_wan_3SFCA`

    Parameters
    ----------
    demand_df : pandas.DataFrame
        Contains a location index and a total demand.
    supply_df : pandas.DataFrame
        Contains a level of supply; it is joined on its index.
    cost_df : pandas.DataFrame
        Costs between demand (origin) and supply (destination) locations.
        It is not modified.
    max_cost : float
        The maximum cost to consider in the weighted sum; it applies
        *along with* the weight function.
    demand_index : str
        Name of the column of `demand_df` that holds the location IDs.
    demand_name : str
        Name of the column of `demand_df` that holds the aggregate demand.
    supply_index : str
        Accepted for interface compatibility; this function does not read it.
    supply_name : str
        Name of the column of `supply_df` that holds the aggregate supply.
    cost_origin : str
        The column name of the locations of users or consumers.
    cost_dest : str
        The column name of the supply or resource locations.
    cost_name : str
        The column name of the travel cost between origins and destinations.
    weight_fn : function
        Weights the value of resources/facilities as a function of the raw
        cost. It is applied to the cost column unconditionally, so a callable
        must be supplied.
    normalize : bool
        Accepted for interface compatibility; this function does not read it.

    Returns
    -------
    access : pandas.Series
        A -- potentially-weighted -- three-stage access ratio.
    """
    weighted_demand_name = demand_name + "_W"

    # Build the distance weight W3 and the preference weight G on a copy.
    # Assigning columns on cost_df itself would leak them into the caller's
    # DataFrame.
    w3 = cost_df[cost_name].apply(weight_fn)
    w3_sum = w3.groupby(cost_df[cost_origin]).transform("sum")
    weighted_costs = cost_df.assign(W3=w3, W3sum=w3_sum, G=w3 / w3_sum)

    # Stage 1: total (weighted) demand reaching each supply location.
    total_demand_series = weighted_catchment(
        demand_df,
        weighted_costs,
        max_cost,
        cost_source=cost_origin,
        cost_dest=cost_dest,
        cost_cost=cost_name,
        loc_index=demand_index,
        loc_value=demand_name,
        weight_fn=weight_fn,
        three_stage_weight=True,
    )

    # Hold the supply and the aggregate demand side by side per location.
    temp = supply_df.join(total_demand_series.rename(weighted_demand_name), how="right")

    # Locations that receive demand but are absent from supply_df come out of
    # the right join as NaN; treat them as having no supply. Assign the result
    # back: an in-place fillna on a column selection does not update the frame
    # under pandas Copy-on-Write.
    temp[supply_name] = temp[supply_name].fillna(0)

    # Rl: the ratio of supply to aggregate demand at each location.
    supply_to_total_demand_frame = pd.DataFrame(
        data={"Rl": temp[supply_name] / temp[weighted_demand_name]}
    )
    supply_to_total_demand_frame.index.name = "geoid"

    # Stage 2: sum the (weighted) ratios reachable from each demand location.
    return weighted_catchment(
        supply_to_total_demand_frame,
        weighted_costs,
        max_cost,
        cost_source=cost_dest,
        cost_dest=cost_origin,
        cost_cost=cost_name,
        loc_index="geoid",
        loc_value="Rl",
        weight_fn=weight_fn,
        three_stage_weight=True,
    )