# Source code for access.access

import pandas as pd
import requests
import warnings
import logging

from . import fca
from . import raam
from . import weights
from . import helpers
from .datasets import Datasets

# Console handler shared by every ``Access`` instance.  Records are rendered as
# "<logger name> <level name, left-aligned in 8 columns> :: <message>".
access_log_format = logging.Formatter("%(name)s %(levelname)-8s :: %(message)s")
access_log_stream = logging.StreamHandler()
access_log_stream.setFormatter(access_log_format)


[docs]class Access: """ Spatial Access Class Parameters ---------- demand_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ or `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_ The origins dataframe, containing a location index and, optionally, a level of demand and geometry. demand_index : {bool, str} boolean of True indicates that the locations are already on the df index; otherwise the argument is a string containing the name of the column of `demand_df` that holds the origin ID. demand_value : str is the name of the column of `demand` that holds the aggregate demand at a location. supply_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ or `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_ The origins dataframe, containing a location index and, optionally, level of supply and geometry. supply_index : {bool, str} boolean of True indicates that the locations are already on the df index; otherwise the argument is a string containing the name of the column of `supply_df` that holds the origin ID. supply_value : {str, list} is the name of the column of `supply` that holds the aggregate supply at a location, or a list of such columns. cost_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ This dataframe contains a link from demand to supply locations, and a cost between them. cost_origin : str The column name of the index locations -- this is what will be grouped by. cost_dest : str The column name of the neighborhing demand locations -- this is what goes in the groups. cost_name : {str, list} The column(s) name of the travel cost(s). 
neighbor_cost_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ This dataframe contains a link from demand to neighbor locations, and a cost between them (running consumer to supplier). neighbor_cost_origin : str The column name of the origin locations -- this is what will be grouped by. neighbor_cost_dest : str The column name of the destination locations -- this is what goes in the groups. neighbor_cost_name : {str, list} The column name(s) of the travel cost(s). Attributes ---------- Access : pandas.DataFrame All of the calculated access measures. access_metadata : pandas.DataFrame Lists currently-available measures of access. cost_metadata : pandas.DataFrame Describes each of the currently-available supply to demand costs. """ logger_initialized = False
[docs] def __init__( self, demand_df, demand_value, supply_df, supply_value=False, demand_index=True, supply_index=True, cost_df=None, cost_origin=None, cost_dest=None, cost_name=None, neighbor_cost_df=None, neighbor_cost_origin=None, neighbor_cost_dest=None, neighbor_cost_name=None, ): """ Initialize the class. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets: >>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Using the example data, create an `Access` object. >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "destination", cost_name = "cost") """ self.log = logging.getLogger("access") if not Access.logger_initialized: self.log.addHandler(access_log_stream) self.log.setLevel(logging.INFO) self.log.propagate = False Access.logger_initialized = True self.supply_value_provided = True ### First all the dummy checks... 
if demand_index is not True and demand_index not in demand_df.columns: raise ValueError( "demand_index must either be True -- or it must be a column of demand_df" ) if demand_value not in demand_df.columns: raise ValueError( "demand_value must either be True -- or it must be a column of demand_df" ) if supply_index is not True and supply_index not in supply_df.columns: raise ValueError( "supply_index must either be True -- or it must be a column of supply_df" ) if type(supply_value) is str and supply_value not in supply_df.columns: raise ValueError("supply_value must be a column of supply_df") if type(supply_value) is list: if any([sv not in supply_df.columns for sv in supply_value]): raise ValueError("supply_value must be columns of supply_df") if cost_df is not None: if cost_origin not in cost_df.columns: raise ValueError("cost_origin must be a column of cost_df") if cost_dest not in cost_df.columns: raise ValueError("cost_dest must be a column of cost_df") if type(cost_name) is str and cost_name not in cost_df.columns: raise ValueError("cost_name must be a column of cost_df") if type(cost_name) is list: if any([cn not in cost_df.columns for cn in cost_name]): raise ValueError("cost_name must be columns of cost_df") if neighbor_cost_df is not None: if neighbor_cost_origin not in neighbor_cost_df.columns: raise ValueError( "neighbor_cost_origin must be a column of neighbor_cost_df" ) if neighbor_cost_dest not in neighbor_cost_df.columns: raise ValueError( "neighbor_cost_dest must be a column of neighbor_cost_df" ) if ( type(neighbor_cost_name) is str and neighbor_cost_name not in neighbor_cost_df.columns ): raise ValueError("neighbor_cost_name must be a column of cost_df") if type(neighbor_cost_name) is list: if any( [cn not in neighbor_cost_df.columns for cn in neighbor_cost_name] ): raise ValueError("neighbor_cost_names must be columns of cost_df") ### Now load the demand DFs. 
self.demand_df = demand_df.copy() self.demand_value = demand_value if demand_index is not True: self.demand_df.set_index(demand_index, inplace=True) ### And now the supply DFs. self.supply_df = supply_df.copy() if supply_value == False: self.log.info( """Warning: A supply value was not provided, so a default supply value of 1 was created in the column named "value". Note that without a supply value, you cannot use any of the floating catchment area methods.""" ) self.supply_value_provided = False supply_value = "value" self.supply_df[supply_value] = 1 if type(supply_value) is str: self.supply_types = [supply_value] elif type(supply_value) is list: self.supply_types = supply_value else: raise ValueError("supply_value must be string or list of strings.") if supply_index is not True: self.supply_df.set_index(supply_index, inplace=True) if cost_df is not None: self.cost_df = cost_df self.cost_origin = cost_origin self.cost_dest = cost_dest if type(cost_name) is str: self.cost_names = [cost_name] elif type(cost_name) is list: self.cost_names = cost_name else: raise ValueError("cost_name must be string or list of strings.") self._default_cost = self.cost_names[0] else: self.cost_df = pd.DataFrame(columns=["origin", "dest"]) self.cost_origin = "origin" self.cost_dest = "dest" self.cost_names = [] if neighbor_cost_df is not None: self.neighbor_cost_df = neighbor_cost_df self.neighbor_cost_origin = neighbor_cost_origin self.neighbor_cost_dest = neighbor_cost_dest self.neighbor_cost_name = neighbor_cost_name if type(neighbor_cost_name) is str: self.neighbor_cost_names = [neighbor_cost_name] elif type(neighbor_cost_name) is list: self.neighbor_cost_names = neighbor_cost_name else: raise ValueError( "neighbor_cost_name must be string or list of strings." 
) self._neighbor_default_cost = self.neighbor_cost_names[0] else: self.neighbor_cost_df = pd.DataFrame(columns=["origin", "dest"]) self.neighbor_cost_origin = "origin" self.neighbor_cost_dest = "dest" self.neighbor_cost_names = [] self.access_df = self.demand_df[[self.demand_value]].sort_index() self.access = pd.DataFrame(index=self.supply_df.index) self.access_metadata = pd.DataFrame( columns=["name", "distance", "function", "descriptor"] ) self.cost_metadata = pd.DataFrame(columns=["name", "type", "descriptor"]) return
[docs] def weighted_catchment( self, name="catchment", supply_cost=None, supply_values=None, weight_fn=None, max_cost=None, normalize=False, ): """ Calculate the catchment area (buffer) aggregate access score. Parameters ---------- name : str Column name for access values supply_cost : str Name of supply cost value column in supply_df supply_values : {str, list} Name(s) of supply values in supply_df weight_fn : function function to apply to the cost to reach the supply. In this way, you could run, e.g., a gravity function. (Be careful of course of values as distances go to 0!) max_cost : float Cutoff of cost values normalize : bool If True, return normalized access values; otherwise, return raw access values Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Create an Access object, as detailed in __init__.py >>> illinois_primary_care = Access(<...>) Call the floating catchment area with max_cost only >>> gravity = weights.gravity(scale = 60, alpha = -1) >>> illinois_primary_care.weighted_catchment(weight_fn = gravity) """ supply_cost = helpers.sanitize_supply_cost(self, supply_cost, name) supply_values = helpers.sanitize_supplies(self, supply_values) for s in supply_values: # Bryan consistently flipped origin and destination in this one -- very confusing. series = fca.weighted_catchment( loc_df=self.supply_df, loc_index=True, loc_value=s, cost_df=self.cost_df, cost_source=self.cost_dest, cost_dest=self.cost_origin, cost_cost=self._default_cost, weight_fn=weight_fn, max_cost=max_cost, ) series.name = name + "_" + s if series.name in self.access_df.columns: self.log.info("Overwriting {}.".format(series.name)) self.access_df.drop(series.name, axis=1, inplace=True) # store the raw, un-normalized access values self.access_df = self.access_df.join(series) if normalize: columns = [name + "_" + s for s in supply_values] return helpers.normalized_access(self, columns) return self.access_df.filter(regex="^" + name, axis=1)
[docs] def fca_ratio( self, name="fca", demand_cost=None, supply_cost=None, supply_values=None, max_cost=None, normalize=False, noise="quiet", ): """ Calculate the floating catchment area (buffer) ratio access score. Parameters ---------- name : str Column name for access values demand_cost : str Name of demand cost value column in demand_df supply_cost : str Name of supply cost value column in supply_df supply_values : {str, list} Name(s) of supply values in supply_df max_cost : float Cutoff of cost values normalize : bool If True, return normalized access values; otherwise, return raw access values noise : str Default 'quiet', otherwise gives messages that indicate potential issues. Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets: >>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Using the example data, create an `Access` object. 
>>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "destination", cost_name = "cost", neighbor_cost_df = chi_travel_costs, neighbor_cost_origin = "origin", neighbor_cost_dest = 'dest', neighbor_cost_name = 'cost') >>> chicago_primary_care.fca_ratio(name='fca',max_cost=30) fca_doc fca_dentist geoid 17031010100 0.001630 0.000807 17031010201 0.001524 0.000904 17031010202 0.001521 0.000908 ........... ........ ........ 17197884101 0.000437 0.000442 17197884103 0.000510 0.000498 17197980100 0.000488 0.000432 """ assert ( self.supply_value_provided == True ), "You must provide a supply value in order to use this functionality." supply_cost = helpers.sanitize_supply_cost(self, supply_cost, name) demand_cost = helpers.sanitize_demand_cost(self, demand_cost, name) supply_values = helpers.sanitize_supplies(self, supply_values) for s in supply_values: series = fca.fca_ratio( demand_df=self.demand_df, demand_index=self.demand_df.index.name, demand_name=self.demand_value, supply_df=self.supply_df, supply_index=self.supply_df.index.name, supply_name=s, demand_cost_df=self.neighbor_cost_df, supply_cost_df=self.cost_df, demand_cost_origin=self.neighbor_cost_origin, demand_cost_dest=self.neighbor_cost_dest, demand_cost_name=demand_cost, supply_cost_origin=self.cost_origin, supply_cost_dest=self.cost_dest, supply_cost_name=supply_cost, max_cost=max_cost, normalize=normalize, noise=noise, ) series.name = name + "_" + s if series.name in self.access_df.columns: self.log.info("Overwriting {}.".format(series.name)) self.access_df.drop(series.name, axis=1, inplace=True) # store the raw, un-normalized access values self.access_df = self.access_df.join(series) if normalize: columns = [name + "_" + s for s in supply_values] return helpers.normalized_access(self, columns) return 
self.access_df.filter(regex="^" + name, axis=1)
[docs] def raam( self, name="raam", cost=None, supply_values=None, normalize=False, tau=60, rho=None, max_cycles=150, initial_step=0.2, half_life=50, min_step=0.005, verbose=False, ): """Calculate the rational agent access model. :cite:`2019_saxon_snow_raam` Parameters ---------- name : str Column name for access values cost : str Name of cost variable, for reaching supply sites. supply_values : {str, list} Name(s) of supply values in supply_df normalize : bool If True, return normalized access values; otherwise, return raw access values tau : float tau parameter (travel time scale) rho : float rho parameter (congestion cost scale) max_cycles : int How many cycles to run the RAAM optimization for. initial_step : {int, float} If an float < 1, it is the proportion of a demand site that can shift, in the first cycle. If it is an integer, it is simply a limit on the total number. half_life : int How many cycles does it take to halve the move rate? min_step : {int, float} This is the minimum value, to which the moving fraction converges. verbose : bool Print some information as the optimization proceeds. Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets which correspond to the demand (population), supply (doctors and dentists) and cost (travel time), respectively. The sample data represents the Chicago metro area with a 50km buffer around the city boundaries. 
>>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 The `chi_travel_costs` dataset is the cost matrix, showing the travel time between each of the Census Tracts in the Chicago metro area. >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Now, create an instance of the `Access` class and specify the demand, supply, and cost datasets. >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "dest", cost_name = "cost") With the demand, supply, and cost data provided, we can now produce the RAAM access measures defining a floating catchment area of 30 minutes by setting the tau value to 30 (60 minutes is the default). >>> chicago_primary_care.raam(tau = 30) raam_doc raam_dentist geoid 17031010100 1.027597 1.137901 17031010201 0.940239 1.332557 17031010202 1.031144 1.413279 ........... ........ ........ 17197884101 2.365171 1.758800 17197884103 2.244007 1.709857 17197980100 2.225820 1.778264 You can access the results stored in the `Access.access_df` attribute. >>> chicago_primary_care.access_df pop raam_doc raam_dentist geoid 17031010100 4854 1.027597 1.137901 17031010201 6450 0.940239 1.332557 17031010202 2818 1.031144 1.413279 ........... .... ........ ........ 
17197884101 4166 2.365171 1.758800 17197884103 2776 2.244007 1.709857 17197980100 3264 2.225820 1.778264 By providing a string to the `name` argument, you can call the `Access.raam` method again using a different parameter of tau and save the outputs without overwriting previous ones. >>> chicago_primary_care.raam(name = "raam2", tau = 2) >>> chicago_primary_care.access_df pop raam_doc raam_dentist raam45_doc raam45_dentist geoid 17031010100 4854 1.027597 1.137901 0.967900 1.075116 17031010201 6450 0.940239 1.332557 0.908518 1.133207 17031010202 2818 1.031144 1.413279 0.962915 1.206775 ........... .... ........ ........ ........ ........ 17197884101 4166 2.365171 1.758800 1.921161 1.495642 17197884103 2776 2.244007 1.709857 1.900596 1.517022 17197980100 3264 2.225820 1.778264 1.868281 1.582177 If euclidean costs are available (see :meth:`Access.access.create_euclidean_distance`), you can use euclidean distance instead of time to calculate RAAM access measures. Insted of being measured in minutes, tau would now be measured in meters. >>> chicago_primary_care.raam(name = "raam_euclidean", tau = 100, cost = "euclidean") """ assert ( self.supply_value_provided == True ), "You must provide a supply value in order to use this functionality." 
cost = helpers.sanitize_supply_cost(self, cost, name) supply_values = helpers.sanitize_supplies(self, supply_values) for s in supply_values: raam_costs = raam.raam( demand_df=self.demand_df, supply_df=self.supply_df, cost_df=self.cost_df, demand_name=self.demand_value, supply_name=s, cost_origin=self.cost_origin, cost_dest=self.cost_dest, cost_name=cost, max_cycles=max_cycles, tau=tau, verbose=verbose, initial_step=initial_step, min_step=min_step, ) raam_costs.name = name + "_" + s if raam_costs.name in self.access_df.columns: self.log.info("Overwriting {}.".format(raam_costs.name)) self.access_df.drop(raam_costs.name, axis=1, inplace=True) # store the raw, un-normalized access values self.access_df = self.access_df.join(raam_costs) if normalize: columns = [name + "_" + s for s in supply_values] return helpers.normalized_access(self, columns) return self.access_df.filter(regex="^" + name, axis=1)
[docs] def two_stage_fca( self, name="2sfca", cost=None, max_cost=None, supply_values=None, weight_fn=None, normalize=False, ): """Calculate the two-stage floating catchment area access score. Note that while the 'traditional' 2SFCA method does not weight inputs, most modern implementations do, and `weight_fn` is allowed as an argument. Parameters ---------- name : str Column name for access values cost : str Name of cost value column in cost_df (supply-side) supply_values : {str, list} supply type or types. max_cost : float Cutoff of cost values weight_fn : function Weight to be applied to access values normalize : bool If True, return normalized access values; otherwise, return raw access values Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets: >>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Using the example data, create an `Access` object. 
>>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "destination", cost_name = "cost", neighbor_cost_df = chi_travel_costs, neighbor_cost_origin = "origin", neighbor_cost_dest = 'dest', neighbor_cost_name = 'cost') >>> chicago_primary_care.two_stage_fca(name = '2sfca', max_cost = 60) pop 2sfca_doc 2sfca_dentist geoid 17031010100 4854 0.000697 0.000402 17031010201 6450 0.000754 0.000455 17031010202 2818 0.000717 0.000424 ........... .... ........ ........ 17197884101 4166 0.000562 0.000370 17197884103 2776 0.000384 0.000291 17197980100 3264 0.000457 0.000325 To create new values for two-stage catchment areas using a different `max_cost`, you can use a new `name` and a different `max_cost` parameter. >>> chicago_primary_care.two_stage_fca(name = '2sfca30', max_cost = 30) 2sfca30_doc 2sfca30_dentist geoid 17031010100 0.000966 0.000480 17031010201 0.000996 0.000552 17031010202 0.000973 0.000542 ........... ........ ........ 17197884101 0.000225 0.000258 17197884103 0.000375 0.000382 17197980100 0.000352 0.000318 Both newly created two stage fca measures are stored in the `access_df` attribute of the `Access` object. >>> chicago_primary_care.access_df.head() pop 2sfca_doc 2sfca_dentist 2sfca30_doc 2sfca30_dentist geoid 17031010100 4854 0.000697 0.000402 0.000963 0.000479 17031010201 6450 0.000754 0.000455 0.000991 0.000551 17031010202 2818 0.000717 0.000424 0.000973 0.000541 17197884103 2776 0.000384 0.000291 0.000371 0.000377 17197980100 3264 0.000457 0.000325 0.000348 0.000314 """ assert ( self.supply_value_provided == True ), "You must provide a supply value in order to use this functionality." 
if cost is None: cost = self._default_cost if len(self.cost_names) > 1: self.log.info("Using default cost, {}, for {}.".format(cost, name)) if cost not in self.cost_names: raise ValueError("{} not an available cost.".format(cost)) if type(supply_values) is str: supply_values = [supply_values] if supply_values is None: supply_values = self.supply_types for s in supply_values: series = fca.two_stage_fca( demand_df=self.demand_df, demand_index=self.demand_df.index.name, demand_name=self.demand_value, supply_df=self.supply_df, supply_index=self.supply_df.index.name, supply_name=s, cost_df=self.cost_df, cost_origin=self.cost_origin, cost_dest=self.cost_dest, cost_name=cost, max_cost=max_cost, weight_fn=weight_fn, normalize=normalize, ) series.name = name + "_" + s if series.name in self.access_df.columns: self.log.info("Overwriting {}.".format(series.name)) self.access_df.drop(series.name, axis=1, inplace=True) self.access_df = self.access_df.join(series) if normalize: columns = [name + "_" + s for s in supply_values] return helpers.normalized_access(self, columns) return self.access_df.filter(regex="^" + name, axis=1)
[docs] def enhanced_two_stage_fca( self, name="e2sfca", cost=None, supply_values=None, max_cost=None, weight_fn=None, normalize=False, ): """Calculate the enhanced two-stage floating catchment area access score. Note that the only 'practical' difference between this function and the :meth:`Access.access.two_stage_fca` is that the weight function from the original paper, `weights.step_fn({10 : 1, 20 : 0.68, 30 : 0.22})` is applied if none is provided. Parameters ---------- name : str Column name for access values cost : str Name of cost value column in cost_df (supply-side) max_cost : float Cutoff of cost values supply_values : {str, list} supply type or types. weight_fn : function Weight to be applied to access values normalize : bool If True, return normalized access values; otherwise, return raw access values Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets: >>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Using the example data, create an `Access` object. 
>>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "destination", cost_name = "cost") We can create multiple stepwise functions for weights. >>> fn30 = weights.step_fn({10 : 1, 20 : 0.68, 30 : 0.22}) >>> fn60 = weights.step_fn({20 : 1, 40 : 0.68, 60 : 0.22}) Using those two difference stepwise functions, we can create two separate enhanced two stage fca measures. >>> chicago_primary_care.enhanced_two_stage_fca(name = '2sfca30', weight_fn = fn30) 2sfca30_doc 2sfca30_dentist geoid 17031010100 0.000970 0.000461 17031010201 0.001080 0.000557 17031010202 0.001027 0.000531 ........... ........ ........ 17197884101 0.000159 0.000241 17197884103 0.000285 0.000342 17197980100 0.000266 0.000310 Note the use of the `name` argument in order to specify a different column name prefix for the access measure. >>> chicago_primary_care.enhanced_two_stage_fca(name = '2sfca60', weight_fn = fn60) 2sfca60_doc 2sfca60_dentist geoid 17031010100 0.000687 0.000394 17031010201 0.000750 0.000447 17031010202 0.000720 0.000416 ........... ........ ........ 17197884101 0.000392 0.000301 17197884103 0.000289 0.000243 17197980100 0.000333 0.000268 Both newly created enhanced two stage fca measures are stored in the `access_df` attribute of the `Access` object. >>> chicago_primary_care.access_df.head() pop 2sfca30_doc 2sfca30_dentist 2sfca60_doc 2sfca60_dentist geoid 17031010100 4854 0.000970 0.000461 0.000687 0.000394 17031010201 6450 0.001080 0.000557 0.000750 0.000447 17031010202 2818 0.001027 0.000531 0.000720 0.000416 17031010300 6236 0.001030 0.000496 0.000710 0.000402 17031010400 5042 0.000900 0.000514 0.000786 0.000430 """ assert ( self.supply_value_provided == True ), "You must provide a supply value in order to use this functionality." 
if weight_fn is None: weight_fn = weights.step_fn({10: 1, 20: 0.68, 30: 0.22}) return self.two_stage_fca( name, cost, max_cost, supply_values, weight_fn, normalize )
[docs] def three_stage_fca( self, name="3sfca", cost=None, supply_values=None, max_cost=None, weight_fn=None, normalize=False, ): """Calculate the three-stage floating catchment area access score. Parameters ---------- name : str Column name for access values cost : str Name of cost value column in cost_df (supply-side) max_cost : float Cutoff of cost values weight_fn : function Weight to be applied to access values normalize : bool If True, return normalized access values; otherwise, return raw access values Returns ------- access : pandas Series Accessibility score for origin locations. Examples -------- Import the base `Access` class and `Datasets`. >>> from access import Access, Datasets Load each of the example datasets: >>> chi_docs_dents = Datasets.load_data('chi_doc') >>> chi_population = Datasets.load_data('chi_pop') >>> chi_travel_costs = Datasets.load_data('chi_times') >>> chi_docs_dents.head() geoid doc dentist 0 17031010100 1 1 1 17031010201 0 1 2 17031010202 4 1 3 17031010300 4 1 4 17031010400 0 2 >>> chi_population.head() geoid pop 0 17031010100 4854 1 17031010201 6450 2 17031010202 2818 3 17031010300 6236 4 17031010400 5042 >>> chi_travel_costs.head() origin dest cost 0 17093890101 17031010100 91.20 1 17093890101 17031010201 92.82 2 17093890101 17031010202 92.95 3 17093890101 17031010300 89.40 4 17093890101 17031010400 84.97 Using the example data, create an `Access` object. >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid", demand_value = "pop", supply_df = chi_docs_dents, supply_index = "geoid", supply_value = ["doc", "dentist"], cost_df = chi_travel_costs, cost_origin = "origin", cost_dest = "destination", cost_name = "cost") >>> chicago_primary_care.three_stage_fca(name='3sfca') 3sfca_doc 3sfca_dentist geoid 17031010100 0.001424 0.000690 17031010201 0.001462 0.000785 17031010202 0.001411 0.000767 ........... ........ ........ 
17197884101 0.000285 0.000380 17197884103 0.000404 0.000464 17197980100 0.000365 0.000407 The newly calculated 3sfca access measure is added to the `access_df` attribute of the `Access` object. >>> chicago_primary_care.access_df.head() 3sfca_doc 3sfca_dentist geoid 17031010100 0.001447 0.000698 17031010201 0.001487 0.000795 17031010202 0.001420 0.000777 17031010300 0.001479 0.000742 17031010400 0.001274 0.000726 """ assert ( self.supply_value_provided == True ), "You must provide a supply value in order to use this functionality." if weight_fn is None: weight_fn = weights.step_fn({10: 0.962, 20: 0.704, 30: 0.377, 60: 0.042}) cost = helpers.sanitize_supply_cost(self, cost, name) supply_values = helpers.sanitize_supplies(self, supply_values) for s in supply_values: series = fca.three_stage_fca( demand_df=self.demand_df, demand_index=self.demand_df.index.name, demand_name=self.demand_value, supply_df=self.supply_df, supply_index=self.supply_df.index.name, supply_name=s, cost_df=self.cost_df, cost_origin=self.cost_origin, cost_dest=self.cost_dest, cost_name=cost, max_cost=max_cost, weight_fn=weight_fn, normalize=normalize, ) series.name = name + "_" + s if series.name in self.access_df.columns: self.log.info("Overwriting {}.".format(series.name)) self.access_df.drop(series.name, axis=1, inplace=True) # store the raw, un-normalized access values self.access_df = self.access_df.join(series) if normalize: columns = [name + "_" + s for s in supply_values] return helpers.normalized_access(self, columns) return self.access_df.filter(regex="^" + name, axis=1)
@property
def norm_access_df(self):
    """Access columns rescaled so their demand-weighted mean equals one.

    Every column of ``access_df`` other than the demand column is divided by
    ``sum(column * demand) / sum(demand)``.

    The computation is done on a copy: reading this property does not modify
    ``access_df``, so the raw access values stored there are preserved.

    Returns
    -------
    pandas.DataFrame
        The normalized access columns (the demand column is excluded).
    """
    demand = self.access_df[self.demand_value]
    total_demand = demand.sum()

    access_columns = self.access_df.columns.difference([self.demand_value])
    normalized = self.access_df[access_columns].copy()

    for column in access_columns:
        mean_access = (normalized[column] * demand).sum() / total_demand
        normalized[column] = normalized[column] / mean_access

    return normalized
def score(self, col_dict, name="score"):
    """Combine already-calculated, normalized access measures into one score.

    The result is the weighted sum of the requested columns of
    ``norm_access_df``. It is also stored on ``access_df`` under ``name``,
    replacing any column that already has that name.

    Parameters
    ----------
    col_dict : dict
        Column names (keys) and weights.
    name : str
        Column name for access values

    Returns
    -------
    access : pandas Series
        Single, aggregate score for origin locations.

    Raises
    ------
    ValueError
        If a key of ``col_dict`` is not a column of ``access_df``.

    Examples
    --------

    Import the base `Access` class and `Datasets`.

    >>> from access import Access, Datasets

    Load each of the example datasets which correspond to the demand
    (population), supply (doctors and dentists) and cost (travel time),
    respectively. The sample data represents the Chicago metro area with a
    50km buffer around the city boundaries.

    >>> chi_docs_dents   = Datasets.load_data('chi_doc')
    >>> chi_population   = Datasets.load_data('chi_pop')
    >>> chi_travel_costs = Datasets.load_data('chi_times')

    >>> chi_docs_dents.head()
             geoid  doc  dentist
    0  17031010100    1        1
    1  17031010201    0        1
    2  17031010202    4        1
    3  17031010300    4        1
    4  17031010400    0        2

    >>> chi_population.head()
             geoid   pop
    0  17031010100  4854
    1  17031010201  6450
    2  17031010202  2818
    3  17031010300  6236
    4  17031010400  5042

    The `chi_travel_costs` dataset is the cost matrix, showing the travel
    time between each of the Census Tracts in the Chicago metro area.

    >>> chi_travel_costs.head()
            origin         dest   cost
    0  17093890101  17031010100  91.20
    1  17093890101  17031010201  92.82
    2  17093890101  17031010202  92.95
    3  17093890101  17031010300  89.40
    4  17093890101  17031010400  84.97

    Now, create an instance of the `Access` class and specify the demand,
    supply, and cost datasets.

    >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
    ...                               demand_value = "pop",
    ...                               supply_df = chi_docs_dents, supply_index = "geoid",
    ...                               supply_value = ["doc", "dentist"],
    ...                               cost_df = chi_travel_costs, cost_origin = "origin",
    ...                               cost_dest = "dest", cost_name = "cost")

    With the demand, supply, and cost data provided, we can now produce the
    RAAM access measures defining a floating catchment area of 30 minutes by
    setting the tau value to 30 (60 minutes is the default).

    >>> chicago_primary_care.raam(tau = 30)
                 raam_doc  raam_dentist
    geoid
    17031010100  1.027597      1.137901
    17031010201  0.940239      1.332557
    17031010202  1.031144      1.413279
    ...........  ........      ........
    17197884101  2.365171      1.758800
    17197884103  2.244007      1.709857
    17197980100  2.225820      1.778264

    Aggregate RAAM for doctors and dentists, weighting doctors more heavily.

    >>> chicago_primary_care.score(name = "raam_combo",
    ...                            col_dict = {"raam_doc" : 0.8, "raam_dentist" : 0.2})
    geoid
    17031010100    0.786697
    17031010201    0.765081
    17031010202    0.831578
    ...........    ........
    17197884101    1.677075
    17197884103    1.597554
    17197980100    1.597386
    """
    # Every requested component must already exist on access_df.
    for component in col_dict:
        if component in self.access_df.columns:
            continue
        raise ValueError("{} is not a calculated access value".format(component))

    component_weights = pd.Series(col_dict)
    normalized_components = self.norm_access_df[component_weights.index]

    aggregate = normalized_components.dot(component_weights)
    aggregate.name = name

    # Replace a previously stored score of the same name.
    if aggregate.name in self.access_df.columns:
        self.log.info("Overwriting {}.".format(aggregate.name))
        self.access_df.drop(aggregate.name, axis=1, inplace=True)

    self.access_df = self.access_df.join(aggregate)

    return aggregate
@property
def default_cost(self):
    """Currently selected default cost name (stored in ``_default_cost``).

    Assigning to this property is only allowed for names listed in
    ``cost_names``; anything else raises ``ValueError``.
    """
    return self._default_cost


@default_cost.setter
def default_cost(self, new_cost):
    # Guard clause: only names registered in ``cost_names`` are accepted.
    if new_cost not in self.cost_names:
        raise ValueError("Tried to set cost not available in cost df")
    self._default_cost = new_cost


@property
def neighbor_default_cost(self):
    """Currently selected default neighbor cost name (``_neighbor_default_cost``).

    Assigning to this property is only allowed for names listed in
    ``neighbor_cost_names``; anything else raises ``ValueError``.
    """
    return self._neighbor_default_cost


@neighbor_default_cost.setter
def neighbor_default_cost(self, new_cost):
    # Guard clause: only names registered in ``neighbor_cost_names`` are accepted.
    if new_cost not in self.neighbor_cost_names:
        raise ValueError("Tried to set cost not available in cost df")
    self._neighbor_default_cost = new_cost
def append_user_cost(self, new_cost_df, origin, destination, name):
    """Create a user cost, from demand to supply locations.

    The new cost column is outer-merged into ``cost_df`` on the
    origin/destination pair and its name is registered in ``cost_names``.
    The key columns of ``new_cost_df`` are first renamed to ``cost_origin`` /
    ``cost_dest`` so that pairs present only in ``new_cost_df`` still get
    usable key values and no duplicate key columns are created. A cost
    column with the same name that already exists is overwritten.

    Parameters
    ----------
    new_cost_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
        Holds the new cost....
    origin : str
        Name of the new origin variable in new_cost_df
    destination : str
        Name of the new destination variable in new_cost_df
    name : str
        Name of the new cost variable in new_cost_df

    Examples
    --------

    Import the base `Access` class and `Datasets`.

    >>> from access import Access, Datasets

    Load each of the example datasets:

    >>> chi_docs_dents   = Datasets.load_data('chi_doc')
    >>> chi_population   = Datasets.load_data('chi_pop')
    >>> chi_travel_costs = Datasets.load_data('chi_times')

    >>> chi_docs_dents.head()
             geoid  doc  dentist
    0  17031010100    1        1
    1  17031010201    0        1
    2  17031010202    4        1
    3  17031010300    4        1
    4  17031010400    0        2

    >>> chi_population.head()
             geoid   pop
    0  17031010100  4854
    1  17031010201  6450
    2  17031010202  2818
    3  17031010300  6236
    4  17031010400  5042

    >>> chi_travel_costs.head()
            origin         dest   cost
    0  17093890101  17031010100  91.20
    1  17093890101  17031010201  92.82
    2  17093890101  17031010202  92.95
    3  17093890101  17031010300  89.40
    4  17093890101  17031010400  84.97

    Using the example data, create an `Access` object.

    >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
    ...                               demand_value = "pop",
    ...                               supply_df = chi_docs_dents, supply_index = "geoid",
    ...                               supply_value = ["doc", "dentist"],
    ...                               cost_df = chi_travel_costs, cost_origin = "origin",
    ...                               cost_dest = "dest", cost_name = "cost")

    To add a new cost from demand to supply locations, first load the new
    cost data.

    >>> euclidean_cost = Datasets.load_data('chi_euclidean')
    >>> euclidean_cost.head()
            origin         dest     euclidean
    0  17093890101  17031010100  63630.788476
    1  17093890101  17031010201  62632.675522
    2  17093890101  17031010202  63073.735631
    3  17093890101  17031010300  63520.029749
    4  17093890101  17031010400  63268.514352

    Add new cost data to existing `Access` instance.

    >>> chicago_primary_care.append_user_cost(new_cost_df = euclidean_cost,
    ...                                       name = "euclidean",
    ...                                       origin = "origin",
    ...                                       destination = "dest")

    The newly added cost data can be seen in the `cost_df` attribute.

    >>> chicago_primary_care.cost_df.head()
            origin         dest   cost     euclidean
    0  17093890101  17031010100  91.20  63630.788476
    1  17093890101  17031010201  92.82  62632.675522
    2  17093890101  17031010202  92.95  63073.735631
    3  17093890101  17031010300  89.40  63520.029749
    4  17093890101  17031010400  84.97  63268.514352
    """
    # Align the incoming key names with the existing cost table; merging with
    # differently named keys would leave two sets of key columns and NaN keys
    # for pairs that only exist in ``new_cost_df``.
    incoming = new_cost_df[[origin, destination, name]].rename(
        columns={origin: self.cost_origin, destination: self.cost_dest}
    )

    # Replace an existing cost of the same name instead of letting the merge
    # produce "<name>_x" / "<name>_y" columns.
    if name in self.cost_df.columns:
        self.log.info("Overwriting {}.".format(name))
        self.cost_df = self.cost_df.drop(name, axis=1)

    self.cost_df = self.cost_df.merge(
        incoming, how="outer", on=[self.cost_origin, self.cost_dest]
    )

    # Add it to the list of costs (once).
    if name not in self.cost_names:
        self.cost_names.append(name)
def append_user_cost_neighbors(self, new_cost_df, origin, destination, name):
    """Create a user cost, from supply locations to other supply locations.

    NOTE(review): ``create_euclidean_distance_neighbors`` fills the same
    ``neighbor_cost_df`` table with distances among *demand* locations --
    confirm which side "neighbors" refers to.

    The new cost column is outer-merged into ``neighbor_cost_df`` on the
    origin/destination pair and its name is registered in
    ``neighbor_cost_names``. The key columns of ``new_cost_df`` are first
    renamed to ``neighbor_cost_origin`` / ``neighbor_cost_dest`` so that no
    duplicate key columns are created. A cost column with the same name that
    already exists is overwritten.

    Parameters
    ----------
    new_cost_df : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
        Holds the new cost....
    origin : str
        Name of the new origin variable in new_cost_df
    destination : str
        Name of the new destination variable in new_cost_df
    name : str
        Name of the new cost variable in new_cost_df

    Examples
    --------

    Import the base `Access` class and `Datasets`.

    >>> from access import Access, Datasets

    Load each of the example datasets:

    >>> chi_docs_dents   = Datasets.load_data('chi_doc')
    >>> chi_population   = Datasets.load_data('chi_pop')
    >>> chi_travel_costs = Datasets.load_data('chi_times')

    >>> chi_docs_dents.head()
             geoid  doc  dentist
    0  17031010100    1        1
    1  17031010201    0        1
    2  17031010202    4        1
    3  17031010300    4        1
    4  17031010400    0        2

    >>> chi_population.head()
             geoid   pop
    0  17031010100  4854
    1  17031010201  6450
    2  17031010202  2818
    3  17031010300  6236
    4  17031010400  5042

    >>> chi_travel_costs.head()
            origin         dest   cost
    0  17093890101  17031010100  91.20
    1  17093890101  17031010201  92.82
    2  17093890101  17031010202  92.95
    3  17093890101  17031010300  89.40
    4  17093890101  17031010400  84.97

    Using the example data, create an `Access` object.

    >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
    ...                               demand_value = "pop",
    ...                               supply_df = chi_docs_dents, supply_index = "geoid",
    ...                               supply_value = ["doc", "dentist"],
    ...                               cost_df = chi_travel_costs, cost_origin = "origin",
    ...                               cost_dest = "dest", cost_name = "cost")

    To add a new cost between neighboring locations, first load the new
    cost data.

    >>> euclidean_cost_neighbors = Datasets.load_data('chi_euclidean_neighbors')
    >>> euclidean_cost_neighbors.head()
            origin         dest  euclidean_neighbors
    0  17031010100  17031010100             0.000000
    1  17031010100  17031010201           998.259243
    2  17031010100  17031010202           635.203387
    3  17031010100  17031010300           653.415713
    4  17031010100  17031010400          2065.375554

    Add new cost data to existing `Access` instance.

    >>> chicago_primary_care.append_user_cost_neighbors(new_cost_df = euclidean_cost_neighbors,
    ...                                                 name = "euclidean_neighbors",
    ...                                                 origin = "origin",
    ...                                                 destination = "dest")

    The newly added cost data can be seen in the `neighbor_cost_df`
    attribute, as an additional ``euclidean_neighbors`` column next to any
    neighbor costs that were already stored.

    >>> chicago_primary_care.neighbor_cost_df.head()
    """
    # Align the incoming key names with the existing neighbor cost table;
    # merging with differently named keys would leave two sets of key columns
    # and NaN keys for pairs that only exist in ``new_cost_df``.
    incoming = new_cost_df[[origin, destination, name]].rename(
        columns={
            origin: self.neighbor_cost_origin,
            destination: self.neighbor_cost_dest,
        }
    )

    # Replace an existing cost of the same name instead of letting the merge
    # produce "<name>_x" / "<name>_y" columns.
    if name in self.neighbor_cost_df.columns:
        self.log.info("Overwriting {}.".format(name))
        self.neighbor_cost_df = self.neighbor_cost_df.drop(name, axis=1)

    self.neighbor_cost_df = self.neighbor_cost_df.merge(
        incoming,
        how="outer",
        on=[self.neighbor_cost_origin, self.neighbor_cost_dest],
    )

    # Add it to the list of costs (once).
    if name not in self.neighbor_cost_names:
        self.neighbor_cost_names.append(name)
def create_euclidean_distance(
    self, name="euclidean", threshold=0, centroid_o=False, centroid_d=False
):
    """Calculate the Euclidean distance from demand to supply locations.

    This is simply the geopandas `distance` function.
    The user is responsible for putting the geometries into an appropriate
    reference system.

    The resulting distances are outer-merged into ``cost_df`` under ``name``
    (replacing an existing column of that name), ``name`` is registered in
    ``cost_names``, and it becomes the default cost if none is set yet.

    Parameters
    ----------
    name : str
        Column name for euclidean distances
    threshold : int
        Buffer threshold for non-point geometries, AKA max_distance. Only
        pairs whose distance is strictly below ``threshold`` are kept, for
        point geometries as well -- so with the default of 0 no pair is
        retained.
    centroid_o : bool
        If True, convert geometries of demand_df (origins) to centroids;
        otherwise, no change
    centroid_d : bool
        If True, convert geometries of supply_df (destinations) to
        centroids; otherwise, no change

    Raises
    ------
    TypeError
        If ``demand_df`` or ``supply_df`` is not a GeoDataFrame.

    Examples
    --------

    NOTE: Creating euclidean distance measures requires having a geometry
    column in a `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_.

    Import the base `Access` class and `Datasets`.

    >>> from access import Access, Datasets

    Load each of the example datasets which correspond to the demand
    (population), supply (doctors and dentists) and cost (travel time),
    respectively. The sample data represents the Chicago metro area with a
    50km buffer around the city boundaries.

    >>> chi_docs_dents   = Datasets.load_data('chi_doc_geom')
    >>> chi_population   = Datasets.load_data('chi_pop_geom')
    >>> chi_travel_costs = Datasets.load_data('chi_times')

    >>> chi_docs_dents.head()
                 doc  dentist                       geometry
    geoid
    17031010100    1        1  POINT (354916.992 594670.505)
    17031010201    0        1  POINT (354105.876 594088.600)
    17031010202    4        1  POINT (354650.684 594093.822)
    17031010300    4        1  POINT (355209.361 594086.149)
    17031010400    0        2  POINT (355809.748 592808.043)

    >>> chi_population.head()
                  pop                       geometry
    geoid
    17031010100  4854  POINT (354916.992 594670.505)
    17031010201  6450  POINT (354105.876 594088.600)
    17031010202  2818  POINT (354650.684 594093.822)
    17031010300  6236  POINT (355209.361 594086.149)
    17031010400  5042  POINT (355809.748 592808.043)

    The `chi_travel_costs` dataset is the cost matrix, showing the travel
    time between each of the Census Tracts in the Chicago metro area.

    >>> chi_travel_costs.head()
            origin         dest   cost
    0  17093890101  17031010100  91.20
    1  17093890101  17031010201  92.82
    2  17093890101  17031010202  92.95
    3  17093890101  17031010300  89.40
    4  17093890101  17031010400  84.97

    Now, create an instance of the `Access` class and specify the demand,
    supply, and cost datasets.

    >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
    ...                               demand_value = "pop",
    ...                               supply_df = chi_docs_dents, supply_index = "geoid",
    ...                               supply_value = ["doc", "dentist"],
    ...                               cost_df = chi_travel_costs, cost_origin = "origin",
    ...                               cost_dest = "dest", cost_name = "cost")

    To calculate euclidean distances between Census Tracts within 250km of
    each other, you can set the `threshold` to 250000 (meters). Setting
    `centroid_o` and `centroid_d` to `True` calculates the centroid of the
    geom in your dataset.

    >>> chicago_primary_care.create_euclidean_distance(threshold = 250000,
    ...                                                centroid_o = True,
    ...                                                centroid_d = True)

    The newly calculated euclidean costs are added to the `cost_df`
    attribute of the `Access` class.

    >>> chicago_primary_care.cost_df.head()
            origin         dest   cost     euclidean
    0  17093890101  17031010100  91.20  63630.788476
    1  17093890101  17031010201  92.82  62632.675522
    2  17093890101  17031010202  92.95  63073.735631
    3  17093890101  17031010300  89.40  63520.029749
    4  17093890101  17031010400  84.97  63268.514352
    """
    import geopandas as gpd

    # TO-DO: check for unprojected geometries

    # Continue if the dataframes are geodataframes, else throw an error.
    # isinstance (not an exact type match) so GeoDataFrame subclasses work.
    if not isinstance(self.demand_df, gpd.GeoDataFrame):
        raise TypeError(
            "Cannot calculate euclidean distance without a geometry of demand side"
        )

    if not isinstance(self.supply_df, gpd.GeoDataFrame):
        raise TypeError(
            "Cannot calculate euclidean distance without a geometry of supply side"
        )

    # Reset the index so that the geoids are accessible
    df1 = self.demand_df.rename_axis("origin").reset_index()
    df2 = self.supply_df.rename_axis("dest").reset_index()

    # Convert to centroids if so-specified
    if centroid_o:
        df1.set_geometry(df1.centroid, inplace=True)
    if centroid_d:
        df2.set_geometry(df2.centroid, inplace=True)

    # Calculate the distances.
    if (df1.geom_type == "Point").all() and (df2.geom_type == "Point").all():
        # If both geometries are point types, merge on a temporary dummy
        # column, i.e. build every origin/destination pair.
        df1["temp"] = 1
        df2["temp"] = 1
        df1and2 = df1[["temp", "geometry", "origin"]].merge(
            df2[["temp", "geometry", "dest"]].rename(columns={"geometry": "geomb"})
        )
        df1and2.drop("temp", inplace=True, axis=1)
        df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))
    else:
        # Execute an sjoin for non-point geometries, based upon a buffer zone
        df1and2 = gpd.sjoin(
            df1,
            df2.rename(columns={"geometry": "geomb"}).set_geometry(
                df2.buffer(threshold)
            ),
        )
        df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))

    # Keep only pairs strictly closer than the threshold.
    df1and2 = df1and2[df1and2[name] < threshold]

    # Add it to the cost df.
    if name in self.cost_df.columns:
        self.log.info("Overwriting {}.".format(name))
        self.cost_df = self.cost_df.drop(name, axis=1)

    # Rename the temporary key columns to the cost table's own key names so
    # the outer merge yields one set of keys even when ``cost_origin`` /
    # ``cost_dest`` are not literally "origin" / "dest".
    new_costs = df1and2[[name, "origin", "dest"]].rename(
        columns={"origin": self.cost_origin, "dest": self.cost_dest}
    )
    self.cost_df = self.cost_df.merge(
        new_costs, how="outer", on=[self.cost_origin, self.cost_dest]
    )

    # Add it to the list of costs.
    if name not in self.cost_names:
        self.cost_names.append(name)

    # Set the default cost if it does not exist
    if not hasattr(self, "_default_cost"):
        self._default_cost = name
def create_euclidean_distance_neighbors(
    self, name="euclidean", threshold=0, centroid=False
):
    """Calculate the Euclidean distance among demand locations.

    Distances are computed between every pair of rows of ``demand_df`` and
    outer-merged into ``neighbor_cost_df`` under ``name`` (replacing an
    existing column of that name). ``name`` is registered in
    ``neighbor_cost_names`` and becomes the default neighbor cost if none is
    set yet.

    Parameters
    ----------
    name : str
        Column name for euclidean distances neighbors
    threshold : int
        Buffer threshold for non-point geometries, AKA max_distance. Only
        pairs whose distance is strictly below ``threshold`` are kept, for
        point geometries as well -- so with the default of 0 no pair is
        retained.
    centroid : bool
        If True, convert geometries to centroids; otherwise, no change

    Raises
    ------
    TypeError
        If ``demand_df`` is not a GeoDataFrame.

    Examples
    --------

    NOTE: Creating euclidean distance measures requires having a geometry
    column in a `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_.

    Import the base `Access` class and `Datasets`.

    >>> from access import Access, Datasets

    Load each of the example datasets which correspond to the demand
    (population), supply (doctors and dentists) and cost (travel time),
    respectively. The sample data represents the Chicago metro area with a
    50km buffer around the city boundaries.

    >>> chi_docs_dents   = Datasets.load_data('chi_doc_geom')
    >>> chi_population   = Datasets.load_data('chi_pop_geom')
    >>> chi_travel_costs = Datasets.load_data('chi_times')

    >>> chi_docs_dents.head()
                 doc  dentist                       geometry
    geoid
    17031010100    1        1  POINT (354916.992 594670.505)
    17031010201    0        1  POINT (354105.876 594088.600)
    17031010202    4        1  POINT (354650.684 594093.822)
    17031010300    4        1  POINT (355209.361 594086.149)
    17031010400    0        2  POINT (355809.748 592808.043)

    >>> chi_population.head()
                  pop                       geometry
    geoid
    17031010100  4854  POINT (354916.992 594670.505)
    17031010201  6450  POINT (354105.876 594088.600)
    17031010202  2818  POINT (354650.684 594093.822)
    17031010300  6236  POINT (355209.361 594086.149)
    17031010400  5042  POINT (355809.748 592808.043)

    The `chi_travel_costs` dataset is the cost matrix, showing the travel
    time between each of the Census Tracts in the Chicago metro area.

    >>> chi_travel_costs.head()
            origin         dest   cost
    0  17093890101  17031010100  91.20
    1  17093890101  17031010201  92.82
    2  17093890101  17031010202  92.95
    3  17093890101  17031010300  89.40
    4  17093890101  17031010400  84.97

    Make sure you assign your desired geometry projection, which you can
    change as follows.

    >>> chi_population = chi_population.to_crs(epsg = 2790)
    >>> chi_docs_dents = chi_docs_dents.to_crs(epsg = 2790)

    Now, create an instance of the `Access` class and specify the demand,
    supply, and cost datasets.

    >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
    ...                               demand_value = "pop",
    ...                               supply_df = chi_docs_dents, supply_index = "geoid",
    ...                               supply_value = ["doc", "dentist"],
    ...                               cost_df = chi_travel_costs, cost_origin = "origin",
    ...                               cost_dest = "dest", cost_name = "cost")

    To calculate euclidean distances between Census Tracts within 250km of
    each other, you can set the `threshold` to 250000 (meters). Setting
    `centroid` to `True` calculates the centroid of the geom in your
    dataset.

    >>> chicago_primary_care.create_euclidean_distance_neighbors(name = 'euclidean_neighbors',
    ...                                                          threshold = 250000,
    ...                                                          centroid = True)

    The newly calculated euclidean distance is stored in the
    `neighbor_cost_df` attribute.

    >>> chicago_primary_care.neighbor_cost_df.head()
            origin         dest  euclidean_neighbors
    0  17031010100  17031010100             0.000000
    1  17031010100  17031010201           998.259243
    2  17031010100  17031010202           635.203387
    3  17031010100  17031010300           653.415713
    4  17031010100  17031010400          2065.375554
    """
    import geopandas as gpd

    # TO-DO: check for unprojected geometries

    # Continue if the dataframe is a geodataframe, else throw an error.
    # isinstance (not an exact type match) so GeoDataFrame subclasses work.
    if not isinstance(self.demand_df, gpd.GeoDataFrame):
        # The frame being checked is the demand side, so say so.
        raise TypeError(
            "Cannot calculate euclidean distance without a geometry of demand side"
        )

    # Reset the index so that the geoids are accessible; both sides of the
    # pairing come from the demand locations.
    df1 = self.demand_df.rename_axis("origin").reset_index()
    df2 = self.demand_df.rename_axis("dest").reset_index()

    # Convert to centroids if so-specified
    if centroid:
        df1.set_geometry(df1.centroid, inplace=True)
        df2.set_geometry(df2.centroid, inplace=True)

    # Calculate the distances.
    if (df1.geom_type == "Point").all() and (df2.geom_type == "Point").all():
        # If both geometries are point types, merge on a temporary dummy
        # column, i.e. build every origin/destination pair.
        df1["temp"] = 1
        df2["temp"] = 1
        df1and2 = df1[["temp", "geometry", "origin"]].merge(
            df2[["temp", "geometry", "dest"]].rename(columns={"geometry": "geomb"})
        )
        df1and2.drop("temp", inplace=True, axis=1)
        df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))
    else:
        # Execute an sjoin for non-point geometries, based upon a buffer zone
        df1and2 = gpd.sjoin(
            df1,
            df2.rename(columns={"geometry": "geomb"}).set_geometry(
                df2.buffer(threshold)
            ),
        )
        df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))

    # Keep only pairs strictly closer than the threshold.
    df1and2 = df1and2[df1and2[name] < threshold]

    # Add it to the neighbor cost df, replacing a previous column of the same
    # name (mirrors the handling in ``create_euclidean_distance``).
    if name in self.neighbor_cost_df.columns:
        self.log.info("Overwriting {}.".format(name))
        self.neighbor_cost_df = self.neighbor_cost_df.drop(name, axis=1)

    # Rename the temporary key columns to the neighbor cost table's own key
    # names so the outer merge yields a single set of keys.
    new_costs = df1and2[[name, "origin", "dest"]].rename(
        columns={
            "origin": self.neighbor_cost_origin,
            "dest": self.neighbor_cost_dest,
        }
    )
    self.neighbor_cost_df = self.neighbor_cost_df.merge(
        new_costs,
        how="outer",
        on=[self.neighbor_cost_origin, self.neighbor_cost_dest],
    )

    # Add it to the list of costs (once).
    if name not in self.neighbor_cost_names:
        self.neighbor_cost_names.append(name)

    # Set the default cost if it does not exist
    if not hasattr(self, "_neighbor_default_cost"):
        self._neighbor_default_cost = name