Source code for access.access

import pandas as pd
import requests
import warnings
import logging

from . import fca
from . import raam
from . import weights
from . import helpers
from .datasets import Datasets

# Shared stream handler and message format for the "access" logger.
# The handler is created once at import time; Access.__init__ attaches it to
# the logger the first time an Access object is constructed.
access_log_stream = logging.StreamHandler()
# Format: logger name, level name left-justified to 8 characters, then the message.
access_log_format = logging.Formatter("%(name)s %(levelname)-8s :: %(message)s")
access_log_stream.setFormatter(access_log_format)


[docs]class Access:
    """
    Spatial Access Class

    Parameters
    ----------
    demand_df            : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ or `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_
                           The origins dataframe, containing a location index and, optionally, a level of demand and geometry.
    demand_index         : {bool, str}
                           boolean of True indicates that the locations are already on the df index;
                           otherwise the argument is a string containing the name of the column of `demand_df` that holds the origin ID.
    demand_value         : str
                           is the name of the column of `demand` that holds the aggregate demand at a location.
    supply_df            : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ or `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_
                           The destinations dataframe, containing a location index and, optionally, a level of supply and geometry.
    supply_index         : {bool, str}
                           boolean of True indicates that the locations are already on the df index;
                           otherwise the argument is a string containing the name of the column of `supply_df` that holds the destination ID.
    supply_value         : {str, list}
                           is the name of the column of `supply` that holds the aggregate supply at a location, or a list of such columns.
    cost_df              : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                           This dataframe contains a link from demand to supply locations, and a cost between them.
    cost_origin          : str
                           The column name of the index locations -- this is what will be grouped by.
    cost_dest            : str
                           The column name of the neighboring demand locations -- this is what goes in the groups.
    cost_name            : {str, list}
                           The column(s) name of the travel cost(s).
    neighbor_cost_df     : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                           This dataframe contains a link from demand to neighbor locations, and a cost between them (running consumer to supplier).
    neighbor_cost_origin : str
                           The column name of the origin locations -- this is what will be grouped by.
    neighbor_cost_dest   : str
                           The column name of the destination locations -- this is what goes in the groups.
    neighbor_cost_name   : {str, list}
                           The column name(s) of the travel cost(s).

    Attributes
    ----------

    access_df            : pandas.DataFrame
                           All of the calculated access measures.
    access_metadata      : pandas.DataFrame
                           Lists currently-available measures of access.
    cost_metadata        : pandas.DataFrame
                           Describes each of the currently-available supply to demand costs.
    """

    logger_initialized = False

[docs]    def __init__(
        self,
        demand_df,
        demand_value,
        supply_df,
        supply_value=False,
        demand_index=True,
        supply_index=True,
        cost_df=None,
        cost_origin=None,
        cost_dest=None,
        cost_name=None,
        neighbor_cost_df=None,
        neighbor_cost_origin=None,
        neighbor_cost_dest=None,
        neighbor_cost_name=None,
    ):

        """
        Initialize the class.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost")
        """
        self.log = logging.getLogger("access")

        if not Access.logger_initialized:
            self.log.addHandler(access_log_stream)
            self.log.setLevel(logging.INFO)
            self.log.propagate = False
            Access.logger_initialized = True

        self.supply_value_provided = True

        ### First all the dummy checks...

        if demand_index is not True and demand_index not in demand_df.columns:
            raise ValueError(
                "demand_index must either be True -- or it must be a column of demand_df"
            )

        if demand_value not in demand_df.columns:
            raise ValueError(
                "demand_value must either be True -- or it must be a column of demand_df"
            )

        if supply_index is not True and supply_index not in supply_df.columns:
            raise ValueError(
                "supply_index must either be True -- or it must be a column of supply_df"
            )

        if type(supply_value) is str and supply_value not in supply_df.columns:
            raise ValueError("supply_value must be a column of supply_df")

        if type(supply_value) is list:
            if any([sv not in supply_df.columns for sv in supply_value]):
                raise ValueError("supply_value must be columns of supply_df")

        if cost_df is not None:

            if cost_origin not in cost_df.columns:
                raise ValueError("cost_origin must be a column of cost_df")

            if cost_dest not in cost_df.columns:
                raise ValueError("cost_dest must be a column of cost_df")

            if type(cost_name) is str and cost_name not in cost_df.columns:
                raise ValueError("cost_name must be a column of cost_df")

            if type(cost_name) is list:
                if any([cn not in cost_df.columns for cn in cost_name]):
                    raise ValueError("cost_name must be columns of cost_df")

        if neighbor_cost_df is not None:

            if neighbor_cost_origin not in neighbor_cost_df.columns:
                raise ValueError(
                    "neighbor_cost_origin must be a column of neighbor_cost_df"
                )

            if neighbor_cost_dest not in neighbor_cost_df.columns:
                raise ValueError(
                    "neighbor_cost_dest must be a column of neighbor_cost_df"
                )

            if (
                type(neighbor_cost_name) is str
                and neighbor_cost_name not in neighbor_cost_df.columns
            ):
                raise ValueError("neighbor_cost_name must be a column of cost_df")

            if type(neighbor_cost_name) is list:
                if any(
                    [cn not in neighbor_cost_df.columns for cn in neighbor_cost_name]
                ):
                    raise ValueError("neighbor_cost_names must be columns of cost_df")

        ### Now load the demand DFs.

        self.demand_df = demand_df.copy()
        self.demand_value = demand_value
        if demand_index is not True:
            self.demand_df.set_index(demand_index, inplace=True)

        ### And now the supply DFs.

        self.supply_df = supply_df.copy()

        if supply_value == False:
            self.log.info(
                """Warning: A supply value was not provided, so a default
                             supply value of 1 was created in the column named "value".
                             Note that without a supply value, you cannot use any of the
                             floating catchment area methods."""
            )
            self.supply_value_provided = False
            supply_value = "value"
            self.supply_df[supply_value] = 1

        if type(supply_value) is str:
            self.supply_types = [supply_value]
        elif type(supply_value) is list:
            self.supply_types = supply_value
        else:
            raise ValueError("supply_value must be string or list of strings.")

        if supply_index is not True:
            self.supply_df.set_index(supply_index, inplace=True)

        if cost_df is not None:

            self.cost_df = cost_df
            self.cost_origin = cost_origin
            self.cost_dest = cost_dest

            if type(cost_name) is str:
                self.cost_names = [cost_name]

            elif type(cost_name) is list:
                self.cost_names = cost_name

            else:
                raise ValueError("cost_name must be string or list of strings.")

            self._default_cost = self.cost_names[0]

        else:
            self.cost_df = pd.DataFrame(columns=["origin", "dest"])
            self.cost_origin = "origin"
            self.cost_dest = "dest"
            self.cost_names = []

        if neighbor_cost_df is not None:

            self.neighbor_cost_df = neighbor_cost_df
            self.neighbor_cost_origin = neighbor_cost_origin
            self.neighbor_cost_dest = neighbor_cost_dest
            self.neighbor_cost_name = neighbor_cost_name

            if type(neighbor_cost_name) is str:
                self.neighbor_cost_names = [neighbor_cost_name]

            elif type(neighbor_cost_name) is list:
                self.neighbor_cost_names = neighbor_cost_name

            else:
                raise ValueError(
                    "neighbor_cost_name must be string or list of strings."
                )

            self._neighbor_default_cost = self.neighbor_cost_names[0]

        else:
            self.neighbor_cost_df = pd.DataFrame(columns=["origin", "dest"])
            self.neighbor_cost_origin = "origin"
            self.neighbor_cost_dest = "dest"
            self.neighbor_cost_names = []

        self.access_df = self.demand_df[[self.demand_value]].sort_index()

        self.access = pd.DataFrame(index=self.supply_df.index)

        self.access_metadata = pd.DataFrame(
            columns=["name", "distance", "function", "descriptor"]
        )
        self.cost_metadata = pd.DataFrame(columns=["name", "type", "descriptor"])

        return

[docs]    def weighted_catchment(
        self,
        name="catchment",
        supply_cost=None,
        supply_values=None,
        weight_fn=None,
        max_cost=None,
        normalize=False,
    ):
        """
        Calculate the catchment area (buffer) aggregate access score.

        Parameters
        ----------
        name                : str
                              Column name for access values
        supply_cost         : str
                              Name of supply cost value column in supply_df
        supply_values       : {str, list}
                              Name(s) of supply values in supply_df
        weight_fn           : function
                              function to apply to the cost to reach the supply.
                              In this way, you could run, e.g., a gravity function.
                              (Be careful of course of values as distances go to 0!)
        max_cost            : float
                              Cutoff of cost values
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values

        Returns
        -------

        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Create an Access object, as detailed in __init__.py

        >>> illinois_primary_care = Access(<...>)

        Call the floating catchment area with max_cost only

        >>> gravity = weights.gravity(scale = 60, alpha = -1)
        >>> illinois_primary_care.weighted_catchment(weight_fn = gravity)

        """

        supply_cost = helpers.sanitize_supply_cost(self, supply_cost, name)
        supply_values = helpers.sanitize_supplies(self, supply_values)

        for s in supply_values:

            # Bryan consistently flipped origin and destination in this one -- very confusing.
            series = fca.weighted_catchment(
                loc_df=self.supply_df,
                loc_index=True,
                loc_value=s,
                cost_df=self.cost_df,
                cost_source=self.cost_dest,
                cost_dest=self.cost_origin,
                cost_cost=self._default_cost,
                weight_fn=weight_fn,
                max_cost=max_cost,
            )

            series.name = name + "_" + s
            if series.name in self.access_df.columns:
                self.log.info("Overwriting {}.".format(series.name))
                self.access_df.drop(series.name, axis=1, inplace=True)

            # store the raw, un-normalized access values
            self.access_df = self.access_df.join(series)

        if normalize:

            columns = [name + "_" + s for s in supply_values]
            return helpers.normalized_access(self, columns)

        return self.access_df.filter(regex="^" + name, axis=1)

[docs]    def fca_ratio(
        self,
        name="fca",
        demand_cost=None,
        supply_cost=None,
        supply_values=None,
        max_cost=None,
        normalize=False,
        noise="quiet",
    ):
        """
        Calculate the floating catchment area (buffer) ratio access score.

        Parameters
        ----------
        name                : str
                              Column name for access values
        demand_cost         : str
                              Name of demand cost value column in demand_df
        supply_cost         : str
                              Name of supply cost value column in supply_df
        supply_values       : {str, list}
                              Name(s) of supply values in supply_df
        max_cost            : float
                              Cutoff of cost values
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values
        noise              : str
                             Default 'quiet', otherwise gives messages that indicate potential issues.

        Returns
        -------
        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost",
                                          neighbor_cost_df = chi_travel_costs, neighbor_cost_origin = "origin",
                                          neighbor_cost_dest = 'dest', neighbor_cost_name = 'cost')

        >>> chicago_primary_care.fca_ratio(name='fca',max_cost=30)
                      fca_doc  fca_dentist
        geoid
        17031010100  0.001630     0.000807
        17031010201  0.001524     0.000904
        17031010202  0.001521     0.000908
        ...........  ........     ........
        17197884101  0.000437     0.000442
        17197884103  0.000510     0.000498
        17197980100  0.000488     0.000432
        """

        assert (
            self.supply_value_provided == True
        ), "You must provide a supply value in order to use this functionality."

        supply_cost = helpers.sanitize_supply_cost(self, supply_cost, name)
        demand_cost = helpers.sanitize_demand_cost(self, demand_cost, name)
        supply_values = helpers.sanitize_supplies(self, supply_values)

        for s in supply_values:

            series = fca.fca_ratio(
                demand_df=self.demand_df,
                demand_index=self.demand_df.index.name,
                demand_name=self.demand_value,
                supply_df=self.supply_df,
                supply_index=self.supply_df.index.name,
                supply_name=s,
                demand_cost_df=self.neighbor_cost_df,
                supply_cost_df=self.cost_df,
                demand_cost_origin=self.neighbor_cost_origin,
                demand_cost_dest=self.neighbor_cost_dest,
                demand_cost_name=demand_cost,
                supply_cost_origin=self.cost_origin,
                supply_cost_dest=self.cost_dest,
                supply_cost_name=supply_cost,
                max_cost=max_cost,
                normalize=normalize,
                noise=noise,
            )

            series.name = name + "_" + s
            if series.name in self.access_df.columns:
                self.log.info("Overwriting {}.".format(series.name))
                self.access_df.drop(series.name, axis=1, inplace=True)

            # store the raw, un-normalized access values
            self.access_df = self.access_df.join(series)

        if normalize:

            columns = [name + "_" + s for s in supply_values]
            return helpers.normalized_access(self, columns)

        return self.access_df.filter(regex="^" + name, axis=1)

[docs]    def raam(
        self,
        name="raam",
        cost=None,
        supply_values=None,
        normalize=False,
        tau=60,
        rho=None,
        max_cycles=150,
        initial_step=0.2,
        half_life=50,
        min_step=0.005,
        verbose=False,
    ):
        """Calculate the rational agent access model. :cite:`2019_saxon_snow_raam`

        Parameters
        ----------
        name                : str
                              Column name for access values
        cost                : str
                              Name of cost variable, for reaching supply sites.
        supply_values       : {str, list}
                              Name(s) of supply values in supply_df
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values
        tau                 : float
                              tau parameter (travel time scale)
        rho                 : float
                              rho parameter (congestion cost scale)
        max_cycles          : int
                              How many cycles to run the RAAM optimization for.
        initial_step        : {int, float}
                              If an float < 1, it is the proportion of a demand site that can shift, in the first cycle.
                              If it is an integer, it is simply a limit on the total number.
        half_life           : int
                              How many cycles does it take to halve the move rate?
        min_step            : {int, float}
                              This is the minimum value, to which the moving fraction converges.
        verbose             : bool
                              Print some information as the optimization proceeds.

        Returns
        -------

        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets which correspond to the demand (population), supply (doctors and dentists)
        and cost (travel time), respectively. The sample data represents the Chicago metro area with a 50km buffer around the city boundaries.

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        The `chi_travel_costs` dataset is the cost matrix, showing the travel time between each of the Census Tracts in the Chicago metro area.

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Now, create an instance of the `Access` class and specify the demand, supply, and cost datasets.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "dest", cost_name = "cost")

        With the demand, supply, and cost data provided, we can now produce the RAAM access measures defining a floating catchment area of 30 minutes by setting the tau value to 30 (60 minutes is the default).

        >>> chicago_primary_care.raam(tau = 30)
                     raam_doc  raam_dentist
        geoid
        17031010100  1.027597      1.137901
        17031010201  0.940239      1.332557
        17031010202  1.031144      1.413279
        ...........  ........      ........
        17197884101  2.365171      1.758800
        17197884103  2.244007      1.709857
        17197980100  2.225820      1.778264

        You can access the results stored in the `Access.access_df` attribute.

        >>> chicago_primary_care.access_df
                      pop  raam_doc  raam_dentist
        geoid
        17031010100  4854  1.027597      1.137901
        17031010201  6450  0.940239      1.332557
        17031010202  2818  1.031144      1.413279
        ...........   ....  ........      ........
        17197884101  4166  2.365171      1.758800
        17197884103  2776  2.244007      1.709857
        17197980100  3264  2.225820      1.778264


        By providing a string to the `name` argument, you can call the `Access.raam` method again using a different parameter of tau and save the outputs without overwriting previous ones.

        >>> chicago_primary_care.raam(name = "raam2", tau = 2)
        >>> chicago_primary_care.access_df
                      pop  raam_doc  raam_dentist  raam45_doc  raam45_dentist
        geoid
        17031010100  4854  1.027597      1.137901    0.967900        1.075116
        17031010201  6450  0.940239      1.332557    0.908518        1.133207
        17031010202  2818  1.031144      1.413279    0.962915        1.206775
        ...........   ....  ........      ........   ........       ........
        17197884101  4166  2.365171      1.758800    1.921161        1.495642
        17197884103  2776  2.244007      1.709857    1.900596        1.517022
        17197980100  3264  2.225820      1.778264    1.868281        1.582177

        If euclidean costs are available (see :meth:`Access.access.create_euclidean_distance`),
        you can use euclidean distance instead of time to calculate RAAM access measures. Insted of being measured in minutes, tau would now be measured in meters.

        >>> chicago_primary_care.raam(name = "raam_euclidean", tau = 100, cost = "euclidean")

        """

        assert (
            self.supply_value_provided == True
        ), "You must provide a supply value in order to use this functionality."

        cost = helpers.sanitize_supply_cost(self, cost, name)
        supply_values = helpers.sanitize_supplies(self, supply_values)

        for s in supply_values:

            raam_costs = raam.raam(
                demand_df=self.demand_df,
                supply_df=self.supply_df,
                cost_df=self.cost_df,
                demand_name=self.demand_value,
                supply_name=s,
                cost_origin=self.cost_origin,
                cost_dest=self.cost_dest,
                cost_name=cost,
                max_cycles=max_cycles,
                tau=tau,
                verbose=verbose,
                initial_step=initial_step,
                min_step=min_step,
            )

            raam_costs.name = name + "_" + s
            if raam_costs.name in self.access_df.columns:
                self.log.info("Overwriting {}.".format(raam_costs.name))
                self.access_df.drop(raam_costs.name, axis=1, inplace=True)

            # store the raw, un-normalized access values
            self.access_df = self.access_df.join(raam_costs)

        if normalize:

            columns = [name + "_" + s for s in supply_values]
            return helpers.normalized_access(self, columns)

        return self.access_df.filter(regex="^" + name, axis=1)

[docs]    def two_stage_fca(
        self,
        name="2sfca",
        cost=None,
        max_cost=None,
        supply_values=None,
        weight_fn=None,
        normalize=False,
    ):
        """Calculate the two-stage floating catchment area access score.
        Note that while the 'traditional' 2SFCA method does not weight inputs,
        most modern implementations do, and `weight_fn` is allowed as an argument.

        Parameters
        ----------
        name                : str
                              Column name for access values
        cost                : str
                              Name of cost value column in cost_df (supply-side)
        supply_values       : {str, list}
                              supply type or types.
        max_cost            : float
                              Cutoff of cost values
        weight_fn           : function
                              Weight to be applied to access values
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values

        Returns
        -------

        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost",
                                          neighbor_cost_df = chi_travel_costs, neighbor_cost_origin = "origin",
                                          neighbor_cost_dest = 'dest', neighbor_cost_name = 'cost')

        >>> chicago_primary_care.two_stage_fca(name = '2sfca', max_cost = 60)
                      pop  2sfca_doc  2sfca_dentist
        geoid
        17031010100  4854   0.000697       0.000402
        17031010201  6450   0.000754       0.000455
        17031010202  2818   0.000717       0.000424
        ...........  ....   ........       ........
        17197884101  4166   0.000562       0.000370
        17197884103  2776   0.000384       0.000291
        17197980100  3264   0.000457       0.000325

        To create new values for two-stage catchment areas using a different `max_cost`, you can use a new `name` and a different `max_cost` parameter.

        >>> chicago_primary_care.two_stage_fca(name = '2sfca30', max_cost = 30)
                     2sfca30_doc  2sfca30_dentist
        geoid
        17031010100     0.000966         0.000480
        17031010201     0.000996         0.000552
        17031010202     0.000973         0.000542
        ...........     ........         ........
        17197884101     0.000225         0.000258
        17197884103     0.000375         0.000382
        17197980100     0.000352         0.000318

        Both newly created two stage fca measures are stored in the `access_df` attribute of the `Access` object.

        >>> chicago_primary_care.access_df.head()
                      pop  2sfca_doc  2sfca_dentist  2sfca30_doc  2sfca30_dentist
        geoid
        17031010100  4854   0.000697       0.000402     0.000963         0.000479
        17031010201  6450   0.000754       0.000455     0.000991         0.000551
        17031010202  2818   0.000717       0.000424     0.000973         0.000541
        17197884103  2776   0.000384       0.000291     0.000371         0.000377
        17197980100  3264   0.000457       0.000325     0.000348         0.000314
        """

        assert (
            self.supply_value_provided == True
        ), "You must provide a supply value in order to use this functionality."

        if cost is None:

            cost = self._default_cost
            if len(self.cost_names) > 1:
                self.log.info("Using default cost, {}, for {}.".format(cost, name))

        if cost not in self.cost_names:

            raise ValueError("{} not an available cost.".format(cost))

        if type(supply_values) is str:
            supply_values = [supply_values]
        if supply_values is None:
            supply_values = self.supply_types

        for s in supply_values:

            series = fca.two_stage_fca(
                demand_df=self.demand_df,
                demand_index=self.demand_df.index.name,
                demand_name=self.demand_value,
                supply_df=self.supply_df,
                supply_index=self.supply_df.index.name,
                supply_name=s,
                cost_df=self.cost_df,
                cost_origin=self.cost_origin,
                cost_dest=self.cost_dest,
                cost_name=cost,
                max_cost=max_cost,
                weight_fn=weight_fn,
                normalize=normalize,
            )

            series.name = name + "_" + s
            if series.name in self.access_df.columns:
                self.log.info("Overwriting {}.".format(series.name))
                self.access_df.drop(series.name, axis=1, inplace=True)

            self.access_df = self.access_df.join(series)

        if normalize:

            columns = [name + "_" + s for s in supply_values]
            return helpers.normalized_access(self, columns)

        return self.access_df.filter(regex="^" + name, axis=1)

[docs]    def enhanced_two_stage_fca(
        self,
        name="e2sfca",
        cost=None,
        supply_values=None,
        max_cost=None,
        weight_fn=None,
        normalize=False,
    ):
        """Calculate the enhanced two-stage floating catchment area access score.
        Note that the only 'practical' difference between this function and the
        :meth:`Access.access.two_stage_fca` is that the weight function from the original paper,
        `weights.step_fn({10 : 1, 20 : 0.68, 30 : 0.22})` is applied if none is provided.

        Parameters
        ----------
        name                : str
                              Column name for access values
        cost                : str
                              Name of cost value column in cost_df (supply-side)
        max_cost            : float
                              Cutoff of cost values
        supply_values       : {str, list}
                              supply type or types.
        weight_fn           : function
                              Weight to be applied to access values
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values

        Returns
        -------

        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042


        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost")

        We can create multiple stepwise functions for weights.

        >>> fn30 = weights.step_fn({10 : 1, 20 : 0.68, 30 : 0.22})
        >>> fn60 = weights.step_fn({20 : 1, 40 : 0.68, 60 : 0.22})

        Using those two difference stepwise functions, we can create two separate enhanced two stage fca measures.

        >>> chicago_primary_care.enhanced_two_stage_fca(name = '2sfca30', weight_fn = fn30)
                     2sfca30_doc  2sfca30_dentist
        geoid
        17031010100     0.000970         0.000461
        17031010201     0.001080         0.000557
        17031010202     0.001027         0.000531
        ...........     ........         ........
        17197884101     0.000159         0.000241
        17197884103     0.000285         0.000342
        17197980100     0.000266         0.000310

        Note the use of the `name` argument in order to specify a different column name prefix for the access measure.

        >>> chicago_primary_care.enhanced_two_stage_fca(name = '2sfca60', weight_fn = fn60)
                     2sfca60_doc  2sfca60_dentist
        geoid
        17031010100     0.000687         0.000394
        17031010201     0.000750         0.000447
        17031010202     0.000720         0.000416
        ...........     ........         ........
        17197884101     0.000392         0.000301
        17197884103     0.000289         0.000243
        17197980100     0.000333         0.000268

        Both newly created enhanced two stage fca measures are stored in the `access_df` attribute of the `Access` object.

        >>> chicago_primary_care.access_df.head()
                      pop  2sfca30_doc  2sfca30_dentist  2sfca60_doc  2sfca60_dentist
        geoid
        17031010100  4854     0.000970         0.000461     0.000687         0.000394
        17031010201  6450     0.001080         0.000557     0.000750         0.000447
        17031010202  2818     0.001027         0.000531     0.000720         0.000416
        17031010300  6236     0.001030         0.000496     0.000710         0.000402
        17031010400  5042     0.000900         0.000514     0.000786         0.000430
        """

        assert (
            self.supply_value_provided == True
        ), "You must provide a supply value in order to use this functionality."

        if weight_fn is None:
            weight_fn = weights.step_fn({10: 1, 20: 0.68, 30: 0.22})

        return self.two_stage_fca(
            name, cost, max_cost, supply_values, weight_fn, normalize
        )

[docs]    def three_stage_fca(
        self,
        name="3sfca",
        cost=None,
        supply_values=None,
        max_cost=None,
        weight_fn=None,
        normalize=False,
    ):
        """Calculate the three-stage floating catchment area access score.

        Parameters
        ----------
        name                : str
                              Column name for access values
        cost                : str
                              Name of cost value column in cost_df (supply-side)
        max_cost            : float
                              Cutoff of cost values
        weight_fn           : function
                              Weight to be applied to access values
        normalize           : bool
                              If True, return normalized access values; otherwise, return raw access values

        Returns
        -------

        access              : pandas Series
                              Accessibility score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost")

        >>> chicago_primary_care.three_stage_fca(name='3sfca')
                     3sfca_doc  3sfca_dentist
        geoid
        17031010100   0.001424       0.000690
        17031010201   0.001462       0.000785
        17031010202   0.001411       0.000767
        ...........   ........       ........
        17197884101   0.000285       0.000380
        17197884103   0.000404       0.000464
        17197980100   0.000365       0.000407

        The newly calculated 3sfca access measure is added to the `access_df` attribute of the `Access` object.

        >>> chicago_primary_care.access_df.head()
                             3sfca_doc  3sfca_dentist
        geoid
        17031010100   0.001447       0.000698
        17031010201   0.001487       0.000795
        17031010202   0.001420       0.000777
        17031010300   0.001479       0.000742
        17031010400   0.001274       0.000726
        """

        assert (
            self.supply_value_provided == True
        ), "You must provide a supply value in order to use this functionality."

        if weight_fn is None:
            weight_fn = weights.step_fn({10: 0.962, 20: 0.704, 30: 0.377, 60: 0.042})

        cost = helpers.sanitize_supply_cost(self, cost, name)
        supply_values = helpers.sanitize_supplies(self, supply_values)

        for s in supply_values:

            series = fca.three_stage_fca(
                demand_df=self.demand_df,
                demand_index=self.demand_df.index.name,
                demand_name=self.demand_value,
                supply_df=self.supply_df,
                supply_index=self.supply_df.index.name,
                supply_name=s,
                cost_df=self.cost_df,
                cost_origin=self.cost_origin,
                cost_dest=self.cost_dest,
                cost_name=cost,
                max_cost=max_cost,
                weight_fn=weight_fn,
                normalize=normalize,
            )

            series.name = name + "_" + s
            if series.name in self.access_df.columns:
                self.log.info("Overwriting {}.".format(series.name))
                self.access_df.drop(series.name, axis=1, inplace=True)

            # store the raw, un-normalized access values
            self.access_df = self.access_df.join(series)

        if normalize:

            columns = [name + "_" + s for s in supply_values]
            return helpers.normalized_access(self, columns)

        return self.access_df.filter(regex="^" + name, axis=1)

    @property
    def norm_access_df(self):
        """Access values rescaled so each column's demand-weighted mean equals 1.

        Every column of `access_df` other than the demand column is divided by
        ``sum(column * demand) / sum(demand)``.

        The result is computed on a separate frame: `access_df` itself is left
        untouched, so reading this property never overwrites the stored values.

        Returns
        -------

        norm_access         : pandas DataFrame
                              Normalized access columns (demand column excluded),
                              in the sorted order produced by `Index.difference`.
        """
        value_columns = self.access_df.columns.difference([self.demand_value])
        demand = self.access_df[self.demand_value]
        values = self.access_df[value_columns]

        # Demand-weighted mean of every access column, computed in one pass.
        weighted_means = values.mul(demand, axis=0).sum() / demand.sum()

        # `div` returns a new frame; nothing is written back to `access_df`.
        return values.div(weighted_means, axis=1)

[docs]    def score(self, col_dict, name="score"):
        """Weighted aggregate of multiple already-calculated, normalized access components.

        Parameters
        ----------
        name                : str
                              Column name for access values
        col_dict            : dict
                              Column names (keys) and weights.

        Returns
        -------

        access              : pandas Series
                              Single, aggregate score for origin locations.

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets which correspond to the demand (population), supply (doctors and dentists)
        and cost (travel time), respectively. The sample data represents the Chicago metro area with a 50km buffer around the city boundaries.

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        The `chi_travel_costs` dataset is the cost matrix, showing the travel time between each of the Census Tracts in the Chicago metro area.

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Now, create an instance of the `Access` class and specify the demand, supply, and cost datasets.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "dest", cost_name = "cost")

        With the demand, supply, and cost data provided, we can now produce the RAAM access measures defining a floating catchment area of 30 minutes by setting the tau value to 30 (60 minutes is the default).

        >>> chicago_primary_care.raam(tau = 30)
                     raam_doc  raam_dentist
        geoid
        17031010100  1.027597      1.137901
        17031010201  0.940239      1.332557
        17031010202  1.031144      1.413279
        ...........  ........      ........
        17197884101  2.365171      1.758800
        17197884103  2.244007      1.709857
        17197980100  2.225820      1.778264

        Aggregate RAAM for doctors and dentists, weighting doctors more heavily.

        >>> chicago_primary_care.score(name = "raam_combo", col_dict = {"raam_doc" : 0.8, "raam_dentist" : 0.2})
        geoid
        17031010100    0.786697
        17031010201    0.765081
        17031010202    0.831578
        ...........    ........
        17197884101    1.677075
        17197884103    1.597554
        17197980100    1.597386
        """

        for v in col_dict:
            if v not in self.access_df.columns:
                raise ValueError("{} is not a calculated access value".format(v))

        weights = pd.Series(col_dict)

        weighted_score = self.norm_access_df[weights.index].dot(weights)

        weighted_score.name = name
        if weighted_score.name in self.access_df.columns:
            self.log.info("Overwriting {}.".format(weighted_score.name))
            self.access_df.drop(weighted_score.name, axis=1, inplace=True)

        self.access_df = self.access_df.join(weighted_score)

        return weighted_score

    @property
    def default_cost(self):
        """Name of the cost measure currently used as the default."""
        return self._default_cost

    @default_cost.setter
    def default_cost(self, new_cost):
        """Change the default cost measure.

        Raises
        ------

        ValueError
            If `new_cost` is not one of `cost_names`; the current default is kept.
        """

        # Guard first: only names already registered in `cost_names` are accepted.
        if new_cost not in self.cost_names:
            raise ValueError("Tried to set cost not available in cost df")

        self._default_cost = new_cost

    @property
    def neighbor_default_cost(self):
        """Name of the neighbor cost measure currently used as the default."""
        return self._neighbor_default_cost

    @neighbor_default_cost.setter
    def neighbor_default_cost(self, new_cost):
        """Change the default neighbor cost measure.

        Raises
        ------

        ValueError
            If `new_cost` is not one of `neighbor_cost_names`; the current default is kept.
        """

        # Guard first: only names already registered in `neighbor_cost_names` are accepted.
        if new_cost not in self.neighbor_cost_names:
            raise ValueError("Tried to set cost not available in cost df")

        self._neighbor_default_cost = new_cost

[docs]    def append_user_cost(self, new_cost_df, origin, destination, name):
        """Create a user cost, from demand to supply locations.

        Parameters
        ----------
        new_cost_df         : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                              Holds the new cost....
        name                : str
                              Name of the new cost variable in new_cost_df
        origin              : str
                              Name of the new origin variable in new_cost_df
        destination         : str
                              Name of the new destination variable in new_cost_df

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost")

        To add a new cost from demand to supply locations, first load the new cost data.

        >>> euclidean_cost = Datasets.load_data('chi_euclidean')
            euclidean_cost.head()
                origin         dest     euclidean
        0  17093890101  17031010100  63630.788476
        1  17093890101  17031010201  62632.675522
        2  17093890101  17031010202  63073.735631
        3  17093890101  17031010300  63520.029749
        4  17093890101  17031010400  63268.514352

        Add new cost data to existing `Access` instance.

        >>> chicago_primary_care.append_user_cost(new_cost_df = euclidean_cost,
                                           name = "euclidean",
                                           origin = "origin",
                                           destination = "dest")

        The newly added cost data can be seen in the `cost_df` attribute.

        >>> chicago_primary_care.cost_df.head()
                origin         dest   cost     euclidean
        0  17093890101  17031010100  91.20  63630.788476
        1  17093890101  17031010201  92.82  62632.675522
        2  17093890101  17031010202  92.95  63073.735631
        3  17093890101  17031010300  89.40  63520.029749
        4  17093890101  17031010400  84.97  63268.514352

        """

        # Add it to the list of costs.
        self.cost_df = self.cost_df.merge(
            new_cost_df[[origin, destination, name]],
            how="outer",
            left_on=[self.cost_origin, self.cost_dest],
            right_on=[origin, destination],
        )
        self.cost_names.append(name)

[docs]    def append_user_cost_neighbors(self, new_cost_df, origin, destination, name):
        """Create a user cost, from supply locations to other supply locations.

        Parameters
        ----------
        new_cost_df         : `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
                              Holds the new cost....
        cost                : str
                              Name of the new cost variable in new_cost_df
        origin              : str
                              Name of the new origin variable in new_cost_df
        destination         : str
                              Name of the new destination variable in new_cost_df

        Examples
        --------

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets:

        >>> chi_docs_dents   = Datasets.load_data('chi_doc')
        >>> chi_population   = Datasets.load_data('chi_pop')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                 geoid  doc  dentist
        0  17031010100    1        1
        1  17031010201    0        1
        2  17031010202    4        1
        3  17031010300    4        1
        4  17031010400    0        2

        >>> chi_population.head()
                 geoid   pop
        0  17031010100  4854
        1  17031010201  6450
        2  17031010202  2818
        3  17031010300  6236
        4  17031010400  5042

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Using the example data, create an `Access` object.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "destination", cost_name = "cost")

        To add a new cost from demand to supply locations, first load the new cost data.

        >>> euclidean_cost_neighbors = Datasets.load_data('chi_euclidean_neighbors')
            euclidean_cost_neighbors.head()
                origin         dest  euclidean_neighbors
        0  17031010100  17031010100             0.000000
        1  17031010100  17031010201           998.259243
        2  17031010100  17031010202           635.203387
        3  17031010100  17031010300           653.415713
        4  17031010100  17031010400          2065.375554

        Add new cost data to existing `Access` instance.

        >>> chicago_primary_care.append_user_cost_neighbors(new_cost_df = euclidean_cost_neighbors,
                                                     name = "euclidean_neighbors",
                                                     origin = "origin",
                                                     destination = "dest")

        The newly added cost data can be seen in the `neighbor_cost_df` attribute.

        >>> chicago_primary_care.neighbor_cost_df.head()
                origin         dest   cost   euclidean_neighbors
        0  17093890101  17031010100  91.20          63630.788476
        1  17093890101  17031010201  92.82          62632.675522
        2  17093890101  17031010202  92.95          63073.735631
        3  17093890101  17031010300  89.40          63520.029749
        4  17093890101  17031010400  84.97          63268.514352
        """

        # Add it to the list of costs.
        self.neighbor_cost_df = self.neighbor_cost_df.merge(
            new_cost_df[[origin, destination, name]],
            how="outer",
            left_on=[self.neighbor_cost_origin, self.neighbor_cost_dest],
            right_on=[origin, destination],
        )
        self.neighbor_cost_names.append(name)

[docs]    def create_euclidean_distance(
        self, name="euclidean", threshold=0, centroid_o=False, centroid_d=False
    ):
        """Calculate the Euclidean distance from demand to supply locations.
        This is simply the geopandas `distance` function.
        The user is responsible for putting the geometries into an appropriate reference system.

        Parameters
        ----------
        name                : str
                              Column name for euclidean distances
        threshold           : int
                              Buffer threshold for non-point geometries, AKA max_distance
        centroid_o          : bool
                              If True, convert geometries of demand_df (origins) to centroids; otherwise, no change
        centroid_d          : bool
                              If True, convert geometries of supply_df (destinations) to centroids; otherwise, no change

        Examples
        --------

        NOTE: Creating euclidean distance measures requires having a geometry column in a `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_.

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets which correspond to the demand (population), supply (doctors and dentists)
        and cost (travel time), respectively. The sample data represents the Chicago metro area with a 50km buffer around the city boundaries.

        >>> chi_docs_dents   = Datasets.load_data('chi_doc_geom')
        >>> chi_population   = Datasets.load_data('chi_pop_geom')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                     doc  dentist                       geometry
        geoid
        17031010100    1        1  POINT (354916.992 594670.505)
        17031010201    0        1  POINT (354105.876 594088.600)
        17031010202    4        1  POINT (354650.684 594093.822)
        17031010300    4        1  POINT (355209.361 594086.149)
        17031010400    0        2  POINT (355809.748 592808.043)

        >>> chi_population.head()
                      pop                       geometry
        geoid
        17031010100  4854  POINT (354916.992 594670.505)
        17031010201  6450  POINT (354105.876 594088.600)
        17031010202  2818  POINT (354650.684 594093.822)
        17031010300  6236  POINT (355209.361 594086.149)
        17031010400  5042  POINT (355809.748 592808.043)

        The `chi_travel_costs` dataset is the cost matrix, showing the travel time between each of the Census Tracts in the Chicago metro area.

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Now, create an instance of the `Access` class and specify the demand, supply, and cost datasets.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "dest", cost_name = "cost")

        To calculate euclidean distances between Census Tracts within 250km of eachother, you can set the `threshold` to 250000 (meters). Setting `centroid_o` and `centroid_d` to `True` calculates the centroid of the geom in your dataset.

        >>> chicago_primary_care.create_euclidean_distance(threshold = 250000, centroid_o = True, centroid_d = True)

        The newly calculated euclidean costs are added to the `cost_df` attribute of the `Access` class.

        >>> chicago_primary_care_geom.cost_df.head()
                origin         dest   cost     euclidean
        0  17093890101  17031010100  91.20  63630.788476
        1  17093890101  17031010201  92.82  62632.675522
        2  17093890101  17031010202  92.95  63073.735631
        3  17093890101  17031010300  89.40  63520.029749
        4  17093890101  17031010400  84.97  63268.514352
        """
        import geopandas as gpd

        # TO-DO: check for unprojected geometries

        # Continue if the dataframes are geodataframes, else throw an error
        if type(self.demand_df) is not gpd.GeoDataFrame:
            raise TypeError(
                "Cannot calculate euclidean distance without a geometry of demand side"
            )

        if type(self.supply_df) is not gpd.GeoDataFrame:
            raise TypeError(
                "Cannot calculate euclidean distance without a geometry of supply side"
            )

        # Reset the index so that the geoids are accessible
        df1 = self.demand_df.rename_axis("origin").reset_index()
        df2 = self.supply_df.rename_axis("dest").reset_index()

        # Convert to centroids if so-specified
        if centroid_o:
            df1.set_geometry(df1.centroid, inplace=True)
        if centroid_d:
            df2.set_geometry(df2.centroid, inplace=True)

        # Calculate the distances.
        if (df1.geom_type == "Point").all() & (df2.geom_type == "Point").all():
            # If both geometries are point types, merge on a temporary dummy column
            df1["temp"] = 1
            df2["temp"] = 1
            df1and2 = df1[["temp", "geometry", "origin"]].merge(
                df2[["temp", "geometry", "dest"]].rename(columns={"geometry": "geomb"})
            )
            df1and2.drop("temp", inplace=True, axis=1)
            df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))
        else:
            # Execute an sjoin for non-point geometries, based upon a buffer zone
            df1and2 = gpd.sjoin(
                df1,
                df2.rename(columns={"geometry": "geomb"}).set_geometry(
                    df2.buffer(threshold)
                ),
            )
            df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))

        # Add it to the cost df.
        df1and2 = df1and2[df1and2[name] < threshold]

        if name in self.cost_df.columns:
            self.log.info("Overwriting {}.".format(name))
            self.cost_df.drop(name, axis=1, inplace=True)

        self.cost_df = self.cost_df.merge(
            df1and2[[name, "origin", "dest"]],
            how="outer",
            left_on=[self.cost_origin, self.cost_dest],
            right_on=["origin", "dest"],
        )

        # Add it to the list of costs.
        if name not in self.cost_names:
            self.cost_names.append(name)
        # Set the default cost if it does not exist
        if not hasattr(self, "_default_cost"):
            self._default_cost = name

[docs]    def create_euclidean_distance_neighbors(
        self, name="euclidean", threshold=0, centroid=False
    ):
        """Calculate the Euclidean distance among demand locations.

        Parameters
        ----------
        name                : str
                              Column name for euclidean distances neighbors
        threshold           : int
                              Buffer threshold for non-point geometries, AKA max_distance
        centroid            : bool
                              If True, convert geometries to centroids; otherwise, no change

        Examples
        --------

        NOTE: Creating euclidean distance measures requires having a geometry column in a `geopandas.GeoDataFrame <http://geopandas.org/reference/geopandas.GeoDataFrame.html>`_.

        Import the base `Access` class and `Datasets`.

        >>> from access import Access, Datasets

        Load each of the example datasets which correspond to the demand (population), supply (doctors and dentists)
        and cost (travel time), respectively. The sample data represents the Chicago metro area with a 50km buffer around the city boundaries.

        >>> chi_docs_dents   = Datasets.load_data('chi_doc_geom')
        >>> chi_population   = Datasets.load_data('chi_pop_geom')
        >>> chi_travel_costs = Datasets.load_data('chi_times')

        >>> chi_docs_dents.head()
                     doc  dentist                       geometry
        geoid
        17031010100    1        1  POINT (354916.992 594670.505)
        17031010201    0        1  POINT (354105.876 594088.600)
        17031010202    4        1  POINT (354650.684 594093.822)
        17031010300    4        1  POINT (355209.361 594086.149)
        17031010400    0        2  POINT (355809.748 592808.043)

        >>> chi_population.head()
                      pop                       geometry
        geoid
        17031010100  4854  POINT (354916.992 594670.505)
        17031010201  6450  POINT (354105.876 594088.600)
        17031010202  2818  POINT (354650.684 594093.822)
        17031010300  6236  POINT (355209.361 594086.149)
        17031010400  5042  POINT (355809.748 592808.043)

        The `chi_travel_costs` dataset is the cost matrix, showing the travel time between each of the Census Tracts in the Chicago metro area.

        >>> chi_travel_costs.head()
                origin         dest   cost
        0  17093890101  17031010100  91.20
        1  17093890101  17031010201  92.82
        2  17093890101  17031010202  92.95
        3  17093890101  17031010300  89.40
        4  17093890101  17031010400  84.97

        Make sure you assign your desired geometry projection, which you can change as follows.

        >>> chi_population = chi_population.to_crs(epsg = 2790)
        >>> chi_docs_dents = chi_docs_dents.to_crs(epsg = 2790)

        Now, create an instance of the `Access` class and specify the demand, supply, and cost datasets.

        >>> chicago_primary_care = Access(demand_df = chi_population, demand_index = "geoid",
                                          demand_value = "pop",
                                          supply_df = chi_docs_dents, supply_index = "geoid",
                                          supply_value = ["doc", "dentist"],
                                          cost_df = chi_travel_costs, cost_origin  = "origin",
                                          cost_dest = "dest", cost_name = "cost")

        To calculate euclidean distances between Census Tracts within 250km of eachother, you can set the `threshold` to 250000 (meters). Setting `centroid_o` and `centroid_d` to `True` calculates the centroid of the geom in your dataset.

        >>> chicago_primary_care.create_euclidean_distance_neighbors(name= 'euclidean_neighbors', threshold = 250000, centroid_o = True, centroid_d = True)

        The newly calculated euclidean distance is stored in the `neighbor_cost_df` attribute.

        >>> chicago_primary_care_geom.neighbor_cost_df.head()
                origin         dest  euclidean_neighbors
        0  17031010100  17031010100             0.000000
        1  17031010100  17031010201           998.259243
        2  17031010100  17031010202           635.203387
        3  17031010100  17031010300           653.415713
        4  17031010100  17031010400          2065.375554
        """
        import geopandas as gpd

        # TO-DO: check for unprojected geometries

        # Continue if the dataframes are geodataframes, else throw an error
        if type(self.demand_df) is not gpd.GeoDataFrame:
            raise TypeError(
                "Cannot calculate euclidean distance without a geometry of supply side"
            )

        # Reset the index so that the geoids are accessible
        df1 = self.demand_df.rename_axis("origin").reset_index()
        df2 = self.demand_df.rename_axis("dest").reset_index()

        # Convert to centroids if so-specified
        if centroid:
            df1.set_geometry(df1.centroid, inplace=True)
            df2.set_geometry(df2.centroid, inplace=True)

        # Calculate the distances.
        if (df1.geom_type == "Point").all() & (df2.geom_type == "Point").all():
            # If both geometries are point types, merge on a temporary dummy column
            df1["temp"] = 1
            df2["temp"] = 1
            df1and2 = df1[["temp", "geometry", "origin"]].merge(
                df2[["temp", "geometry", "dest"]].rename(columns={"geometry": "geomb"})
            )
            df1and2.drop("temp", inplace=True, axis=1)
            df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))
        else:
            # Execute an sjoin for non-point geometries, based upon a buffer zone
            df1and2 = gpd.sjoin(
                df1,
                df2.rename(columns={"geometry": "geomb"}).set_geometry(
                    df2.buffer(threshold)
                ),
            )
            df1and2[name] = df1and2.distance(df1and2.set_geometry("geomb"))

        # Add it to the cost df.
        df1and2 = df1and2[df1and2[name] < threshold]
        self.neighbor_cost_df = self.neighbor_cost_df.merge(
            df1and2[[name, "origin", "dest"]],
            how="outer",
            left_on=[self.neighbor_cost_origin, self.neighbor_cost_dest],
            right_on=["origin", "dest"],
        )
        # Add it to the list of costs.
        self.neighbor_cost_names.append(name)
        # Set the default cost if it does not exist
        if not hasattr(self, "_neighbor_default_cost"):
            self._neighbor_default_cost = name