Skip to content

Commit fcef6b3

Browse files
authored
Merge pull request #7 from outerl/deterministic-alt-sample-randoms
Deterministic alt sample randoms
2 parents 08b2b03 + 10db123 commit fcef6b3

23 files changed

Lines changed: 1206 additions & 684 deletions

.github/workflows/core_tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ jobs:
141141
matrix:
142142
region:
143143
- prototype_mtc
144+
- prototype_arc
144145
- placeholder_psrc
145146
- prototype_marin
146147
- prototype_mtc_extended

activitysim/abm/models/location_choice.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from activitysim.core.exceptions import DuplicateWorkflowTableError
1919
from activitysim.core.interaction_sample import interaction_sample
2020
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
21+
from activitysim.core.logit import AltsContext
2122
from activitysim.core.util import reindex
2223

2324
"""
@@ -603,6 +604,7 @@ def run_location_simulate(
603604
chunk_tag,
604605
trace_label,
605606
skip_choice=False,
607+
alts_context: AltsContext | None = None,
606608
):
607609
"""
608610
run location model on location_sample annotated with mode_choice logsum
@@ -712,6 +714,7 @@ def run_location_simulate(
712714
compute_settings=model_settings.compute_settings.subcomponent_settings(
713715
"simulate"
714716
),
717+
alts_context=alts_context,
715718
)
716719

717720
if not want_logsums:
@@ -737,6 +740,7 @@ def run_location_choice(
737740
chunk_tag,
738741
trace_label,
739742
skip_choice=False,
743+
alts_context: AltsContext | None = None,
740744
):
741745
"""
742746
Run the three-part location choice algorithm to generate a location choice for each chooser
@@ -756,6 +760,8 @@ def run_location_choice(
756760
model_settings : dict
757761
chunk_size : int
758762
trace_label : str
763+
skip_choice : bool
764+
alts_context : AltsContext or None
759765
760766
Returns
761767
-------
@@ -788,6 +794,9 @@ def run_location_choice(
788794
if choosers.shape[0] == 0:
789795
logger.info(f"{trace_label} skipping segment {segment_name}: no choosers")
790796
continue
797+
# dest_size_terms contains 0-attraction zones so using this directly here, important for stable error terms
798+
# when a zone goes from 0 base -> nonzero project
799+
alts_context = AltsContext.from_series(dest_size_terms.index)
791800

792801
# - location_sample
793802
location_sample_df = run_location_sample(
@@ -841,6 +850,7 @@ def run_location_choice(
841850
trace_label, "simulate.%s" % segment_name
842851
),
843852
skip_choice=skip_choice,
853+
alts_context=alts_context,
844854
)
845855

846856
if estimator:

activitysim/abm/models/parking_location_choice.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from activitysim.core.configuration.base import PreprocessorSettings
2222
from activitysim.core.configuration.logit import LogitComponentSettings
2323
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
24+
from activitysim.core.logit import AltsContext
2425
from activitysim.core.tracing import print_elapsed_time
2526
from activitysim.core.util import assign_in_place, drop_unused_columns
2627
from activitysim.core.exceptions import DuplicateWorkflowTableError
@@ -112,6 +113,7 @@ def parking_destination_simulate(
112113
chunk_size,
113114
trace_hh_id,
114115
trace_label,
116+
alts_context: AltsContext | None = None,
115117
):
116118
"""
117119
Choose destination from destination_sample (with od_logsum and dp_logsum columns added)
@@ -150,6 +152,7 @@ def parking_destination_simulate(
150152
trace_label=trace_label,
151153
trace_choice_name="parking_loc",
152154
explicit_chunk_size=model_settings.explicit_chunk,
155+
alts_context=alts_context,
153156
)
154157

155158
# drop any failed zero_prob destinations
@@ -211,6 +214,9 @@ def choose_parking_location(
211214
)
212215
destination_sample.index = np.repeat(trips.index.values, len(alternatives))
213216
destination_sample.index.name = trips.index.name
217+
# use full land_use index to ensure AltsContext spans full range of potential zones
218+
land_use = state.get_dataframe("land_use")
219+
alts_context = AltsContext.from_series(land_use.index)
214220

215221
destinations = parking_destination_simulate(
216222
state,
@@ -223,6 +229,7 @@ def choose_parking_location(
223229
chunk_size=chunk_size,
224230
trace_hh_id=trace_hh_id,
225231
trace_label=trace_label,
232+
alts_context=alts_context,
226233
)
227234

228235
if want_sample_table:

activitysim/abm/models/trip_destination.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from activitysim.core.configuration.logit import LocationComponentSettings
3333
from activitysim.core.interaction_sample import interaction_sample
3434
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
35+
from activitysim.core.logit import AltsContext
3536
from activitysim.core.skim_dictionary import DataFrameMatrix
3637
from activitysim.core.tracing import print_elapsed_time
3738
from activitysim.core.util import assign_in_place, reindex
@@ -950,6 +951,7 @@ def trip_destination_simulate(
950951
skim_hotel,
951952
estimator,
952953
trace_label,
954+
alts_context: AltsContext | None = None,
953955
):
954956
"""
955957
Choose destination from destination_sample (with od_logsum and dp_logsum columns added)
@@ -1036,6 +1038,7 @@ def trip_destination_simulate(
10361038
trace_choice_name="trip_dest",
10371039
estimator=estimator,
10381040
explicit_chunk_size=model_settings.explicit_chunk,
1041+
alts_context=alts_context,
10391042
)
10401043

10411044
if not want_logsums:
@@ -1080,6 +1083,10 @@ def choose_trip_destination(
10801083

10811084
t0 = print_elapsed_time()
10821085

1086+
# use full index (including zero-size zones) to ensure stable random results
1087+
# fetch alts_context early so we don't worry about mutating alternatives first
1088+
alts_context = AltsContext.from_series(alternatives.index)
1089+
10831090
# - trip_destination_sample
10841091
destination_sample = trip_destination_sample(
10851092
state,
@@ -1126,7 +1133,6 @@ def choose_trip_destination(
11261133
destination_sample["dp_logsum"] = 0.0
11271134

11281135
t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0, debug=True)
1129-
11301136
destinations = trip_destination_simulate(
11311137
state,
11321138
primary_purpose=primary_purpose,
@@ -1138,6 +1144,7 @@ def choose_trip_destination(
11381144
skim_hotel=skim_hotel,
11391145
estimator=estimator,
11401146
trace_label=trace_label,
1147+
alts_context=alts_context,
11411148
)
11421149

11431150
dropped_trips = ~trips.index.isin(destinations.index)

activitysim/abm/models/trip_scheduling_choice.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,15 @@ def run_trip_scheduling_choice(
279279
) in chunk.adaptive_chunked_choosers(state, indirect_tours, trace_label):
280280
# Sort the choosers and get the schedule alternatives
281281
choosers = choosers.sort_index()
282+
# FIXME-EET: For explicit error term choices, we need a stable alternative ID. Currently, we use
283+
# SCHEDULE_ID, which just enumerates all schedule alternatives, of which there are choosers times
284+
# alternatives, in the order they are processed, which depends on whether there are stops on the outward/return leg.
285+
# We might want to change SCHEDULE_ID to a fixed pattern of all possible combinations of
286+
# (outbound, main, inbound) duration for the maximum possible tour duration (max time window). For
287+
# 30min intervals, this leads to 1225 alternatives and therefore reasonable memory-wise for random numbers.
288+
# It looks like all that would need to change for this is the generation of the schedule alternatives and
289+
# the lookup of choices as elements in schedule after simulation because choosers are indexed by tour_id.
290+
282291
schedules = generate_schedule_alternatives(choosers).sort_index()
283292

284293
# preprocessing alternatives

activitysim/abm/models/util/tour_destination.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from activitysim.core.configuration.logit import TourLocationComponentSettings
2222
from activitysim.core.interaction_sample import interaction_sample
2323
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
24+
from activitysim.core.logit import AltsContext
2425
from activitysim.core.util import reindex
2526

2627
logger = logging.getLogger(__name__)
@@ -873,6 +874,10 @@ def run_destination_simulate(
873874
state.tracing.dump_df(DUMP, choosers, trace_label, "choosers")
874875

875876
log_alt_losers = state.settings.log_alt_losers
877+
# use full land_use index to ensure AltsContext spans full range of potential destinations
878+
# (maintains stable random number generation even if zones flip zero/non-zero size)
879+
land_use = state.get_dataframe("land_use")
880+
alts_context = AltsContext.from_series(land_use.index)
876881

877882
choices = interaction_sample_simulate(
878883
state,
@@ -891,6 +896,7 @@ def run_destination_simulate(
891896
estimator=estimator,
892897
skip_choice=skip_choice,
893898
compute_settings=model_settings.compute_settings,
899+
alts_context=alts_context,
894900
)
895901

896902
if not want_logsums:

activitysim/abm/models/util/vectorize_tour_scheduling.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from activitysim.core.configuration.base import ComputeSettings, PreprocessorSettings
1818
from activitysim.core.configuration.logit import LogitComponentSettings
1919
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
20+
from activitysim.core.logit import AltsContext
2021
from activitysim.core.util import reindex
2122

2223
logger = logging.getLogger(__name__)
@@ -849,6 +850,9 @@ def _schedule_tours(
849850
estimator.write_interaction_sample_alternatives(alt_tdd)
850851

851852
log_alt_losers = state.settings.log_alt_losers
853+
# use full TDD alternatives index to ensure AltsContext spans full range of potential slots
854+
tdd_alts = state.get_injectable("tdd_alts")
855+
alts_context = AltsContext.from_series(tdd_alts.index)
852856

853857
choices = interaction_sample_simulate(
854858
state,
@@ -862,6 +866,7 @@ def _schedule_tours(
862866
trace_label=tour_trace_label,
863867
estimator=estimator,
864868
compute_settings=compute_settings,
869+
alts_context=alts_context,
865870
)
866871
chunk_sizer.log_df(tour_trace_label, "choices", choices)
867872

activitysim/abm/test/test_misc/test_trip_scheduling_choice.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
import numpy as np
2-
import pandas as pd
3-
import pytest
1+
from __future__ import annotations
2+
43
import os
54
from pathlib import Path
65

6+
import numpy as np
7+
import pandas as pd
8+
import pytest
79

810
from activitysim.abm.models import trip_scheduling_choice as tsc
911
from activitysim.abm.tables.skims import skim_dict

activitysim/core/interaction_sample_simulate.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99

1010
from activitysim.core import chunk, interaction_simulate, logit, tracing, util, workflow
1111
from activitysim.core.configuration.base import ComputeSettings
12-
from activitysim.core.simulate import set_skim_wrapper_targets
1312
from activitysim.core.exceptions import SegmentedSpecificationError
13+
from activitysim.core.logit import AltsContext
14+
from activitysim.core.simulate import set_skim_wrapper_targets
1415

1516
logger = logging.getLogger(__name__)
1617

@@ -34,6 +35,7 @@ def _interaction_sample_simulate(
3435
*,
3536
chunk_sizer: chunk.ChunkSizer,
3637
compute_settings: ComputeSettings | None = None,
38+
alts_context: AltsContext | None = None,
3739
):
3840
"""
3941
Run a MNL simulation in the situation in which alternatives must
@@ -220,9 +222,6 @@ def _interaction_sample_simulate(
220222
)
221223
chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities)
222224

223-
del interaction_df
224-
chunk_sizer.log_df(trace_label, "interaction_df", None)
225-
226225
if have_trace_targets:
227226
state.tracing.trace_interaction_eval_results(
228227
trace_eval_results,
@@ -264,19 +263,29 @@ def _interaction_sample_simulate(
264263

265264
# insert the zero-prob utilities to pad each alternative set to same size
266265
padded_utilities = np.insert(interaction_utilities.utility.values, inserts, -999)
266+
padded_alt_nrs = np.insert(interaction_df[choice_column], inserts, -999)
267267
chunk_sizer.log_df(trace_label, "padded_utilities", padded_utilities)
268-
del inserts
269268

270-
del interaction_utilities
271-
chunk_sizer.log_df(trace_label, "interaction_utilities", None)
269+
del interaction_df
270+
chunk_sizer.log_df(trace_label, "interaction_df", None)
271+
272+
del inserts
272273

273274
# reshape to array with one row per chooser, one column per alternative
274275
padded_utilities = padded_utilities.reshape(-1, max_sample_count)
276+
padded_alt_nrs = padded_alt_nrs.reshape(-1, max_sample_count)
275277

276278
# convert to a dataframe with one row per chooser and one column per alternative
277279
utilities_df = pd.DataFrame(padded_utilities, index=choosers.index)
278280
chunk_sizer.log_df(trace_label, "utilities_df", utilities_df)
279281

282+
# alt_nrs_df has columns for each alt in the choice set, with values indicating which alt_id
283+
# they correspond to (as opposed to the 0-n index implied by the column number).
284+
if alts_context is not None:
285+
alt_nrs_df = pd.DataFrame(padded_alt_nrs, index=choosers.index)
286+
else:
287+
alt_nrs_df = None # if we don't provide the number of dense alternatives, assume that we'll use the old approach
288+
280289
del padded_utilities
281290
chunk_sizer.log_df(trace_label, "padded_utilities", None)
282291

@@ -320,7 +329,12 @@ def _interaction_sample_simulate(
320329
# positions is series with the chosen alternative represented as a column index in utilities_df
321330
# which is an integer between zero and num alternatives in the alternative sample
322331
positions, rands = logit.make_choices_utility_based(
323-
state, utilities_df, trace_label=trace_label, trace_choosers=choosers
332+
state,
333+
utilities_df,
334+
trace_label=trace_label,
335+
trace_choosers=choosers,
336+
alts_context=alts_context,
337+
alt_nrs_df=alt_nrs_df,
324338
)
325339

326340
del utilities_df
@@ -451,6 +465,7 @@ def interaction_sample_simulate(
451465
skip_choice=False,
452466
explicit_chunk_size=0,
453467
*,
468+
alts_context: AltsContext | None = None,
454469
compute_settings: ComputeSettings | None = None,
455470
):
456471
"""
@@ -496,6 +511,12 @@ def interaction_sample_simulate(
496511
explicit_chunk_size : float, optional
497512
If > 0, specifies the chunk size to use when chunking the interaction
498513
simulation. If < 1, specifies the fraction of the total number of choosers.
514+
alts_context : AltsContext, optional
515+
Representation of the full alternatives domain (min and max alternative id)
516+
in the absence of sampling.
517+
This is used with EET simulation to ensure consistent random numbers across the whole alternative set
518+
(as the sampled set may change between base and project). When not provided,
519+
EET with integer-coded choice ids will raise an error.
499520
500521
Returns
501522
-------
@@ -517,6 +538,18 @@ def interaction_sample_simulate(
517538
trace_label = tracing.extend_trace_label(trace_label, "interaction_sample_simulate")
518539
chunk_tag = chunk_tag or trace_label
519540

541+
if state.settings.use_explicit_error_terms:
542+
choice_ids_are_int = pd.api.types.is_integer_dtype(alternatives[choice_column])
543+
if alts_context is None and choice_ids_are_int:
544+
raise ValueError(
545+
"alts_context is required for interaction_sample_simulate when "
546+
"use_explicit_error_terms is True and choice_column is integer-coded"
547+
)
548+
if alts_context is not None and not choice_ids_are_int:
549+
raise ValueError(
550+
"alts_context can only be used with integer-coded choice_column values"
551+
)
552+
520553
result_list = []
521554
for (
522555
i,
@@ -551,6 +584,7 @@ def interaction_sample_simulate(
551584
skip_choice,
552585
chunk_sizer=chunk_sizer,
553586
compute_settings=compute_settings,
587+
alts_context=alts_context,
554588
)
555589

556590
result_list.append(choices)

0 commit comments

Comments
 (0)