You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Hi, I'm designing a synthetic transaction dataset and am struggling to keep the fraud rate at only 0.10% across the dataset. I think the solution lies in how I implement context prompting, so that the model can learn from the previous row.
# Identity and time columns: one datetime-sampled timestamp and two
# UUID-sampled identifiers. The configs are built first and then
# registered in the order timestamp, transaction_id, customer_id.
timestamp_column = SamplerColumnConfig(
    name="timestamp",
    sampler_type=SamplerType.DATETIME,
    params=DatetimeSamplerParams(start="2021-1-1", end="2026-1-1"),
)

# transaction_id requests the short UUID form; customer_id does not.
transaction_id_column = SamplerColumnConfig(
    name="transaction_id",
    sampler_type=SamplerType.UUID,
    params=UUIDSamplerParams(short_form=True),
)
customer_id_column = SamplerColumnConfig(
    name="customer_id",
    sampler_type=SamplerType.UUID,
    params=UUIDSamplerParams(short_form=False),
)

for identity_column in (
    timestamp_column,
    transaction_id_column,
    customer_id_column,
):
    config_builder.add_column(identity_column)
# Transaction amount: uniform sampler with low=1000 and high=10_000_000,
# zero decimal places, with the column converted to "int".
transaction_total_params = UniformSamplerParams(
    low=1000,
    high=10_000_000,
    decimal_places=0,
)
transaction_total_column = SamplerColumnConfig(
    name="transaction_total",
    sampler_type=SamplerType.UNIFORM,
    params=transaction_total_params,
    convert_to="int",
)
config_builder.add_column(transaction_total_column)
# Weighted categorical columns, declared as (name, values, weights) rows
# and registered in the order merchant_category, payment_method, region.
# Each weights list lines up positionally with its values list.
categorical_column_specs = (
    (
        "merchant_category",
        ["Electronics", "Clothing", "Groceries", "Home & Garden", "Sports"],
        [0.2, 0.3, 0.25, 0.15, 0.1],
    ),
    (
        "payment_method",
        ["Credit Card", "Debit Card", "PayPal", "Bank Transfer"],
        [0.3, 0.3, 0.15, 0.25],
    ),
    (
        "region",
        ["Jakarta", "Surabaya", "Bandung", "Medan", "Semarang"],
        [0.3, 0.25, 0.2, 0.15, 0.1],
    ),
)

for column_name, category_values, category_weights in categorical_column_specs:
    config_builder.add_column(
        SamplerColumnConfig(
            name=column_name,
            sampler_type=SamplerType.CATEGORY,
            params=CategorySamplerParams(
                values=category_values,
                weights=category_weights,
            ),
        )
    )
# Item count per transaction: uniform sampler with low=1 and high=100,
# zero decimal places, with the column converted to "int".
total_quantity_params = UniformSamplerParams(
    low=1,
    high=100,
    decimal_places=0,
)
total_quantity_column = SamplerColumnConfig(
    name="total_quantity",
    sampler_type=SamplerType.UNIFORM,
    params=total_quantity_params,
    convert_to="int",
)
config_builder.add_column(total_quantity_column)
# LLM-generated structured column. The prompt asks the model for two fields,
# total_quantity_7d and reported_transaction_total, and the response is
# shaped by the `Transaction` output format (defined outside this block).
#
# Fixes relative to the previous prompt text:
#   * "90%%" -> "90%": the string is never passed through %-formatting, so
#     the doubled sign reached the model literally.
#   * "1000,000" -> "1,000,000": malformed thousands separator.
#
# NOTE(review): the 90%/10% split is expressed only as prompt text; nothing
# in this block enforces it, so check the realized rate on generated data.
config_builder.add_column(
    LLMStructuredColumnConfig(
        name="structured_transaction",
        model_alias=structure_model_alias,
        prompt=(
            # Part 1: the 7-day quantity, described via the spike ratio.
            "Generate a realistic value for total_quantity_7d based on the "
            "ratio of the {{total_quantity}} over the value. "
            "The chance that the ratio is greater than 2 is 10%, otherwise "
            "the possibility that it is less than 2 is 90%.\n"
            # Part 2: the reported total, tied to the spike ratio above.
            "Generate a realistic value for reported_transaction_total. "
            "The value should be equal to {{transaction_total}} for 90% of "
            "the transactions and be lower for the remaining 10%. "
            "The value should be a random value between 1000 and 1,000,000. "
            "If the {{total_quantity}}/total_quantity_7d is greater than 2, "
            "then the value of the reported_transaction_total should be less "
            "than {{transaction_total}}.\n"
        ),
        output_format=Transaction,
    )
)
# Derived columns computed from template expressions over earlier columns.

# Ratio of this transaction's quantity to the generated 7-day quantity.
quantity_spike_ratio_expr = (
    "{{ total_quantity / structured_transaction.total_quantity_7d }}"
)
config_builder.add_column(
    ExpressionColumnConfig(
        name="quantity_spike_ratio",
        expr=quantity_spike_ratio_expr,
        dtype="float",
    )
)

# Reported total minus the sampled total.
# NOTE(review): unlike quantity_spike_ratio, no dtype is passed here, so the
# library default applies — confirm that is the intended column type.
transaction_total_delta_expr = (
    "{{ structured_transaction.reported_transaction_total - transaction_total }}"
)
config_builder.add_column(
    ExpressionColumnConfig(
        name="transaction_total_delta",
        expr=transaction_total_delta_expr,
    )
)
# LLM judge column that labels each row with an `is_fraud` score based on
# the two derived columns transaction_total_delta and quantity_spike_ratio.
#
# Fix: the prompt previously asked for "(True)" / "(False)" while the only
# score options offered are 0 and 1. The final instruction now names the
# score and uses the same 1/0 labels as the options.
config_builder.add_column(
    LLMJudgeColumnConfig(
        name="fraud_analysis",
        model_alias=judge_model_alias,
        prompt=(
            "Determine if the transaction is fraudulent based on the "
            "following conditions:\n"
            "1. If the {{transaction_total_delta}} is not equal to 0, it "
            "indicates a discrepancy between the expected and reported "
            "transaction totals, which is a strong indicator of potential "
            "fraud.\n"
            "2. If the {{quantity_spike_ratio}} is greater than or equal to "
            "2.0, it suggests a significant increase in quantity compared to "
            "the average, which could also indicate fraudulent activity.\n"
            "If either of these conditions is met, classify the transaction "
            "as fraudulent (is_fraud = 1). Otherwise, classify it as "
            "non-fraudulent (is_fraud = 0)."
        ),
        scores=[
            Score(
                name="is_fraud",
                description="Indicates whether the transaction is fraudulent (1) or not (0).",
                # Keys are the values the judge may emit for this score.
                options={
                    0: "The transaction is not fraud",
                    1: "The transaction is fraud",
                },
            )
        ],
    )
)
# Run the builder's validation once every column above has been added.
# NOTE(review): what validate() checks is not visible here — confirm
# against the library documentation.
config_builder.validate()
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
Hi, I'm designing a synthetic transaction dataset and am struggling to keep the fraud rate at only 0.10% across the dataset. I think the solution lies in how I implement context prompting, so that the model can learn from the previous row.
Beta Was this translation helpful? Give feedback.
All reactions