oracle-devrel · AssafRab · May 18, 2026 · May 18, 2026
diff --git a/data-platform/data-science/oracle-data-science/anomaly-detection/LICENSE b/data-platform/data-science/oracle-data-science/anomaly-detection/LICENSE
@@ -0,0 +1,35 @@
+Copyright (c) 2026 Oracle and/or its affiliates.
+
+The Universal Permissive License (UPL), Version 1.0
+
+Subject to the condition set forth below, permission is hereby granted to any
+person obtaining a copy of this software, associated documentation and/or data
+(collectively the "Software"), free of charge and under any and all copyright
+rights in the Software, and any and all patent rights owned or freely
+licensable by each licensor hereunder covering either (i) the unmodified
+Software as contributed to or provided by such licensor, or (ii) the Larger
+Works (as defined below), to deal in both
+
+(a) the Software, and
+(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+one is included with the Software (each a "Larger Work" to which the Software
+is contributed by such licensors),
+
+without restriction, including without limitation the rights to copy, create
+derivative works of, display, perform, and distribute the Software and make,
+use, sell, offer for sale, import, export, have made, and have sold the
+Software and the Larger Work(s), and to sublicense the foregoing rights on
+either these or other terms.
+
+This license is subject to the following condition:
+The above copyright notice and either this complete permission notice or at
+a minimum a reference to the UPL must be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/data-platform/data-science/oracle-data-science/anomaly-detection/README.md b/data-platform/data-science/oracle-data-science/anomaly-detection/README.md
@@ -0,0 +1,58 @@
+# Overview
+
+This repository contains end-to-end anomaly detection projects demonstrating both supervised and unsupervised approaches using OCI Data Science and the ADS SDK.
+
+The projects cover key stages of the ML lifecycle, including data preparation, modeling, validation, model registration and deployment, deployment invocation, and monitoring workflows.
+
+The repository currently includes:
+
+* **Fraud Classification** – supervised fraud detection using classification models.
+* **Sales Unlabeled Anomaly Detection** – time-series anomaly detection using SARIMAX forecasting and prediction intervals.
+
+---
+
+# Projects
+
+## Fraud Classification
+
+This project demonstrates a supervised fraud detection workflow, including preprocessing, modeling, validation, model deployment, and deployment invocation.
+
+The project also demonstrates production-oriented concepts such as:
+1. Scikit-learn pipelines
+2. Custom deployment artifacts
+3. Feature engineering within the deployment pipeline, and handling high-cardinality categorical features.
+
+The deployed model can support both real-time and batch fraud monitoring workflows. The batch implementation is also covered in this project, the real time use case requires streaming tool, and not covered here.
+
+---
+
+## Sales Unlabeled Anomaly Detection
+
+This project demonstrates anomaly detection for a continuous unlabeled target variable using time-series regression and SARIMAX forecasting.
+
+The workflow includes exploratory analysis, time-series modeling and validation, anomaly detection using prediction intervals, custom model deployment, and production monitoring workflows integrated with OCI Monitoring.
+
+
+---
+
+# Environment
+
+Conda environment: `generalml_p311_cpu_x86_64_v1`
+
+Created: 2026
+
+---
+
+# Prerequisites
+
+* Access to OCI Data Science
+* Required IAM permissions
+* Familiarity with Python and machine learning workflows
+
+---
+
+# License
+
+Copyright (c) 2026 Oracle and/or its affiliates.
+
+Licensed under the Universal Permissive License (UPL), Version 1.0.
diff --git a/...data-science/oracle-data-science/anomaly-detection/files/fraud_classification/__init__.py b/...data-science/oracle-data-science/anomaly-detection/files/fraud_classification/__init__.py
diff --git a/...data-science/oracle-data-science/anomaly-detection/files/fraud_classification/analysis.py b/...data-science/oracle-data-science/anomaly-detection/files/fraud_classification/analysis.py
@@ -0,0 +1,178 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import shap
+from sklearn.metrics import roc_curve, roc_auc_score
+
+def compute_roc_metrics(y_true, y_probs):
+    """
+    Compute ROC curve metrics.
+    """
+    fpr, tpr, thresholds = roc_curve(y_true, y_probs)
+    auc = roc_auc_score(y_true, y_probs)
+
+    return fpr, tpr, thresholds, auc
+
+
+def plot_roc_curve(fpr, tpr, auc):
+    plt.figure(figsize=(7,5))
+
+    plt.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
+    plt.plot([0,1], [0,1], linestyle="--")
+
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.title("ROC Curve")
+    plt.legend()
+
+    plt.show()
+
+
+def get_feature_names(preprocessor, low_cardinality, high_cardinality, numeric_cols):
+
+    feature_names = []
+
+    if "onehot" in preprocessor.named_transformers_:
+        onehot_cols = preprocessor.named_transformers_["onehot"].get_feature_names_out(low_cardinality)
+        feature_names.extend(onehot_cols)
+
+    if "target" in preprocessor.named_transformers_:
+        feature_names.extend(high_cardinality)
+
+    feature_names.extend(numeric_cols)
+
+    return feature_names
+
+
+def plot_feature_importance(pipeline, feature_names, top_n=20):
+
+    model = pipeline.named_steps["model"]
+
+    importance = model.feature_importances_
+
+    indices = np.argsort(importance)[::-1]
+
+    sorted_features = [feature_names[i] for i in indices]
+    sorted_importance = importance[indices]
+
+    plt.figure(figsize=(10,6))
+
+    plt.barh(range(min(top_n, len(sorted_importance))), sorted_importance[:top_n])
+    plt.yticks(range(min(top_n, len(sorted_importance))), sorted_features[:top_n])
+
+    plt.gca().invert_yaxis()
+
+    plt.xlabel("Feature Importance")
+    plt.title(f"Top {top_n} Feature Importances")
+
+    plt.show()
+
+
+
+def compute_shap_values(pipeline, X_train):
+
+    model = pipeline.named_steps["model"]
+    preprocessor = pipeline.named_steps["preprocess"]
+
+    X_transformed = preprocessor.transform(X_train)
+
+    explainer = shap.TreeExplainer(model)
+
+    shap_values = explainer.shap_values(X_transformed)
+
+    return shap_values, X_transformed
+
+
+def plot_shap_summary(shap_values, X_transformed, feature_names):
+
+    shap.summary_plot(
+        shap_values,
+        X_transformed,
+        feature_names=feature_names
+    )
+
+
+def plot_prop(df,var_x):
+    plt.figure(figsize=(8, 6))
+    fraud_counts = df[var_x].value_counts()
+    plt.pie(fraud_counts.values, labels=fraud_counts.index, autopct='%1.1f%%', startangle=90)
+    plt.title('Proportion of Fraud Categories')
+    plt.axis('equal')
+    plt.show()
+
+    print(f"Class distribution:")
+    print(fraud_counts)
+
+
+def plot_fraud_rate_by_category(df, top_n=5):
+    fraud_rate_by_cat = (df.groupby('category')['is_fraud']
+                           .mean()
+                           .sort_values(ascending=False))
+    top_n_cats = fraud_rate_by_cat.head(top_n) * 100
+    avg_fraud_rate = fraud_rate_by_cat.iloc[top_n:].mean() * 100
+
+    plot_data = top_n_cats.copy()
+    plot_data['Other avg'] = avg_fraud_rate
+
+    colors = ['steelblue'] * len(top_n_cats) + ['tomato']
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    plot_data.plot(kind='bar', ax=ax, color=colors, alpha=0.8, edgecolor='none')
+    ax.set_title(f'Fraud Rate (%) — Top {top_n} Categories vs Other Categories Average')
+    ax.set_xlabel('')
+    ax.set_ylabel('Fraud Rate (%)')
+    ax.tick_params(axis='x', rotation=30)
+    plt.tight_layout()
+    plt.show()
+
+
+def plot_fraud_by_hour(df):
+    fraud_by_hour = (df[df['is_fraud'] == 1]
+                     .groupby('hour')
+                     .size()
+                     .reindex(range(24), fill_value=0))
+
+    hours = fraud_by_hour.index.values
+    counts = fraud_by_hour.values
+    angles = np.deg2rad((hours / 24) * 360 - 90)
+    max_count = counts.max()
+    bar_heights = counts / max_count * 0.4
+
+    fig, ax = plt.subplots(figsize=(6, 6))
+    ax.set_aspect('equal')
+    ax.axis('off')
+    ax.add_patch(plt.Circle((0, 0), 0.5, color='lightgray', alpha=0.15))
+    ax.set_xlim(-1, 1)
+    ax.set_ylim(-1, 1)
+
+    for angle, height, count in zip(angles, bar_heights, counts):
+        x_start = 0.1 * np.cos(angle)
+        y_start = 0.1 * np.sin(angle)
+        x_end = (0.1 + height) * np.cos(angle)
+        y_end = (0.1 + height) * np.sin(angle)
+        color = plt.cm.Reds(0.4 + 0.6 * (count / max_count))
+        ax.plot([x_start, x_end], [y_start, y_end],
+                color=color, linewidth=6, solid_capstyle='round')
+
+    for h, label in zip([0, 6, 12, 18], ['12am', '6am', '12pm', '6pm']):
+        angle = np.deg2rad((h / 24) * 360 - 90)
+        ax.text(0.7 * np.cos(angle), 0.7 * np.sin(angle),
+                label, ha='center', va='center', fontsize=9, color='gray')
+
+    plt.title('Fraud by Hour of Day')
+    plt.tight_layout()
+    plt.show()
+
+
+def plot_boxplot_by_fraud(df, column):
+    data = [df[df['is_fraud'] == 0][column].dropna(),
+            df[df['is_fraud'] == 1][column].dropna()]
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    ax.boxplot(data, labels=['Not Fraud', 'Fraud'], patch_artist=True,
+               boxprops=dict(facecolor='steelblue', alpha=0.6),
+               medianprops=dict(color='red', linewidth=2))
+    ax.set_title(f'{column} Distribution by Fraud Label')
+    ax.set_ylabel(column)
+    ax.set_xlabel('is_fraud')
+    plt.tight_layout()
+    plt.show()