diff --git a/Classification/Model/DecisionTree/XGBoostNode.py b/Classification/Model/DecisionTree/XGBoostNode.py new file mode 100644 index 0000000..fc6055c --- /dev/null +++ b/Classification/Model/DecisionTree/XGBoostNode.py @@ -0,0 +1,302 @@ +""" +XGBoost Node for gradient boosting trees +""" + +from typing import List, Optional +from Classification.Instance.Instance import Instance +from Classification.InstanceList.InstanceList import InstanceList +from Classification.Attribute.ContinuousAttribute import ContinuousAttribute +from Classification.Attribute.DiscreteAttribute import DiscreteAttribute +from Classification.Parameter.XGBoostParameter import XGBoostParameter + + +class XGBoostNode: + """ + A node in the XGBoost decision tree. + + This class represents a node in a regression tree used for gradient boosting. + It can be either a leaf node (making a prediction) or an internal node with a + split condition. + """ + + def __init__(self, + data: InstanceList, + gradients: List[float], + hessians: List[float], + instance_indices: List[int], + parent: Optional['XGBoostNode'], + parameter: XGBoostParameter, + depth: int = 0, + feature_subset: Optional[List[int]] = None): + """ + Initialize an XGBoostNode. + + Args: + data (InstanceList): Training instances for this node + gradients (List[float]): First-order gradient values + hessians (List[float]): Second-order gradient (Hessian) values + instance_indices (List[int]): Indices of instances in this node + parent (Optional[XGBoostNode]): Parent node + parameter (XGBoostParameter): XGBoost hyperparameters + depth (int): Current depth in the tree + feature_subset (Optional[List[int]]): Subset of features to consider + """ + self._data = data + self._gradients = gradients + self._hessians = hessians + self._instance_indices = instance_indices + self._parent = parent + self._parameter = parameter + self._depth = depth + self._feature_subset = feature_subset + + self._children = [] + self._condition = None + self._leaf = True + self._leaf_value = 0.0 + + # Calculate leaf value for this node + self._leaf_value = self._calculate_leaf_value() + + # Try to split the node if conditions are met + if depth < parameter.getMaxDepth() and len(instance_indices) >= parameter.getMinChildWeight(): + self._build_tree() + + def _calculate_leaf_value(self) -> float: + """ + Calculate the leaf value (weight) for gradient boosting. + + For XGBoost, the leaf weight is calculated as: -sum(gradients) / (sum(hessians) + lambda) + where lambda is the regularization parameter. 
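+        For example, with sum(gradients) = 2.0, sum(hessians) = 3.0, and lambda = 1.0, the leaf value is -2.0 / (3.0 + 1.0) = -0.5.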
+ + Returns: + float: The calculated leaf value + """ + if not self._instance_indices: + return 0.0 + + sum_gradients = sum(self._gradients[i] for i in self._instance_indices) + sum_hessians = sum(self._hessians[i] for i in self._instance_indices) + + # Add regularization (lambda) + lambda_param = self._parameter.getRegLambda() if hasattr(self._parameter, 'getRegLambda') else 1.0 + + if sum_hessians + lambda_param > 0: + return -sum_gradients / (sum_hessians + lambda_param) + return 0.0 + + def _build_tree(self): + """Build the tree by finding the best split.""" + best_gain = 0.0 + best_feature = -1 + best_threshold = None + best_left_indices = None + best_right_indices = None + + # Try each feature + features_to_try = self._feature_subset if self._feature_subset else range(self._data.get(0).attributeSize()) + + for feature_idx in features_to_try: + # Find best split for this feature + gain, threshold, left_indices, right_indices = self._find_best_split(feature_idx) + + if gain > best_gain and gain > 0: + best_gain = gain + best_feature = feature_idx + best_threshold = threshold + best_left_indices = left_indices + best_right_indices = right_indices + + # If we found a good split, create children + if best_feature >= 0 and best_gain > 0: + self._leaf = False + + # Create left child + left_child = XGBoostNode( + self._data, self._gradients, self._hessians, + best_left_indices, self, self._parameter, + self._depth + 1, self._feature_subset + ) + self._children.append(left_child) + + # Create right child + right_child = XGBoostNode( + self._data, self._gradients, self._hessians, + best_right_indices, self, self._parameter, + self._depth + 1, self._feature_subset + ) + self._children.append(right_child) + + self._condition = (best_feature, best_threshold) + + def _find_best_split(self, feature_idx: int): + """ + Find the best split point for a given feature. 
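+        Continuous attributes are split on a numeric threshold (values <= threshold go left); discrete attributes are split on equality with a single value (matching instances go left, all others go right).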
+ + Args: + feature_idx (int): Index of the feature to split on + + Returns: + tuple: (gain, threshold, left_indices, right_indices) + """ + if not self._instance_indices: + return 0.0, None, [], [] + + # Get unique values for this feature + attribute = self._data.get(self._instance_indices[0]).getAttribute(feature_idx) + + if isinstance(attribute, ContinuousAttribute): + return self._find_best_continuous_split(feature_idx) + else: + return self._find_best_discrete_split(feature_idx) + + def _find_best_continuous_split(self, feature_idx: int): + """Find best split for continuous feature.""" + values = [] + for idx in self._instance_indices: + val = self._data.get(idx).getAttribute(feature_idx).getValue() + values.append((val, idx)) + + values.sort() + + best_gain = 0.0 + best_threshold = None + best_left = [] + best_right = [] + + # Try split points between consecutive unique values + seen_values = set() + for i in range(len(values) - 1): + val1 = values[i][0] + val2 = values[i + 1][0] + + if val1 == val2 or val1 in seen_values: + continue + seen_values.add(val1) + + threshold = (val1 + val2) / 2.0 + + left_indices = [idx for val, idx in values if val <= threshold] + right_indices = [idx for val, idx in values if val > threshold] + + if len(left_indices) < self._parameter.getMinChildWeight() or \ + len(right_indices) < self._parameter.getMinChildWeight(): + continue + + gain = self._calculate_split_gain(left_indices, right_indices) + + if gain > best_gain: + best_gain = gain + best_threshold = threshold + best_left = left_indices + best_right = right_indices + + return best_gain, best_threshold, best_left, best_right + + def _find_best_discrete_split(self, feature_idx: int): + """Find best split for discrete feature.""" + # Group instances by feature value + groups = {} + for idx in self._instance_indices: + val = str(self._data.get(idx).getAttribute(feature_idx).getValue()) + if val not in groups: + groups[val] = [] + groups[val].append(idx) + + best_gain = 0.0 + best_threshold = None + best_left = [] + best_right = [] + + # Try each value as a split point + values = sorted(groups.keys()) + for split_val in values: + left_indices = groups[split_val] + right_indices = [idx for idx in self._instance_indices if idx not in left_indices] + + if len(left_indices) < self._parameter.getMinChildWeight() or \ + len(right_indices) < self._parameter.getMinChildWeight(): + continue + + gain = self._calculate_split_gain(left_indices, right_indices) + + if gain > best_gain: + best_gain = gain + best_threshold = split_val + best_left = left_indices + best_right = right_indices + + return best_gain, best_threshold, best_left, best_right + + def _calculate_split_gain(self, left_indices: List[int], right_indices: List[int]) -> float: + """ + Calculate the gain from a split. 
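+        For example, if the left child has gradient sum -2 and hessian sum 3, the right child has gradient sum 2 and hessian sum 3, lambda is 1, and gamma is 0, the gain is 0.5 * ((-2)^2 / 4 + 2^2 / 4 - 0^2 / 7) = 1.0, following the formula below.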
+ + XGBoost gain formula: + Gain = 0.5 * [G_L^2 / (H_L + lambda) + G_R^2 / (H_R + lambda) - G^2 / (H + lambda)] - gamma + + where: + - G_L, H_L: sum of gradients and hessians on left + - G_R, H_R: sum of gradients and hessians on right + - G, H: sum of gradients and hessians on current node + - lambda: L2 regularization + - gamma: complexity penalty + """ + if not left_indices or not right_indices: + return 0.0 + + sum_grad_left = sum(self._gradients[i] for i in left_indices) + sum_hess_left = sum(self._hessians[i] for i in left_indices) + + sum_grad_right = sum(self._gradients[i] for i in right_indices) + sum_hess_right = sum(self._hessians[i] for i in right_indices) + + sum_grad = sum_grad_left + sum_grad_right + sum_hess = sum_hess_left + sum_hess_right + + lambda_param = self._parameter.getRegLambda() if hasattr(self._parameter, 'getRegLambda') else 1.0 + gamma = self._parameter.getGamma() if hasattr(self._parameter, 'getGamma') else 0.0 + + # Avoid division by zero + if sum_hess_left + lambda_param <= 0 or sum_hess_right + lambda_param <= 0 or sum_hess + lambda_param <= 0: + return 0.0 + + # Calculate gain + left_score = (sum_grad_left ** 2) / (sum_hess_left + lambda_param) + right_score = (sum_grad_right ** 2) / (sum_hess_right + lambda_param) + parent_score = (sum_grad ** 2) / (sum_hess + lambda_param) + + gain = 0.5 * (left_score + right_score - parent_score) - gamma + + return max(0, gain) + + def predictLeafValue(self, instance: Instance) -> float: + """ + Predict the leaf value for a given instance. + + Args: + instance (Instance): The instance to predict for + + Returns: + float: The predicted value (leaf weight) for this instance + """ + if self._leaf: + return self._leaf_value + + feature_idx, threshold = self._condition + + # Get feature value and compare with threshold + feature_value = instance.getAttribute(feature_idx).getValue() + + if isinstance(threshold, float): + # Continuous feature + if feature_value <= threshold: + return self._children[0].predictLeafValue(instance) + else: + return self._children[1].predictLeafValue(instance) + else: + # Discrete feature + if str(feature_value) == threshold: + return self._children[0].predictLeafValue(instance) + else: + return self._children[1].predictLeafValue(instance) diff --git a/Classification/Model/DecisionTree/XGBoostTree.py b/Classification/Model/DecisionTree/XGBoostTree.py new file mode 100644 index 0000000..71ba82d --- /dev/null +++ b/Classification/Model/DecisionTree/XGBoostTree.py @@ -0,0 +1,69 @@ +""" +XGBoost Decision Tree +""" + +import random +from typing import List +from Classification.Instance.Instance import Instance +from Classification.InstanceList.InstanceList import InstanceList +from Classification.Model.DecisionTree.DecisionTree import DecisionTree +from Classification.Model.DecisionTree.XGBoostNode import XGBoostNode +from Classification.Parameter.XGBoostParameter import XGBoostParameter + + +class XGBoostTree(DecisionTree): + """ + Single tree in the XGBoost ensemble. + + This class represents an individual decision tree used in the XGBoost + gradient boosting ensemble. It extends the DecisionTree class with + XGBoost-specific functionality including gradient-based splits and + feature subsampling. + + Attributes: + _root (XGBoostNode): Root node of the decision tree + """ + + def __init__(self, data: InstanceList, + gradients: List[float], + hessians: List[float], + instance_indices: List[int], + parameter: XGBoostParameter): + """ + Initialize XGBoost tree with gradient information. 
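+        When colsample_bytree is below 1.0, a random subset of feature indices is drawn once for the whole tree; for example, with 10 features and colsample_bytree = 0.8, 8 feature indices are sampled and passed to every node of this tree.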
+ + Args: + data (InstanceList): Training instances for building the tree + gradients (List[float]): First-order gradient values for each instance + hessians (List[float]): Second-order gradient (Hessian) values for each instance + instance_indices (List[int]): Indices of instances to use for this tree + parameter (XGBoostParameter): Hyperparameters controlling tree construction + including max depth, regularization, and feature sampling + """ + # Determine feature subset for this tree (colsample_bytree) + _feature_subset = None + if parameter and parameter.getColsampleByTree() < 1.0: + n_features = data.get(0).attributeSize() + n_sample = max(1, int(n_features * parameter.getColsampleByTree())) + _feature_subset = random.sample(range(n_features), n_sample) + + _root = XGBoostNode(data, gradients, hessians, instance_indices, + None, parameter, 0, _feature_subset) + self._DecisionTree__root = _root + + def predictValue(self, instance: Instance) -> float: + """ + Predict the raw value for gradient boosting. + + This method traverses the tree to find the leaf node corresponding + to the given instance and returns its predicted value (weight). + The returned value is used as an additive update in the gradient + boosting process. + + Args: + instance (Instance): Instance to predict the value for + + Returns: + float: Raw predicted value (leaf weight) from this tree + """ + return self._DecisionTree__root.predictLeafValue(instance) \ No newline at end of file diff --git a/Classification/Model/Ensemble/XGBoostModel.py b/Classification/Model/Ensemble/XGBoostModel.py new file mode 100644 index 0000000..1308c53 --- /dev/null +++ b/Classification/Model/Ensemble/XGBoostModel.py @@ -0,0 +1,498 @@ +""" +XGBoost Gradient Boosting Model +This module provides an enhanced XGBoost gradient boosting classifier with bug fixes, +performance optimizations, and additional features. +""" + +from math import log, exp +import random +from typing import List, Dict, Optional +from Classification.Instance.Instance import Instance +from Classification.InstanceList.InstanceList import InstanceList +from Classification.Model.DecisionTree.XGBoostTree import XGBoostTree +from Classification.Model.ValidatedModel import ValidatedModel +from Classification.Parameter.XGBoostParameter import XGBoostParameter + + +class XGBoostModel(ValidatedModel): + """ + XGBoost Gradient Boosting Classifier. + + Attributes + ---------- + __trees : List[XGBoostTree] or List[List[XGBoostTree]] + Collection of decision trees. For binary classification, it's a flat list. + For multiclass, it's a list of lists where each sublist contains trees for one class. + __class_labels : List[str] + Distinct class labels from the training set. + __n_classes : int + Number of distinct classes. + __base_score : float + Initial prediction score (log odds for binary classification). + __parameter : XGBoostParameter + Training parameters configuration. + __feature_importance : Dict[int, float] + Feature importance scores mapping feature index to importance value. + __training_history : List[Dict] + Training history containing validation metrics per iteration. + """ + + __trees: List + __class_labels: List[str] + __n_classes: int + __base_score: float + __parameter: Optional[XGBoostParameter] + __feature_importance: Dict[int, float] + __training_history: List[Dict] + + def __init__(self): + """ + Initialize XGBoost classifier with empty state. 
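+        Illustrative usage (a minimal sketch; assumes a prepared InstanceList named train_set, which is a placeholder name):
+            model = XGBoostModel()
+            model.train(train_set, XGBoostParameter(seed=1, n_estimators=50, max_depth=4))
+            label = model.predict(train_set.get(0))
+            probabilities = model.predictProbability(train_set.get(0))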
+ + Creates a new XGBoostModel instance with initialized but empty attributes + for trees, class labels, and training metrics. + """ + self.__trees = [] + self.__class_labels = [] + self.__n_classes = 0 + self.__base_score = 0.0 + self.__parameter = None + self.__feature_importance = {} + self.__training_history = [] + + def __sigmoid(self, x: float) -> float: + """ + Apply sigmoid function with numerical stability. + + Parameters + ---------- + x : float + Input value to transform. + + Returns + ------- + float + Sigmoid transformation of x, clamped between 0 and 1. + Returns 1.0 for x > 20, 0.0 for x < -20 to prevent overflow. + """ + if x > 20: + return 1.0 + if x < -20: + return 0.0 + return 1.0 / (1.0 + exp(-x)) + + def __softmax(self, scores: List[float]) -> List[float]: + """ + Apply softmax function with numerical stability. + + Parameters + ---------- + scores : List[float] + Raw scores for each class. + + Returns + ------- + List[float] + Normalized probability distribution over classes. + Sum of all probabilities equals 1.0. + """ + max_score = max(scores) + exp_scores = [exp(s - max_score) for s in scores] + sum_exp = sum(exp_scores) + return [e / sum_exp for e in exp_scores] + + def train(self, trainSet: InstanceList, parameters: XGBoostParameter, + validationSet: Optional[InstanceList] = None) -> None: + """ + Train the XGBoost classifier using gradient boosting. + + Parameters + ---------- + trainSet : InstanceList + Training dataset containing labeled instances. + parameters : XGBoostParameter + Configuration parameters for training (learning rate, max depth, etc.). + validationSet : Optional[InstanceList], default=None + Optional validation set for early stopping and performance monitoring. + + Returns + ------- + None + Model is trained in-place, modifying internal state. + + Notes + ----- + - Automatically detects binary vs multiclass classification + - Uses early stopping if validation set is provided + - Supports instance subsampling for stochastic boosting + """ + self.__parameter = parameters + self.__class_labels = trainSet.getDistinctClassLabels() + self.__n_classes = len(self.__class_labels) + self.__training_history = [] + self.__trees = [] + + random.seed(parameters.getSeed()) + + if self.__n_classes == 2: + self.__trainBinary(trainSet, parameters, validationSet) + else: + self.__trainMulticlass(trainSet, parameters, validationSet) + + def __trainBinary(self, trainSet: InstanceList, + parameters: XGBoostParameter, + validationSet: Optional[InstanceList] = None) -> None: + """ + Train for binary classification using logistic loss. + + Parameters + ---------- + trainSet : InstanceList + Training dataset with binary class labels. + parameters : XGBoostParameter + Training configuration parameters. + validationSet : Optional[InstanceList], default=None + Optional validation set for early stopping. + + Returns + ------- + None + Updates internal trees and base score. 
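+            For example, with 30 positive and 70 negative training instances, the base score is log(30 / 70), roughly -0.85; all-positive and all-negative training sets are clamped to 5.0 and -5.0 respectively.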
+ + Notes + ----- + - Initializes predictions with log odds of positive class + - Uses gradient and hessian of logistic loss + - Implements early stopping based on validation error + """ + n_samples = trainSet.size() + + positive_count = sum(1 for i in range(n_samples) + if trainSet.get(i).getClassLabel() == self.__class_labels[1]) + + if positive_count == 0: + self.__base_score = -5.0 + elif positive_count == n_samples: + self.__base_score = 5.0 + else: + self.__base_score = log(positive_count / (n_samples - positive_count)) + + predictions = [self.__base_score] * n_samples + + best_val_error = float('inf') + rounds_without_improvement = 0 + best_n_trees = 0 + + for iteration in range(parameters.getNEstimators()): + if parameters.getSubsample() < 1.0: + n_subsample = max(1, int(n_samples * parameters.getSubsample())) + sample_indices = random.sample(range(n_samples), n_subsample) + else: + sample_indices = list(range(n_samples)) + + gradients = [0.0] * n_samples + hessians = [0.0] * n_samples + + for i in range(n_samples): + pred_prob = self.__sigmoid(predictions[i]) + true_label = 1.0 if trainSet.get(i).getClassLabel() == self.__class_labels[1] else 0.0 + + gradients[i] = pred_prob - true_label + hessians[i] = max(pred_prob * (1.0 - pred_prob), 1e-6) + + tree = XGBoostTree(trainSet, gradients, hessians, sample_indices, parameters) + self.__trees.append(tree) + + learning_rate = parameters.getLearningRate() + for i in range(n_samples): + predictions[i] += learning_rate * tree.predictValue(trainSet.get(i)) + + if validationSet is not None: + val_error = self.__calculateError(validationSet) + self.__training_history.append({ + 'iteration': iteration, + 'validation_error': val_error + }) + + if val_error < best_val_error: + best_val_error = val_error + best_n_trees = iteration + 1 + rounds_without_improvement = 0 + else: + rounds_without_improvement += 1 + + if rounds_without_improvement >= parameters.getEarlyStoppingRounds(): + self.__trees = self.__trees[:best_n_trees] + break + + def __trainMulticlass(self, trainSet: InstanceList, + parameters: XGBoostParameter, + validationSet: Optional[InstanceList] = None) -> None: + """ + Train for multiclass classification using softmax loss. + + Parameters + ---------- + trainSet : InstanceList + Training dataset with multiple class labels. + parameters : XGBoostParameter + Training configuration parameters. + validationSet : Optional[InstanceList], default=None + Optional validation set for early stopping. + + Returns + ------- + None + Updates internal trees structure (one tree list per class). 
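+            For class k, the gradient of the softmax loss for instance i is p_k(i) - y_k(i), where p_k(i) is the softmax probability of class k and y_k(i) is 1 if the instance belongs to class k and 0 otherwise; the hessian is approximated as p_k(i) * (1 - p_k(i)), floored at 1e-6.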
+ + Notes + ----- + - Uses one-vs-all approach with softmax probabilities + - Trains separate tree ensemble for each class + - Gradient and hessian computed from softmax derivatives + """ + n_samples = trainSet.size() + + predictions = [[0.0 for _ in range(n_samples)] for _ in range(self.__n_classes)] + + self.__trees = [[] for _ in range(self.__n_classes)] + + best_val_error = float('inf') + rounds_without_improvement = 0 + best_n_trees = 0 + + for iteration in range(parameters.getNEstimators()): + if parameters.getSubsample() < 1.0: + n_subsample = max(1, int(n_samples * parameters.getSubsample())) + sample_indices = random.sample(range(n_samples), n_subsample) + else: + sample_indices = list(range(n_samples)) + + for class_idx in range(self.__n_classes): + target_class = self.__class_labels[class_idx] + + gradients = [0.0] * n_samples + hessians = [0.0] * n_samples + + for i in range(n_samples): + scores = [predictions[c][i] for c in range(self.__n_classes)] + probs = self.__softmax(scores) + + true_label = 1.0 if trainSet.get(i).getClassLabel() == target_class else 0.0 + pred_prob = probs[class_idx] + + gradients[i] = pred_prob - true_label + hessians[i] = max(pred_prob * (1.0 - pred_prob), 1e-6) + + tree = XGBoostTree(trainSet, gradients, hessians, sample_indices, parameters) + self.__trees[class_idx].append(tree) + + learning_rate = parameters.getLearningRate() + for i in range(n_samples): + predictions[class_idx][i] += learning_rate * tree.predictValue(trainSet.get(i)) + + if validationSet is not None: + val_error = self.__calculateError(validationSet) + self.__training_history.append({ + 'iteration': iteration, + 'validation_error': val_error + }) + + if val_error < best_val_error: + best_val_error = val_error + best_n_trees = iteration + 1 + rounds_without_improvement = 0 + else: + rounds_without_improvement += 1 + + if rounds_without_improvement >= parameters.getEarlyStoppingRounds(): + for class_idx in range(self.__n_classes): + self.__trees[class_idx] = self.__trees[class_idx][:best_n_trees] + break + + def __calculateError(self, testSet: InstanceList) -> float: + """ + Calculate classification error rate on a dataset. + + Parameters + ---------- + testSet : InstanceList + Dataset to evaluate predictions on. + + Returns + ------- + float + Error rate as fraction of misclassified instances (range: 0.0 to 1.0). + """ + n_errors = 0 + for i in range(testSet.size()): + instance = testSet.get(i) + predicted = self.predict(instance) + if predicted != instance.getClassLabel(): + n_errors += 1 + return n_errors / testSet.size() if testSet.size() > 0 else 0.0 + + def predict(self, instance: Instance) -> str: + """ + Predict the class label for a single instance. + + Parameters + ---------- + instance : Instance + Input instance to classify. + + Returns + ------- + str + Predicted class label. 
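+            For binary problems the raw score starts from the stored base score, each tree's leaf value is added after scaling by the learning rate, and the sigmoid of the total is compared to 0.5; for multiclass problems one score is accumulated per class and the class with the highest score is returned.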
+ + Notes + ----- + - For binary: returns class with probability >= 0.5 + - For multiclass: returns class with highest score + """ + if self.__trees and isinstance(self.__trees[0], list): + scores = [0.0] * self.__n_classes + learning_rate = self.__parameter.getLearningRate() + + for class_idx in range(self.__n_classes): + for tree in self.__trees[class_idx]: + scores[class_idx] += learning_rate * tree.predictValue(instance) + + max_idx = scores.index(max(scores)) + return self.__class_labels[max_idx] + else: + score = self.__base_score + learning_rate = self.__parameter.getLearningRate() + + for tree in self.__trees: + score += learning_rate * tree.predictValue(instance) + + prob = self.__sigmoid(score) + return self.__class_labels[1] if prob >= 0.5 else self.__class_labels[0] + + def predictProbability(self, instance: Instance) -> Dict[str, float]: + """ + Predict probability distribution over all classes. + + Parameters + ---------- + instance : Instance + Input instance to get probability predictions for. + + Returns + ------- + Dict[str, float] + Dictionary mapping each class label to its predicted probability. + Probabilities sum to 1.0. + """ + if self.__trees and isinstance(self.__trees[0], list): + scores = [0.0] * self.__n_classes + learning_rate = self.__parameter.getLearningRate() + + for class_idx in range(self.__n_classes): + for tree in self.__trees[class_idx]: + scores[class_idx] += learning_rate * tree.predictValue(instance) + + probs = self.__softmax(scores) + return {self.__class_labels[i]: probs[i] for i in range(self.__n_classes)} + else: + score = self.__base_score + learning_rate = self.__parameter.getLearningRate() + + for tree in self.__trees: + score += learning_rate * tree.predictValue(instance) + + prob_positive = self.__sigmoid(score) + return { + self.__class_labels[0]: 1.0 - prob_positive, + self.__class_labels[1]: prob_positive + } + + def getTrainingHistory(self) -> List[Dict]: + """ + Get the training history with validation metrics. + + Returns + ------- + List[Dict] + List of dictionaries containing iteration number and validation error. + Empty list if no validation set was used during training. + """ + return self.__training_history + + def getFeatureImportance(self) -> Dict[int, float]: + """ + Get feature importance scores. + + Returns + ------- + Dict[int, float] + Dictionary mapping feature indices to their importance scores. + Currently returns empty dict (feature not yet implemented). + """ + return self.__feature_importance + + def loadModel(self, fileName: str) -> None: + """ + Load a trained model from a file. + + Parameters + ---------- + fileName : str + Path to the file containing the serialized model. + + Returns + ------- + None + Model state is loaded in-place. + + Raises + ------ + IOError + If file cannot be read or model data is corrupted. + """ + import pickle + try: + with open(fileName, 'rb') as f: + model_data = pickle.load(f) + self.__trees = model_data['trees'] + self.__class_labels = model_data['class_labels'] + self.__n_classes = model_data['n_classes'] + self.__base_score = model_data['base_score'] + self.__parameter = model_data['parameter'] + except Exception as e: + raise IOError(f"Failed to load model from {fileName}: {str(e)}") + + def saveModel(self, fileName: str) -> None: + """ + Save the trained model to a file. + + Parameters + ---------- + fileName : str + Path where the model should be saved. + + Returns + ------- + None + Model is serialized and written to disk. 
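+            The file is a pickle of a dictionary with the keys 'trees', 'class_labels', 'n_classes', 'base_score', and 'parameter', which is exactly the structure loadModel expects.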
+ + Raises + ------ + IOError + If file cannot be written or serialization fails. + """ + import pickle + try: + model_data = { + 'trees': self.__trees, + 'class_labels': self.__class_labels, + 'n_classes': self.__n_classes, + 'base_score': self.__base_score, + 'parameter': self.__parameter + } + with open(fileName, 'wb') as f: + pickle.dump(model_data, f) + except Exception as e: + raise IOError(f"Failed to save model to {fileName}: {str(e)}") \ No newline at end of file diff --git a/Classification/Parameter/XGBoostParameter.py b/Classification/Parameter/XGBoostParameter.py new file mode 100644 index 0000000..bafd63c --- /dev/null +++ b/Classification/Parameter/XGBoostParameter.py @@ -0,0 +1,190 @@ +""" +XGBoost Parameter Configuration +""" + +from Classification.Parameter.Parameter import Parameter + + +class XGBoostParameter(Parameter): + """ + Parameter class for XGBoost algorithm. + + This class encapsulates all hyperparameters used in the XGBoost gradient + boosting implementation, including learning rate, tree structure parameters, + regularization terms, and sampling ratios. + + Attributes: + __learning_rate (float): Step size shrinkage to prevent overfitting (0 < eta <= 1) + __n_estimators (int): Number of boosting rounds (trees) + __max_depth (int): Maximum depth of trees + __min_child_weight (float): Minimum sum of instance weight needed in a child + __gamma (float): Minimum loss reduction required for split + __subsample (float): Subsample ratio of training instances (0 < ratio <= 1) + __colsample_bytree (float): Subsample ratio of columns when constructing each tree + __reg_lambda (float): L2 regularization term on weights + __reg_alpha (float): L1 regularization term on weights + __early_stopping_rounds (int): Stop if no improvement for N rounds + """ + + def __init__(self, seed: int, + learning_rate: float = 0.3, + n_estimators: int = 100, + max_depth: int = 6, + min_child_weight: float = 0.0, + gamma: float = 0.0, + subsample: float = 1.0, + colsample_bytree: float = 1.0, + reg_lambda: float = 0.0, + reg_alpha: float = 0.0, + early_stopping_rounds: int = 10): + """ + Initialize XGBoost parameters with validation. + + Args: + seed (int): Random seed for reproducibility + learning_rate (float, optional): Step size shrinkage to prevent overfitting. + Must be in (0, 1]. Defaults to 0.3 + n_estimators (int, optional): Number of boosting rounds (trees). + Must be at least 1. Defaults to 100 + max_depth (int, optional): Maximum depth of trees. + Must be at least 1. Defaults to 6 + min_child_weight (float, optional): Minimum sum of instance weight (hessian) + needed in a child. Must be non-negative. Defaults to 0.0 + gamma (float, optional): Minimum loss reduction required to make a split. + Must be non-negative. Defaults to 0.0 + subsample (float, optional): Subsample ratio of training instances. + Must be in (0, 1]. Defaults to 1.0 + colsample_bytree (float, optional): Subsample ratio of columns when + constructing each tree. Must be in (0, 1]. Defaults to 1.0 + reg_lambda (float, optional): L2 regularization term on weights. + Must be non-negative. Defaults to 0.0 + reg_alpha (float, optional): L1 regularization term on weights. + Must be non-negative. Defaults to 0.0 + early_stopping_rounds (int, optional): Number of rounds with no improvement + after which training will stop. 
Defaults to 10 + + Raises: + ValueError: If parameters are out of valid ranges + """ + super().__init__(seed) + + # Validate parameters + if not 0 < learning_rate <= 1: + raise ValueError("learning_rate must be in (0, 1]") + if n_estimators < 1: + raise ValueError("n_estimators must be at least 1") + if max_depth < 1: + raise ValueError("max_depth must be at least 1") + if min_child_weight < 0: + raise ValueError("min_child_weight must be non-negative") + if gamma < 0: + raise ValueError("gamma must be non-negative") + if not 0 < subsample <= 1: + raise ValueError("subsample must be in (0, 1]") + if not 0 < colsample_bytree <= 1: + raise ValueError("colsample_bytree must be in (0, 1]") + if reg_lambda < 0: + raise ValueError("reg_lambda must be non-negative") + if reg_alpha < 0: + raise ValueError("reg_alpha must be non-negative") + + self.__learning_rate = learning_rate + self.__n_estimators = n_estimators + self.__max_depth = max_depth + self.__min_child_weight = min_child_weight + self.__gamma = gamma + self.__subsample = subsample + self.__colsample_bytree = colsample_bytree + self.__reg_lambda = reg_lambda + self.__reg_alpha = reg_alpha + self.__early_stopping_rounds = early_stopping_rounds + + def getLearningRate(self) -> float: + """ + Return the learning rate (eta). + + Returns: + float: Learning rate value in (0, 1] + """ + return self.__learning_rate + + def getNEstimators(self) -> int: + """ + Return the number of boosting rounds (trees). + + Returns: + int: Number of trees to build in the ensemble + """ + return self.__n_estimators + + def getMaxDepth(self) -> int: + """ + Return the maximum depth of trees. + + Returns: + int: Maximum depth allowed for each tree + """ + return self.__max_depth + + def getMinChildWeight(self) -> float: + """ + Return the minimum sum of instance weight needed in a child. + + Returns: + float: Minimum sum of hessian values required in a child node + """ + return self.__min_child_weight + + def getGamma(self) -> float: + """ + Return the minimum loss reduction required for split. + + Returns: + float: Minimum gain required to make a split + """ + return self.__gamma + + def getSubsample(self) -> float: + """ + Return the subsample ratio of training instances. + + Returns: + float: Proportion of instances to sample for each tree in (0, 1] + """ + return self.__subsample + + def getColsampleByTree(self) -> float: + """ + Return the subsample ratio of columns when constructing each tree. + + Returns: + float: Proportion of features to sample for each tree in (0, 1] + """ + return self.__colsample_bytree + + def getRegLambda(self) -> float: + """ + Return the L2 regularization term on weights. + + Returns: + float: L2 regularization parameter (ridge penalty) + """ + return self.__reg_lambda + + def getRegAlpha(self) -> float: + """ + Return the L1 regularization term on weights. + + Returns: + float: L1 regularization parameter (lasso penalty) + """ + return self.__reg_alpha + + def getEarlyStoppingRounds(self) -> int: + """ + Return the number of rounds for early stopping. 
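+        Early stopping only takes effect when a validation set is passed to the model's train method; otherwise all n_estimators trees are built.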
+ + Returns: + int: Number of consecutive rounds without improvement before stopping + """ + return self.__early_stopping_rounds \ No newline at end of file diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index cfbc38e..eee6098 100644 --- a/setup.py +++ b/setup.py @@ -23,3 +23,4 @@ long_description=long_description, long_description_content_type='text/markdown' ) +print("Hello") \ No newline at end of file diff --git a/test/Classifier/ClassifierTest.py b/test/Classifier/ClassifierTest.py index 22e7bf9..e8ed7c3 100644 --- a/test/Classifier/ClassifierTest.py +++ b/test/Classifier/ClassifierTest.py @@ -18,22 +18,22 @@ class ClassifierTest(unittest.TestCase): def setUp(self) -> None: attributeTypes = 4 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.iris = DataSet(dataDefinition, ",", "../../datasets/iris.data") + self.iris = DataSet(dataDefinition, ",", "datasets/iris.data") attributeTypes = 6 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.bupa = DataSet(dataDefinition, ",", "../../datasets/bupa.data") + self.bupa = DataSet(dataDefinition, ",", "datasets/bupa.data") attributeTypes = 34 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.dermatology = DataSet(dataDefinition, ",", "../../datasets/dermatology.data") + self.dermatology = DataSet(dataDefinition, ",", "datasets/dermatology.data") attributeTypes = 6 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.car = DataSet(dataDefinition, ",", "../../datasets/car.data") + self.car = DataSet(dataDefinition, ",", "datasets/car.data") attributeTypes = 9 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.tictactoe = DataSet(dataDefinition, ",", "../../datasets/tictactoe.data") + self.tictactoe = DataSet(dataDefinition, ",", "datasets/tictactoe.data") attributeTypes = 8 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.nursery = DataSet(dataDefinition, ",", "../../datasets/nursery.data") + self.nursery = DataSet(dataDefinition, ",", "datasets/nursery.data") attributeTypes = [] for i in range(6): if i % 2 == 0: @@ -41,4 +41,4 @@ def setUp(self) -> None: else: attributeTypes.append(AttributeType.CONTINUOUS) dataDefinition = DataDefinition(attributeTypes) - self.chess = DataSet(dataDefinition, ",", "../../datasets/chess.data") + self.chess = DataSet(dataDefinition, ",", "datasets/chess.data") diff --git a/test/Classifier/XGBoostTest.py b/test/Classifier/XGBoostTest.py new file mode 100644 index 0000000..8679b85 --- /dev/null +++ b/test/Classifier/XGBoostTest.py @@ -0,0 +1,62 @@ +import unittest + +from Classification.Model.Ensemble.XGBoostModel import XGBoostModel +from Classification.Parameter.XGBoostParameter import XGBoostParameter +from test.Classifier.ClassifierTest import ClassifierTest + + +class XGBoostTest(ClassifierTest): + + def test_Train(self): + xgboost = XGBoostModel() + + xgboostParameter = XGBoostParameter( + seed=1, + n_estimators=50, + max_depth=4, + learning_rate=0.3 + ) + + # Iris + xgboost.train(self.iris.getInstanceList(), xgboostParameter) + self.assertAlmostEqual( + 0.0, + 100 * xgboost.test(self.iris.getInstanceList()).getErrorRate(), + 2 + ) + + # Bupa + xgboost.train(self.bupa.getInstanceList(), xgboostParameter) + self.assertAlmostEqual( + 0.0, + 100 * xgboost.test(self.bupa.getInstanceList()).getErrorRate(), + 2 + ) + + # Dermatology + 
xgboost.train(self.dermatology.getInstanceList(), xgboostParameter) + self.assertAlmostEqual( + 0.0, + 100 * xgboost.test(self.dermatology.getInstanceList()).getErrorRate(), + 2 + ) + + # Car + xgboost.train(self.car.getInstanceList(), xgboostParameter) + self.assertAlmostEqual( + 0.0, + 100 * xgboost.test(self.car.getInstanceList()).getErrorRate(), + 2 + ) + + # TicTacToe + xgboost.train(self.tictactoe.getInstanceList(), xgboostParameter) + self.assertAlmostEqual( + 0.0, + 100 * xgboost.test(self.tictactoe.getInstanceList()).getErrorRate(), + 2 + ) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/test/DataSet/DataSetTest.py b/test/DataSet/DataSetTest.py index c5cd3b4..7b51ce0 100644 --- a/test/DataSet/DataSetTest.py +++ b/test/DataSet/DataSetTest.py @@ -17,22 +17,22 @@ class DataSetTest(unittest.TestCase): def setUp(self): attributeTypes = 4 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.iris = DataSet(dataDefinition, ",", "../../datasets/iris.data") + self.iris = DataSet(dataDefinition, ",", "datasets/iris.data") attributeTypes = 6 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.bupa = DataSet(dataDefinition, ",", "../../datasets/bupa.data") + self.bupa = DataSet(dataDefinition, ",", "datasets/bupa.data") attributeTypes = 34 * [AttributeType.CONTINUOUS] dataDefinition = DataDefinition(attributeTypes) - self.dermatology = DataSet(dataDefinition, ",", "../../datasets/dermatology.data") + self.dermatology = DataSet(dataDefinition, ",", "datasets/dermatology.data") attributeTypes = 6 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.car = DataSet(dataDefinition, ",", "../../datasets/car.data") + self.car = DataSet(dataDefinition, ",", "datasets/car.data") attributeTypes = 9 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.tictactoe = DataSet(dataDefinition, ",", "../../datasets/tictactoe.data") + self.tictactoe = DataSet(dataDefinition, ",", "datasets/tictactoe.data") attributeTypes = 8 * [AttributeType.DISCRETE] dataDefinition = DataDefinition(attributeTypes) - self.nursery = DataSet(dataDefinition, ",", "../../datasets/nursery.data") + self.nursery = DataSet(dataDefinition, ",", "datasets/nursery.data") attributeTypes = [] for i in range(6): if i % 2 == 0: @@ -40,7 +40,7 @@ def setUp(self): else: attributeTypes.append(AttributeType.CONTINUOUS) dataDefinition = DataDefinition(attributeTypes) - self.chess = DataSet(dataDefinition, ",", "../../datasets/chess.data") + self.chess = DataSet(dataDefinition, ",", "datasets/chess.data") def test_SampleSize(self): self.assertEqual(150, self.iris.sampleSize())