From cbd0a5f383f2395c4a594e3003799f84f8c465f5 Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 02:27:09 -0600 Subject: [PATCH 01/12] added coauthor cs dataset --- easygraph/datasets/__init__.py | 19 +++-- easygraph/datasets/citation_graph.py | 2 +- easygraph/datasets/coauthor.py | 112 +++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 9 deletions(-) create mode 100644 easygraph/datasets/coauthor.py diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index d303baa9..30869616 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -1,3 +1,4 @@ +# risky imports try: from easygraph.datasets.get_sample_graph import * from easygraph.datasets.gnn_benchmark import * @@ -7,17 +8,19 @@ from easygraph.datasets.hypergraph.House_Committees import House_Committees from easygraph.datasets.karate import KarateClubDataset from easygraph.datasets.mathoverflow_answers import mathoverflow_answers - - from .citation_graph import CitationGraphDataset - from .citation_graph import CiteseerGraphDataset - from .citation_graph import CoraBinary - from .citation_graph import CoraGraphDataset - from .citation_graph import PubmedGraphDataset from .ppi import LegacyPPIDataset from .ppi import PPIDataset - -except: +except Exception as e: print( " Please install Pytorch before use graph-related datasets and" " hypergraph-related datasets." 
) + +from .citation_graph import ( + CitationGraphDataset, + CiteseerGraphDataset, + CoraBinary, + CoraGraphDataset, + PubmedGraphDataset, +) +from .coauthor import CoauthorCSDataset diff --git a/easygraph/datasets/citation_graph.py b/easygraph/datasets/citation_graph.py index a7e268d9..3bb2fead 100644 --- a/easygraph/datasets/citation_graph.py +++ b/easygraph/datasets/citation_graph.py @@ -55,7 +55,7 @@ class CitationGraphDataset(EasyGraphBuiltinDataset): """ _urls = { "cora_v2": "dataset/cora_v2.zip", - "citeseer": "dataset/citeSeer.zip", + "citeseer": "dataset/citeseer.zip", "pubmed": "dataset/pubmed.zip", } diff --git a/easygraph/datasets/coauthor.py b/easygraph/datasets/coauthor.py new file mode 100644 index 00000000..b9a1166b --- /dev/null +++ b/easygraph/datasets/coauthor.py @@ -0,0 +1,112 @@ +"""CoauthorCS Dataset + +This dataset contains a co-authorship network of authors who submitted papers to CS category. +Each node represents an author and edges represent co-authorships. +Node features are bag-of-words representations of keywords in the author's papers. +The task is node classification, with labels indicating the primary field of study. + +Statistics: +- Nodes: 18333 +- Edges: 81894 +- Feature Dim: 6805 +- Classes: 15 + +Source: https://github.com/dmlc/dgl/tree/master/examples/pytorch/cluster_gcn +""" + +import os +import numpy as np +import easygraph as eg +import scipy.sparse as sp + +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download, extract_archive, tensor, data_type_dict + + +class CoauthorCSDataset(EasyGraphBuiltinDataset): + r"""CoauthorCS citation network dataset. + + Nodes are authors, and edges indicate co-authorship relationships. Each node + has a bag-of-words feature vector and a label denoting the primary research field. + + Parameters + ---------- + raw_dir : str, optional + Directory to store the raw downloaded files. 
Default: None + force_reload : bool, optional + Whether to re-download and process the dataset. Default: False + verbose : bool, optional + Whether to print detailed processing logs. Default: True + transform : callable, optional + Transform to apply to the graph on access. + + Examples + -------- + >>> from easygraph.datasets import CoauthorCSDataset + >>> dataset = CoauthorCSDataset() + >>> g = dataset[0] + >>> print("Nodes:", g.number_of_nodes()) + >>> print("Edges:", g.number_of_edges()) + >>> print("Feature shape:", g.nodes[0]['feat'].shape) + >>> print("Label:", g.nodes[0]['label']) + >>> print("Number of classes:", dataset.num_classes) + """ + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "coauthor_cs" + url = "https://data.dgl.ai/dataset/coauthor_cs.zip" + super(CoauthorCSDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def process(self): + path = os.path.join(self.raw_path, "coauthor_cs.npz") + data = np.load(path) + + # Reconstruct adjacency matrix + adj = sp.csr_matrix( + (data["adj_data"], data["adj_indices"], data["adj_indptr"]), + shape=data["adj_shape"], + ) + + # Reconstruct feature matrix + features = sp.csr_matrix( + (data["attr_data"], data["attr_indices"], data["attr_indptr"]), + shape=data["attr_shape"], + ).todense() + + labels = data["labels"] + + g = eg.Graph() + g.add_edges_from(list(zip(*adj.nonzero()))) + + for i in range(features.shape[0]): + g.add_node(i, feat=np.array(features[i]).squeeze(), label=int(labels[i])) + + self._g = g + self._num_classes = len(np.unique(labels)) + + if self.verbose: + print("Finished loading CoauthorCS dataset.") + print(f" NumNodes: {g.number_of_nodes()}") + print(f" NumEdges: {g.number_of_edges()}") + print(f" NumFeats: {features.shape[1]}") + print(f" NumClasses: {self._num_classes}") + + def __getitem__(self, idx): + assert idx == 0, "CoauthorCSDataset only 
contains one graph" + if self._g is None: + raise ValueError("Graph has not been loaded or processed correctly.") + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + + @property + def num_classes(self): + return self._num_classes From 576eb0556a4ee959a25df98c204922295b82a02a Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 02:38:31 -0600 Subject: [PATCH 02/12] added amazon photos dataset --- easygraph/amazon_computers.py | 0 easygraph/datasets/__init__.py | 1 + easygraph/datasets/amazon_photo.py | 105 +++++++++++++++++++++++++++++ easygraph/datasets/coauthor.py | 2 +- 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 easygraph/amazon_computers.py create mode 100644 easygraph/datasets/amazon_photo.py diff --git a/easygraph/amazon_computers.py b/easygraph/amazon_computers.py new file mode 100644 index 00000000..e69de29b diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index 30869616..d32afdcf 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -24,3 +24,4 @@ PubmedGraphDataset, ) from .coauthor import CoauthorCSDataset +from .amazon_photo import AmazonPhotoDataset \ No newline at end of file diff --git a/easygraph/datasets/amazon_photo.py b/easygraph/datasets/amazon_photo.py new file mode 100644 index 00000000..29156976 --- /dev/null +++ b/easygraph/datasets/amazon_photo.py @@ -0,0 +1,105 @@ +import os +import numpy as np +import easygraph as eg +import scipy.sparse as sp + +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download, extract_archive, tensor, data_type_dict + + +class AmazonPhotoDataset(EasyGraphBuiltinDataset): + r"""Amazon Electronics Photo co-purchase graph dataset. + + Nodes represent products, and edges link products frequently co-purchased. + Node features are bag-of-words of product reviews. 
The task is to classify + the product category. + + Statistics: + + - Nodes: 7,650 + - Edges: 119,081 + - Number of Classes: 8 + - Features: 745 + + Parameters + ---------- + raw_dir : str, optional + Raw file directory to download/contains the input data directory. Default: None + force_reload : bool, optional + Whether to reload the dataset. Default: False + verbose : bool, optional + Whether to print out progress information. Default: True + transform : callable, optional + A transform that takes in a :class:`~easygraph.Graph` object and returns + a transformed version. The :class:`~easygraph.Graph` object will be + transformed before every access. + + Examples + -------- + >>> from easygraph.datasets import AmazonPhotoDataset + >>> dataset = AmazonPhotoDataset() + >>> g = dataset[0] + >>> print(g.number_of_nodes()) + >>> print(g.number_of_edges()) + >>> print(g.nodes[0]['feat'].shape) + >>> print(g.nodes[0]['label']) + >>> print(dataset.num_classes) + """ + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "amazon_photo" + url = "https://data.dgl.ai/dataset/amazon_co_buy_photo.zip" + super(AmazonPhotoDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def process(self): + path = os.path.join(self.raw_path, "amazon_co_buy_photo.npz") + data = np.load(path) + + adj = sp.csr_matrix( + (data["adj_data"], data["adj_indices"], data["adj_indptr"]), + shape=data["adj_shape"], + ) + + features = sp.csr_matrix( + (data["attr_data"], data["attr_indices"], data["attr_indptr"]), + shape=data["attr_shape"], + ).todense() + + labels = data["labels"] + + g = eg.Graph() + g.add_edges_from(list(zip(*adj.nonzero()))) + + for i in range(features.shape[0]): + g.add_node(i, feat=np.array(features[i]).squeeze(), label=int(labels[i])) + + self._g = g + self._num_classes = len(np.unique(labels)) + + if self.verbose: + print("Finished loading 
AmazonPhoto dataset.") + print(f" NumNodes: {g.number_of_nodes()}") + print(f" NumEdges: {g.number_of_edges()}") + print(f" NumFeats: {features.shape[1]}") + print(f" NumClasses: {self._num_classes}") + + def __getitem__(self, idx): + assert idx == 0, "AmazonPhotoDataset only contains one graph" + if self._g is None: + raise ValueError("Graph has not been loaded or processed correctly.") + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + + @property + def num_classes(self): + return self._num_classes diff --git a/easygraph/datasets/coauthor.py b/easygraph/datasets/coauthor.py index b9a1166b..1df6d3a9 100644 --- a/easygraph/datasets/coauthor.py +++ b/easygraph/datasets/coauthor.py @@ -25,7 +25,7 @@ class CoauthorCSDataset(EasyGraphBuiltinDataset): - r"""CoauthorCS citation network dataset. + r"""CoauthorCS citation network dataset. Nodes are authors, and edges indicate co-authorship relationships. Each node has a bag-of-words feature vector and a label denoting the primary research field. From 8355bc31d05c2ed8806cf0930ea89d554624c258 Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 03:28:31 -0600 Subject: [PATCH 03/12] added reddit dataset --- easygraph/amazon_computers.py | 103 +++++++++++++++++++++++++++++++++ easygraph/datasets/__init__.py | 3 +- easygraph/datasets/reddit.py | 82 ++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 easygraph/datasets/reddit.py diff --git a/easygraph/amazon_computers.py b/easygraph/amazon_computers.py index e69de29b..06c27beb 100644 --- a/easygraph/amazon_computers.py +++ b/easygraph/amazon_computers.py @@ -0,0 +1,103 @@ +import os +import numpy as np +import easygraph as eg +import scipy.sparse as sp + +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset + + +class AmazonComputersDataset(EasyGraphBuiltinDataset): + r"""Amazon Computers co-purchase graph dataset. 
+ + Nodes represent computer products, and edges link products frequently co-purchased. + Node features are bag-of-words of product reviews. The task is to classify + the product category. + + Statistics: + - Nodes: 13,752 + - Edges: 245,861 + - Number of Classes: 10 + - Features: 767 + + Parameters + ---------- + raw_dir : str, optional + Raw file directory to download/contains the input data directory. Default: None + force_reload : bool, optional + Whether to reload the dataset. Default: False + verbose : bool, optional + Whether to print out progress information. Default: True + transform : callable, optional + A transform that takes in a :class:`~easygraph.Graph` object and returns + a transformed version. The :class:`~easygraph.Graph` object will be + transformed before every access. + + Examples + -------- + >>> from easygraph.datasets import AmazonComputersDataset + >>> dataset = AmazonComputersDataset() + >>> g = dataset[0] + >>> print(g.number_of_nodes()) + >>> print(g.number_of_edges()) + >>> print(g.nodes[0]['feat'].shape) + >>> print(g.nodes[0]['label']) + >>> print(dataset.num_classes) + """ + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "amazon_computers" + url = "https://data.dgl.ai/dataset/amazon_co_buy_computers.zip" + super(AmazonComputersDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def process(self): + path = os.path.join(self.raw_path, "amazon_co_buy_computers.npz") + data = np.load(path) + + adj = sp.csr_matrix( + (data["adj_data"], data["adj_indices"], data["adj_indptr"]), + shape=data["adj_shape"], + ) + + features = sp.csr_matrix( + (data["attr_data"], data["attr_indices"], data["attr_indptr"]), + shape=data["attr_shape"], + ).todense() + + labels = data["labels"] + + g = eg.Graph() + g.add_edges_from(list(zip(*adj.nonzero()))) + + for i in range(features.shape[0]): + g.add_node(i, 
feat=np.array(features[i]).squeeze(), label=int(labels[i])) + + self._g = g + self._num_classes = len(np.unique(labels)) + + if self.verbose: + print("Finished loading AmazonComputers dataset.") + print(f" NumNodes: {g.number_of_nodes()}") + print(f" NumEdges: {g.number_of_edges()}") + print(f" NumFeats: {features.shape[1]}") + print(f" NumClasses: {self._num_classes}") + + def __getitem__(self, idx): + assert idx == 0, "AmazonComputersDataset only contains one graph" + if self._g is None: + raise ValueError("Graph has not been loaded or processed correctly.") + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + + @property + def num_classes(self): + return self._num_classes diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index d32afdcf..2624c2d7 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -24,4 +24,5 @@ PubmedGraphDataset, ) from .coauthor import CoauthorCSDataset -from .amazon_photo import AmazonPhotoDataset \ No newline at end of file +from .amazon_photo import AmazonPhotoDataset +from .reddit import RedditDataset \ No newline at end of file diff --git a/easygraph/datasets/reddit.py b/easygraph/datasets/reddit.py new file mode 100644 index 00000000..d15bafde --- /dev/null +++ b/easygraph/datasets/reddit.py @@ -0,0 +1,82 @@ +import os +import numpy as np +import easygraph as eg +import scipy.sparse as sp + +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download, extract_archive, tensor, data_type_dict + +class RedditDataset(EasyGraphBuiltinDataset): + r"""Reddit posts graph (Sept 2014) for community (subreddit) classification. + + Statistics: + - Nodes: ~232,965 + - Edges: ~114 million (approx.) + - Features per node: 602 + - Classes: number of subreddit communities + + Data are split by post-day: first 20 days train, then validation (30%), test (rest). 
+ + Parameters + ---------- + self_loop : bool + Add self-loop edges if True. + raw_dir, force_reload, verbose, transform : same as EasyGraphBuiltinDataset + """ + def __init__(self, self_loop=False, raw_dir=None, force_reload=False, + verbose=True, transform=None): + name = "reddit" + url = "https://data.dgl.ai/dataset/reddit.zip" + self.self_loop = self_loop + super().__init__(name=name, url=url, raw_dir=raw_dir, + force_reload=force_reload, verbose=verbose, + transform=transform) + + def process(self): + # Expect two files extracted: reddit_data.npz & reddit_graph.npz + data = np.load(os.path.join(self.raw_path, "reddit_data.npz")) + feat = data["feature"] # shape [N, 602] + labels = data["label"] # shape [N] + split = data["node_types"] # 1=train,2=val,3=test + + # Load adjacency + adj = sp.load_npz(os.path.join(self.raw_path, "reddit_graph.npz")) + src, dst = adj.nonzero() + if self.self_loop: + self_loops = np.arange(adj.shape[0]) + src = np.concatenate([src, self_loops]) + dst = np.concatenate([dst, self_loops]) + edges = list(zip(src, dst)) + + # Build graph + g = eg.Graph() + g.add_edges_from(edges) + + # Assign node features, labels, and masks + for i in range(feat.shape[0]): + g.add_node(i, feat=feat[i], label=int(labels[i]), + train_mask=(split[i] == 1), + val_mask=(split[i] == 2), + test_mask=(split[i] == 3)) + + self._g = g + self._num_classes = int(np.max(labels) + 1) + + if self.verbose: + print("Loaded Reddit dataset:") + print(f" NumNodes: {g.number_of_nodes()}") + print(f" NumEdges: {g.number_of_edges()}") + print(f" NumFeats: {feat.shape[1]}") + print(f" NumClasses: {self._num_classes}") + + def __getitem__(self, idx): + assert idx == 0, "RedditDataset only contains one graph" + return self._g if self.transform is None else self.transform(self._g) + + def __len__(self): + return 1 + + @property + def num_classes(self): + return self._num_classes From 282e6a25069ad8dc0e9f5962c3470621f40691d5 Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 
2025 03:36:38 -0600 Subject: [PATCH 04/12] added flickr dataset --- easygraph/datasets/__init__.py | 3 +- easygraph/datasets/flickr.py | 107 +++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 easygraph/datasets/flickr.py diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index 2624c2d7..5fa6fd4c 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -25,4 +25,5 @@ ) from .coauthor import CoauthorCSDataset from .amazon_photo import AmazonPhotoDataset -from .reddit import RedditDataset \ No newline at end of file +from .reddit import RedditDataset +from .flickr import FlickrDataset diff --git a/easygraph/datasets/flickr.py b/easygraph/datasets/flickr.py new file mode 100644 index 00000000..8a226f84 --- /dev/null +++ b/easygraph/datasets/flickr.py @@ -0,0 +1,107 @@ +import os +import json +import numpy as np +import scipy.sparse as sp +import easygraph as eg +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import tensor, data_type_dict + +class FlickrDataset(EasyGraphBuiltinDataset): + r"""Flickr dataset for node classification. + + Nodes are images and edges represent social tags co-occurrence. + Node features are precomputed image embeddings. Labels indicate image categories. + + Statistics: + - Nodes: 89,250 + - Edges: 899,756 + - Classes: 7 + - Feature dim: 500 + + Source: GraphSAINT (https://arxiv.org/abs/1907.04931) + + Parameters + ---------- + raw_dir : str, optional + Custom directory to download the dataset. Default: None (uses standard cache dir). + force_reload : bool, optional + Whether to re-download and reprocess. Default: False. + verbose : bool, optional + Whether to print loading progress. Default: False. + transform : callable, optional + A transform applied to the graph on access. + reorder : bool, optional + Whether to apply graph reordering for locality (requires torch). 
Default: False. + + Examples + -------- + >>> from easygraph.datasets import FlickrDataset + >>> ds = FlickrDataset(verbose=True) + >>> g = ds[0] + >>> print(g.number_of_nodes(), g.number_of_edges(), ds.num_classes) + >>> print(g.nodes[0]['feat'].shape, g.nodes[0]['label']) + """ + def __init__(self, raw_dir=None, force_reload=False, verbose=False, transform=None, reorder=False): + name = "flickr" + url = self._get_dgl_url("dataset/flickr.zip") + self._reorder = reorder + super(FlickrDataset, self).__init__(name=name, url=url, raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, transform=transform) + + def process(self): + # Load adjacency + coo = sp.load_npz(os.path.join(self.raw_path, "adj_full.npz")) + g = eg.Graph() + g.add_edges_from(list(zip(*coo.nonzero()))) + + # Load features + feats = np.load(os.path.join(self.raw_path, "feats.npy")) + # Load labels + with open(os.path.join(self.raw_path, "class_map.json")) as f: + class_map = json.load(f) + labels = np.array([class_map[str(i)] for i in range(feats.shape[0])]) + + # Load train/val/test splits + with open(os.path.join(self.raw_path, "role.json")) as f: + role = json.load(f) + train_mask = np.zeros(feats.shape[0], dtype=bool); train_mask[role["tr"]] = True + val_mask = np.zeros(feats.shape[0], dtype=bool); val_mask[role["va"]] = True + test_mask = np.zeros(feats.shape[0], dtype=bool); test_mask[role["te"]] = True + + # Attach node data + for i in range(feats.shape[0]): + g.add_node(i, + feat=feats[i].astype(np.float32), + label=int(labels[i])) + g.graph["train_mask"] = train_mask + g.graph["val_mask"] = val_mask + g.graph["test_mask"] = test_mask + + self._g = g + self._num_classes = int(labels.max() + 1) + if self.verbose: + print("Loaded Flickr dataset") + print(f" Nodes: {g.number_of_nodes()}, Edges: {g.number_of_edges()}, Features: {feats.shape[1]}, Classes: {self._num_classes}") + + def __getitem__(self, idx): + assert idx == 0, "FlickrDataset contains only one graph" + g = self._g + # 
transfer mask info + g.graph["train_mask"] = g.graph.pop("train_mask") + g.graph["val_mask"] = g.graph.pop("val_mask") + g.graph["test_mask"] = g.graph.pop("test_mask") + return self._transform(g) if self._transform else g + + def __len__(self): + return 1 + + @property + def num_classes(self): + return self._num_classes + + @staticmethod + def _get_dgl_url(path): + from .utils import _get_dgl_url + return _get_dgl_url(path) From fd79e025399904e11303a746887416f738d45e9a Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 20:29:04 -0600 Subject: [PATCH 05/12] added facebook ego dataset --- easygraph/datasets/__init__.py | 1 + easygraph/datasets/facebook_ego.py | 104 +++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 easygraph/datasets/facebook_ego.py diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index 5fa6fd4c..c329c687 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -27,3 +27,4 @@ from .amazon_photo import AmazonPhotoDataset from .reddit import RedditDataset from .flickr import FlickrDataset +from .facebook_ego import FacebookEgoNetDataset \ No newline at end of file diff --git a/easygraph/datasets/facebook_ego.py b/easygraph/datasets/facebook_ego.py new file mode 100644 index 00000000..505a594d --- /dev/null +++ b/easygraph/datasets/facebook_ego.py @@ -0,0 +1,104 @@ +"""Facebook Ego-Net Dataset + +This dataset contains a subset of Facebook’s social network collected from +survey participants in the SNAP EgoNet project. Nodes represent users, and +edges indicate friendship links between them. + +Each ego network is centered on a user and includes their friend connections +and friend-to-friend connections. The `.circles` files contain labeled groups +(i.e., communities) of friends identified by the ego user. + +This version processes all ego-nets as a single undirected graph. Node features +are not provided. 
Labels (circles) are optional and not included by default. + +Statistics (based on merged graph): +- Nodes: ~4,000+ +- Edges: ~88,000+ +- Features: None +- Classes: None + +Reference: +J. McAuley and J. Leskovec, “Learning to Discover Social Circles in Ego Networks,” +in NIPS, 2012. [https://snap.stanford.edu/data/egonets-Facebook.html] +""" + +import os +import easygraph as eg +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download, extract_archive + + +class FacebookEgoNetDataset(EasyGraphBuiltinDataset): + r"""Facebook Ego-Net social network dataset. + + Each node is a user, and edges represent friendship. The dataset + includes 10 ego networks centered on different users. + + Parameters + ---------- + raw_dir : str, optional + Directory to store the raw downloaded files. Default: None + force_reload : bool, optional + Whether to re-download and process the dataset. Default: False + verbose : bool, optional + Whether to print detailed processing logs. Default: True + transform : callable, optional + Optional transform to apply on the graph. 
+ + Examples + -------- + >>> from easygraph.datasets import FacebookEgoNetDataset + >>> dataset = FacebookEgoNetDataset() + >>> g = dataset[0] + >>> print("Nodes:", g.number_of_nodes()) + >>> print("Edges:", g.number_of_edges()) + """ + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "facebook" + url = "https://snap.stanford.edu/data/facebook.tar.gz" + super(FacebookEgoNetDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def process(self): + parent_dir = os.path.join(self.raw_path, "facebook") + g = eg.Graph() + + # Iterate over all .edges files in the subdirectory + for filename in os.listdir(parent_dir): + if filename.endswith(".edges"): + edge_file = os.path.join(parent_dir, filename) + + with open(edge_file, 'r') as f: + for line in f: + u, v = map(int, line.strip().split()) + g.add_edge(u, v) + + self._g = g + self._num_nodes = g.number_of_nodes() + self._num_edges = g.number_of_edges() + + if self.verbose: + print("Finished loading Facebook Ego-Net dataset.") + print(f" NumNodes: {self._num_nodes}") + print(f" NumEdges: {self._num_edges}") + + def __getitem__(self, idx): + assert idx == 0, "FacebookEgoNetDataset only contains one merged graph" + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + def download(self): + r"""Automatically download data and extract it.""" + if self.url is not None: + archive_path = os.path.join(self.raw_dir, self.name + ".tar.gz") + download(self.url, path=archive_path) + extract_archive(archive_path, self.raw_path) \ No newline at end of file From 517ad2e6d5e64bb45f20adfcace0f914e06661ca Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 20:49:13 -0600 Subject: [PATCH 06/12] added web-google dataset --- easygraph/datasets/graph_dataset_base.py | 1 - easygraph/datasets/web_google.py | 113 +++++++++++++++++++++++ 2 files 
changed, 113 insertions(+), 1 deletion(-) create mode 100644 easygraph/datasets/web_google.py diff --git a/easygraph/datasets/graph_dataset_base.py b/easygraph/datasets/graph_dataset_base.py index 4f433e81..1077ddf7 100644 --- a/easygraph/datasets/graph_dataset_base.py +++ b/easygraph/datasets/graph_dataset_base.py @@ -8,7 +8,6 @@ import os import sys import traceback - from ..utils import retry_method_with_fix from .utils import download from .utils import extract_archive diff --git a/easygraph/datasets/web_google.py b/easygraph/datasets/web_google.py new file mode 100644 index 00000000..68b5360e --- /dev/null +++ b/easygraph/datasets/web_google.py @@ -0,0 +1,113 @@ +"""Web-Google Dataset + +This dataset is a web graph based on Google's web pages and their hyperlink +structure, as crawled by the Stanford WebBase project in 2002. + +Each node represents a web page, and a directed edge from u to v indicates +a hyperlink from page u to page v. + +Statistics: +- Nodes: 875713 +- Edges: 5105039 +- Features: None +- Labels: None + +Reference: +J. Leskovec, A. Rajaraman, J. Ullman, “Mining of Massive Datasets.” +Dataset from SNAP: https://snap.stanford.edu/data/web-Google.html +""" + +import os +import easygraph as eg +from easygraph.classes.graph import Graph +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download, extract_archive +import gzip +import shutil + + +class WebGoogleDataset(EasyGraphBuiltinDataset): + r"""Web-Google hyperlink network dataset. + + Parameters + ---------- + raw_dir : str, optional + Directory to store the raw downloaded files. Default: None + force_reload : bool, optional + Whether to re-download and process the dataset. Default: False + verbose : bool, optional + Whether to print detailed processing logs. Default: True + transform : callable, optional + Optional transform to apply on the graph. 
+ + Examples + -------- + >>> from easygraph.datasets import WebGoogleDataset + >>> dataset = WebGoogleDataset() + >>> g = dataset[0] + >>> print("Nodes:", g.number_of_nodes()) + >>> print("Edges:", g.number_of_edges()) + """ + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "web-Google" + url = "https://snap.stanford.edu/data/web-Google.txt.gz" + super(WebGoogleDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def download(self): + r"""Download and extract .gz edge list.""" + if self.url is not None: + file_path = os.path.join(self.raw_dir, self.name + ".txt.gz") + download(self.url, path=file_path) + extract_archive(file_path, self.raw_path) + + def process(self): + graph = eg.DiGraph() # Web-Google is directed + edge_list_path = os.path.join(self.raw_path, self.name + ".txt") + + with open(edge_list_path, 'r') as f: + for line in f: + if line.startswith('#') or line.strip() == "": + continue + u, v = map(int, line.strip().split()) + graph.add_edge(u, v) + + self._g = graph + self._num_nodes = graph.number_of_nodes() + self._num_edges = graph.number_of_edges() + + if self.verbose: + print("Finished loading Web-Google dataset.") + print(f" NumNodes: {self._num_nodes}") + print(f" NumEdges: {self._num_edges}") + + def __getitem__(self, idx): + assert idx == 0, "WebGoogleDataset only contains one graph" + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + def download(self): + r"""Download and decompress the .txt.gz file.""" + if self.url is not None: + compressed_path = os.path.join(self.raw_dir, self.name + ".txt.gz") + extracted_path = os.path.join(self.raw_path, self.name + ".txt") + + # Download .gz file + download(self.url, path=compressed_path) + + # Ensure output directory exists + if not os.path.exists(self.raw_path): + os.makedirs(self.raw_path) + + # 
Decompress manually + with gzip.open(compressed_path, 'rb') as f_in: + with open(extracted_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) \ No newline at end of file From 3d59a68e984bca041c73dfa1d586445826605a32 Mon Sep 17 00:00:00 2001 From: sama Date: Mon, 7 Jul 2025 20:53:19 -0600 Subject: [PATCH 07/12] finished roadnet dataset --- easygraph/datasets/__init__.py | 3 +- easygraph/datasets/roadnet.py | 104 +++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 easygraph/datasets/roadnet.py diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index c329c687..ab5d4dd5 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -27,4 +27,5 @@ from .amazon_photo import AmazonPhotoDataset from .reddit import RedditDataset from .flickr import FlickrDataset -from .facebook_ego import FacebookEgoNetDataset \ No newline at end of file +from .facebook_ego import FacebookEgoNetDataset +from .roadnet import RoadNetCADataset \ No newline at end of file diff --git a/easygraph/datasets/roadnet.py b/easygraph/datasets/roadnet.py new file mode 100644 index 00000000..62e5203f --- /dev/null +++ b/easygraph/datasets/roadnet.py @@ -0,0 +1,104 @@ +"""RoadNet-CA Dataset + +This dataset represents the road network of California. +Nodes correspond to intersections, and edges represent roads connecting them. + +The data is undirected and unweighted. No features or labels are provided. + +Statistics: +- Nodes: 1,965,206 +- Edges: 2,766,607 +- Features: None +- Labels: None + +Reference: +J. Leskovec and A. 
class RoadNetCADataset(EasyGraphBuiltinDataset):
    r"""Road network of California (RoadNet-CA).

    Nodes are road intersections/endpoints and undirected edges are the
    road segments connecting them.

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.

    Examples
    --------
    >>> from easygraph.datasets import RoadNetCADataset
    >>> dataset = RoadNetCADataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        name = "roadNet-CA"
        url = "https://snap.stanford.edu/data/roadNet-CA.txt.gz"
        super(RoadNetCADataset, self).__init__(
            name=name,
            url=url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Download the .txt.gz archive and decompress it into ``raw_path``."""
        compressed_path = os.path.join(self.raw_dir, self.name + ".txt.gz")
        extracted_path = os.path.join(self.raw_path, self.name + ".txt")

        download(self.url, path=compressed_path)

        # exist_ok avoids the race between the existence check and creation
        # that the previous exists()/makedirs() pair had.
        os.makedirs(self.raw_path, exist_ok=True)

        with gzip.open(compressed_path, "rb") as f_in:
            with open(extracted_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

    def process(self):
        r"""Parse the extracted edge list into an undirected easygraph ``Graph``."""
        graph = eg.Graph()  # road network is undirected
        edge_list_path = os.path.join(self.raw_path, self.name + ".txt")

        with open(edge_list_path, "r") as f:
            for line in f:
                # Skip SNAP comment headers and blank lines.
                if line.startswith("#") or line.strip() == "":
                    continue
                parts = line.split()
                if len(parts) != 2:
                    continue  # tolerate malformed lines instead of crashing
                u, v = map(int, parts)
                graph.add_edge(u, v)

        self._g = graph
        self._num_nodes = graph.number_of_nodes()
        self._num_edges = graph.number_of_edges()

        if self.verbose:
            print("Finished loading RoadNet-CA dataset.")
            print(f"  NumNodes: {self._num_nodes}")
            print(f"  NumEdges: {self._num_edges}")

    def __getitem__(self, idx):
        assert idx == 0, "RoadNetCADataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        return 1
class ArxivHEPTHDataset(EasyGraphBuiltinDataset):
    r"""Citation network of arXiv High Energy Physics - Theory (cit-HepTh).

    Nodes are papers; a directed edge from A to B means paper A cites
    paper B. The dataset carries no node features or labels.

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.

    Examples
    --------
    >>> from easygraph.datasets import ArxivHEPTHDataset
    >>> dataset = ArxivHEPTHDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(ArxivHEPTHDataset, self).__init__(
            name="cit-HepTh",
            url="https://snap.stanford.edu/data/cit-HepTh.txt.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Download and decompress the .txt.gz file."""
        src = os.path.join(self.raw_dir, self.name + ".txt.gz")
        dst = os.path.join(self.raw_path, self.name + ".txt")

        download(self.url, path=src)

        if not os.path.exists(self.raw_path):
            os.makedirs(self.raw_path)

        with gzip.open(src, "rb") as f_in, open(dst, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

    def process(self):
        r"""Build the directed citation graph from the extracted edge list."""
        edge_list_path = os.path.join(self.raw_path, self.name + ".txt")
        graph = eg.DiGraph()  # Citation network is directed

        with open(edge_list_path, "r") as f:
            for raw_line in f:
                # Skip SNAP comment headers and blank lines.
                if raw_line.startswith("#") or raw_line.strip() == "":
                    continue
                src_id, dst_id = (int(tok) for tok in raw_line.strip().split())
                graph.add_edge(src_id, dst_id)

        self._g = graph
        self._num_nodes = graph.number_of_nodes()
        self._num_edges = graph.number_of_edges()

        if self.verbose:
            print("Finished loading Arxiv HEP-TH dataset.")
            print(f"  NumNodes: {self._num_nodes}")
            print(f"  NumEdges: {self._num_edges}")

    def __getitem__(self, idx):
        assert idx == 0, "ArxivHEPTHDataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        return 1
class GitHubUsersDataset(EasyGraphBuiltinDataset):
    r"""GitHub developers social graph (musae_git).

    Nodes are GitHub developers; a directed edge from user A to user B
    means A follows B. Each node carries a ``feat`` vector built from
    profile/activity features and a binary ``label`` (the ``ml_target``
    column: machine-learning vs. web developer).

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store raw data. Default: None
    force_reload : bool, optional
        Force re-download and processing. Default: False
    verbose : bool, optional
        Print processing information. Default: True
    transform : callable, optional
        Transform to apply to the graph on load.

    Examples
    --------
    >>> from easygraph.datasets import GitHubUsersDataset
    >>> dataset = GitHubUsersDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    >>> print("Label:", g.nodes[0]['label'])
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(GitHubUsersDataset, self).__init__(
            name="musae_git",
            url="https://snap.stanford.edu/data/git_web_ml.zip",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Download and unpack the musae_git zip archive."""
        archive = os.path.join(self.raw_dir, self.name + ".zip")
        download(self.url, path=archive)
        extract_archive(archive, self.raw_path)

    def process(self):
        r"""Build the directed follower graph and attach node features/labels."""
        g = eg.DiGraph()
        base_path = os.path.join(self.raw_path, "git_web_ml")

        # Node features: JSON mapping of node id (as string) -> feature list.
        with open(os.path.join(base_path, "musae_git_features.json"), "r") as f:
            features = json.load(f)

        # Node labels from the target CSV (id, ml_target).
        labels = {}
        with open(os.path.join(base_path, "musae_git_target.csv"), "r") as f:
            reader = csv.DictReader(f)
            for row in reader:
                labels[int(row["id"])] = int(row["ml_target"])

        # Directed follow edges.
        with open(os.path.join(base_path, "musae_git_edges.csv"), "r") as f:
            reader = csv.DictReader(f)
            for row in reader:
                g.add_edge(int(row["id_1"]), int(row["id_2"]))

        # Attach per-node attributes. Nodes missing a features entry get an
        # empty vector instead of raising KeyError (previously
        # features[str(node_id)] crashed on any such node); nodes missing a
        # label get -1, as before.
        feat_dim = 0
        for node_id in g.nodes:
            feat = np.asarray(features.get(str(node_id), []), dtype=np.float32)
            feat_dim = max(feat_dim, feat.shape[0])
            g.add_node(node_id, feat=feat, label=labels.get(node_id, -1))

        self._g = g
        self._num_classes = len(set(labels.values()))

        if self.verbose:
            print("Finished loading GitHub Users dataset.")
            print(f"  NumNodes: {g.number_of_nodes()}")
            print(f"  NumEdges: {g.number_of_edges()}")
            # feat_dim is tracked explicitly; the old code printed the
            # loop-leaked `feat`, a NameError whenever the graph was empty.
            print(f"  Feature dim: {feat_dim}")
            print(f"  NumClasses: {self._num_classes}")

    def __getitem__(self, idx):
        assert idx == 0, "GitHubUsersDataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        return 1

    @property
    def num_classes(self):
        r"""Number of distinct node labels (2 for musae_git)."""
        return self._num_classes
class TwitterEgoDataset(EasyGraphBuiltinDataset):
    r"""Twitter ego-network dataset (SNAP ego-Twitter, combined graph).

    The combined network contains roughly 81K users (nodes) and about
    1.8M edges collected from public Twitter data.

    Source: J. McAuley and J. Leskovec, Stanford SNAP, 2012
    URL: https://snap.stanford.edu/data/egonets-Twitter.html
    File used: https://snap.stanford.edu/data/twitter_combined.txt.gz

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        # The new keyword arguments default to the previous hard-coded
        # values, so existing `TwitterEgoDataset()` callers are unaffected.
        super(TwitterEgoDataset, self).__init__(
            name="twitter_ego",
            url="https://snap.stanford.edu/data/twitter_combined.txt.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Fetch the gzipped edge list; decompression happens in ``process``."""
        gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz")
        download(self.url, path=gz_path)
        # NOTE(review): the original also called extract_archive() on this
        # plain .gz file; a gzip stream is not an archive, and process()
        # already decompresses it lazily, so that call was dropped.

    def process(self):
        r"""Decompress (once) and load the combined edge list as an undirected graph."""
        gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz")
        txt_path = os.path.join(self.raw_path, "twitter_combined.txt")

        if not os.path.exists(txt_path):
            with gzip.open(gz_path, "rt") as f_in, open(txt_path, "w") as f_out:
                f_out.writelines(f_in)

        G = eg.Graph()
        with open(txt_path, "r") as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue  # tolerate blank/trailing lines
                u, v = map(int, stripped.split())
                G.add_edge(u, v)

        self._graphs = [G]
        self._graph = G
        self._processed = True

    def __getitem__(self, idx):
        # Validate the index (the old code silently ignored it) and honor
        # the transform hook like the sibling datasets do.
        assert idx == 0, "TwitterEgoDataset only contains one graph"
        return self._graph if self._transform is None else self._transform(self._graph)

    def __len__(self):
        return 1
class WikiTopCatsDataset(EasyGraphBuiltinDataset):
    r"""Wikipedia Top Categories hyperlink network (SNAP wiki-topcats, 2011).

    Directed hyperlink graph over Wikipedia articles restricted to the
    largest strongly connected component of articles in top-level
    categories with at least 100 members. Each node carries a ``name``
    (page title) and a ``label`` (possibly empty, possibly overlapping
    list of category names).

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(WikiTopCatsDataset, self).__init__(
            name="wiki_topcats",
            url="https://snap.stanford.edu/data/wiki-topcats.txt.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Download the edge list plus the category and page-name side files."""
        gz_path = os.path.join(self.raw_dir, "wiki-topcats.txt.gz")
        download(self.url, path=gz_path)

        cat_url = "https://snap.stanford.edu/data/wiki-topcats-categories.txt.gz"
        names_url = "https://snap.stanford.edu/data/wiki-topcats-page-names.txt.gz"
        download(cat_url, path=os.path.join(self.raw_dir, "wiki-topcats-categories.txt.gz"))
        download(names_url, path=os.path.join(self.raw_dir, "wiki-topcats-page-names.txt.gz"))

    def process(self):
        r"""Build the directed graph and attach page names and category labels."""
        raw = self.raw_dir

        # Decompress the edge list once, then stream it line by line.
        edge_gz = os.path.join(raw, "wiki-topcats.txt.gz")
        edge_txt = os.path.join(raw, "wiki-topcats.txt")
        if not os.path.exists(edge_txt):
            with gzip.open(edge_gz, "rt") as fin, open(edge_txt, "w") as fout:
                fout.writelines(fin)

        G = eg.DiGraph()
        edge_count = 0
        with open(edge_txt, "r") as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                u, v = map(int, stripped.split())
                G.add_edge(u, v)
                edge_count += 1
        if self.verbose:
            print(f"Loaded graph: {G.number_of_nodes()} nodes, {edge_count} edges")

        # Page names: each line is "<node_id> <page title>".
        # NOTE(review): assumes the SNAP layout with a leading node id per
        # line — confirm against a downloaded copy.
        names_gz = os.path.join(raw, "wiki-topcats-page-names.txt.gz")
        names = {}
        with gzip.open(names_gz, "rt") as f:
            for line in f:
                node_str, _, title = line.strip().partition(" ")
                if node_str.isdigit():
                    names[int(node_str)] = title

        # Categories: each line is "Category:<name>; <node> <node> ...".
        # The old code keyed categories by *line index*, which attached a
        # category's member list to an unrelated node id; invert the file
        # into a node -> [category name] mapping instead.
        cats_gz = os.path.join(raw, "wiki-topcats-categories.txt.gz")
        labels = {}
        with gzip.open(cats_gz, "rt") as f:
            for line in f:
                cat_name, sep, members = line.strip().partition(";")
                if not sep:
                    continue  # skip lines without a member list
                cat_name = cat_name.strip()
                for tok in members.split():
                    labels.setdefault(int(tok), []).append(cat_name)

        # Attach name/label attributes to every node in the graph.
        for n in G.nodes:
            G.add_node(n, name=names.get(n, ""), label=labels.get(n, []))

        self._graph = G
        self._graphs = [G]
        self._processed = True

    def __getitem__(self, idx):
        assert idx == 0
        return self._graph

    def __len__(self):
        return 1
.wiki_topcats import WikiTopCatsDataset \ No newline at end of file From 0a04eae6a529cc26cb5fa2a7c6cc6cb4844eaf18 Mon Sep 17 00:00:00 2001 From: sama Date: Tue, 8 Jul 2025 01:54:16 -0600 Subject: [PATCH 12/12] fixed linter errors --- easygraph/amazon_computers.py | 103 -------------------- easygraph/datasets/__init__.py | 25 +++-- easygraph/datasets/amazon_photo.py | 9 +- easygraph/datasets/arxiv.py | 15 +-- easygraph/datasets/citation_graph.py | 4 +- easygraph/datasets/coauthor.py | 10 +- easygraph/datasets/dynamic/email_enron.py | 3 +- easygraph/datasets/dynamic/email_eu.py | 3 +- easygraph/datasets/dynamic/hospital_lyon.py | 7 +- easygraph/datasets/facebook_ego.py | 23 +++-- easygraph/datasets/flickr.py | 50 +++++++--- easygraph/datasets/github.py | 14 +-- easygraph/datasets/graph_dataset_base.py | 4 +- easygraph/datasets/ppi.py | 3 +- easygraph/datasets/reddit.py | 50 +++++++--- easygraph/datasets/roadnet.py | 15 +-- easygraph/datasets/twitter_ego.py | 13 ++- easygraph/datasets/web_google.py | 21 ++-- easygraph/datasets/wiki_topcats.py | 17 +++- easygraph/model/hypergraphs/hwnn.py | 2 +- easygraph/nn/convs/hypergraphs/hwnn_conv.py | 2 +- 21 files changed, 188 insertions(+), 205 deletions(-) delete mode 100644 easygraph/amazon_computers.py diff --git a/easygraph/amazon_computers.py b/easygraph/amazon_computers.py deleted file mode 100644 index 06c27beb..00000000 --- a/easygraph/amazon_computers.py +++ /dev/null @@ -1,103 +0,0 @@ -import os -import numpy as np -import easygraph as eg -import scipy.sparse as sp - -from easygraph.classes.graph import Graph -from .graph_dataset_base import EasyGraphBuiltinDataset - - -class AmazonComputersDataset(EasyGraphBuiltinDataset): - r"""Amazon Computers co-purchase graph dataset. - - Nodes represent computer products, and edges link products frequently co-purchased. - Node features are bag-of-words of product reviews. The task is to classify - the product category. 
- - Statistics: - - Nodes: 13,752 - - Edges: 245,861 - - Number of Classes: 10 - - Features: 767 - - Parameters - ---------- - raw_dir : str, optional - Raw file directory to download/contains the input data directory. Default: None - force_reload : bool, optional - Whether to reload the dataset. Default: False - verbose : bool, optional - Whether to print out progress information. Default: True - transform : callable, optional - A transform that takes in a :class:`~easygraph.Graph` object and returns - a transformed version. The :class:`~easygraph.Graph` object will be - transformed before every access. - - Examples - -------- - >>> from easygraph.datasets import AmazonComputersDataset - >>> dataset = AmazonComputersDataset() - >>> g = dataset[0] - >>> print(g.number_of_nodes()) - >>> print(g.number_of_edges()) - >>> print(g.nodes[0]['feat'].shape) - >>> print(g.nodes[0]['label']) - >>> print(dataset.num_classes) - """ - - def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): - name = "amazon_computers" - url = "https://data.dgl.ai/dataset/amazon_co_buy_computers.zip" - super(AmazonComputersDataset, self).__init__( - name=name, - url=url, - raw_dir=raw_dir, - force_reload=force_reload, - verbose=verbose, - transform=transform, - ) - - def process(self): - path = os.path.join(self.raw_path, "amazon_co_buy_computers.npz") - data = np.load(path) - - adj = sp.csr_matrix( - (data["adj_data"], data["adj_indices"], data["adj_indptr"]), - shape=data["adj_shape"], - ) - - features = sp.csr_matrix( - (data["attr_data"], data["attr_indices"], data["attr_indptr"]), - shape=data["attr_shape"], - ).todense() - - labels = data["labels"] - - g = eg.Graph() - g.add_edges_from(list(zip(*adj.nonzero()))) - - for i in range(features.shape[0]): - g.add_node(i, feat=np.array(features[i]).squeeze(), label=int(labels[i])) - - self._g = g - self._num_classes = len(np.unique(labels)) - - if self.verbose: - print("Finished loading AmazonComputers dataset.") - 
print(f" NumNodes: {g.number_of_nodes()}") - print(f" NumEdges: {g.number_of_edges()}") - print(f" NumFeats: {features.shape[1]}") - print(f" NumClasses: {self._num_classes}") - - def __getitem__(self, idx): - assert idx == 0, "AmazonComputersDataset only contains one graph" - if self._g is None: - raise ValueError("Graph has not been loaded or processed correctly.") - return self._g if self._transform is None else self._transform(self._g) - - def __len__(self): - return 1 - - @property - def num_classes(self): - return self._num_classes diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index 9bc87c89..035ada50 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -8,6 +8,7 @@ from easygraph.datasets.hypergraph.House_Committees import House_Committees from easygraph.datasets.karate import KarateClubDataset from easygraph.datasets.mathoverflow_answers import mathoverflow_answers + from .ppi import LegacyPPIDataset from .ppi import PPIDataset except Exception as e: @@ -16,21 +17,19 @@ " hypergraph-related datasets." 
) -from .citation_graph import ( - CitationGraphDataset, - CiteseerGraphDataset, - CoraBinary, - CoraGraphDataset, - PubmedGraphDataset, -) -from .coauthor import CoauthorCSDataset from .amazon_photo import AmazonPhotoDataset -from .reddit import RedditDataset -from .flickr import FlickrDataset -from .facebook_ego import FacebookEgoNetDataset -from .roadnet import RoadNetCADataset from .arxiv import ArxivHEPTHDataset +from .citation_graph import CitationGraphDataset +from .citation_graph import CiteseerGraphDataset +from .citation_graph import CoraBinary +from .citation_graph import CoraGraphDataset +from .citation_graph import PubmedGraphDataset +from .coauthor import CoauthorCSDataset +from .facebook_ego import FacebookEgoNetDataset +from .flickr import FlickrDataset from .github import GitHubUsersDataset +from .reddit import RedditDataset +from .roadnet import RoadNetCADataset from .twitter_ego import TwitterEgoDataset from .web_google import WebGoogleDataset -from .wiki_topcats import WikiTopCatsDataset \ No newline at end of file +from .wiki_topcats import WikiTopCatsDataset diff --git a/easygraph/datasets/amazon_photo.py b/easygraph/datasets/amazon_photo.py index 29156976..a9295a20 100644 --- a/easygraph/datasets/amazon_photo.py +++ b/easygraph/datasets/amazon_photo.py @@ -1,11 +1,16 @@ import os -import numpy as np + import easygraph as eg +import numpy as np import scipy.sparse as sp from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive, tensor, data_type_dict +from .utils import data_type_dict +from .utils import download +from .utils import extract_archive +from .utils import tensor class AmazonPhotoDataset(EasyGraphBuiltinDataset): diff --git a/easygraph/datasets/arxiv.py b/easygraph/datasets/arxiv.py index 2239e2c9..cfce499b 100644 --- a/easygraph/datasets/arxiv.py +++ b/easygraph/datasets/arxiv.py @@ -13,15 +13,18 @@ - Labels: None Reference: -J. Leskovec, J. 
Kleinberg and C. Faloutsos, "Graphs over Time: Densification Laws, Shrinking Diameters and Possible Explanations," +J. Leskovec, J. Kleinberg and C. Faloutsos, "Graphs over Time: Densification Laws, Shrinking Diameters and Possible Explanations," in KDD 2005. Dataset: https://snap.stanford.edu/data/cit-HepTh.html """ -import os import gzip +import os import shutil + import easygraph as eg + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset from .utils import download @@ -71,17 +74,17 @@ def download(self): if not os.path.exists(self.raw_path): os.makedirs(self.raw_path) - with gzip.open(compressed_path, 'rb') as f_in: - with open(extracted_path, 'wb') as f_out: + with gzip.open(compressed_path, "rb") as f_in: + with open(extracted_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out) def process(self): graph = eg.DiGraph() # Citation network is directed edge_list_path = os.path.join(self.raw_path, self.name + ".txt") - with open(edge_list_path, 'r') as f: + with open(edge_list_path, "r") as f: for line in f: - if line.startswith('#') or line.strip() == "": + if line.startswith("#") or line.strip() == "": continue u, v = map(int, line.strip().split()) graph.add_edge(u, v) diff --git a/easygraph/datasets/citation_graph.py b/easygraph/datasets/citation_graph.py index 3bb2fead..3795d678 100644 --- a/easygraph/datasets/citation_graph.py +++ b/easygraph/datasets/citation_graph.py @@ -1,6 +1,5 @@ -"""Cora, citeseer, pubmed dataset. +"""Cora, citeseer, pubmed dataset.""" -""" from __future__ import absolute_import import os @@ -53,6 +52,7 @@ class CitationGraphDataset(EasyGraphBuiltinDataset): reorder : bool Whether to reorder the graph using :func:`~eg.reorder_graph`. Default: False. 
""" + _urls = { "cora_v2": "dataset/cora_v2.zip", "citeseer": "dataset/citeseer.zip", diff --git a/easygraph/datasets/coauthor.py b/easygraph/datasets/coauthor.py index 1df6d3a9..fe90f734 100644 --- a/easygraph/datasets/coauthor.py +++ b/easygraph/datasets/coauthor.py @@ -15,13 +15,18 @@ """ import os -import numpy as np + import easygraph as eg +import numpy as np import scipy.sparse as sp from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive, tensor, data_type_dict +from .utils import data_type_dict +from .utils import download +from .utils import extract_archive +from .utils import tensor class CoauthorCSDataset(EasyGraphBuiltinDataset): @@ -52,6 +57,7 @@ class CoauthorCSDataset(EasyGraphBuiltinDataset): >>> print("Label:", g.nodes[0]['label']) >>> print("Number of classes:", dataset.num_classes) """ + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): name = "coauthor_cs" url = "https://data.dgl.ai/dataset/coauthor_cs.zip" diff --git a/easygraph/datasets/dynamic/email_enron.py b/easygraph/datasets/dynamic/email_enron.py index aad3087e..0fb24f78 100644 --- a/easygraph/datasets/dynamic/email_enron.py +++ b/easygraph/datasets/dynamic/email_enron.py @@ -73,8 +73,7 @@ def download(self): self.load_data = data def process(self): - """Loads input data from data directory and transfer to target graph for better analysis - """ + """Loads input data from data directory and transfer to target graph for better analysis""" self._g, edge_feature_list = dict_to_hypergraph(self.load_data, is_dynamic=True) diff --git a/easygraph/datasets/dynamic/email_eu.py b/easygraph/datasets/dynamic/email_eu.py index 236e6ecd..51c150ed 100644 --- a/easygraph/datasets/dynamic/email_eu.py +++ b/easygraph/datasets/dynamic/email_eu.py @@ -70,8 +70,7 @@ def download(self): self.load_data = data def process(self): - """Loads input data from data directory and transfer to 
target graph for better analysis - """ + """Loads input data from data directory and transfer to target graph for better analysis""" self._g, edge_feature_list = dict_to_hypergraph(self.load_data, is_dynamic=True) self._g.ndata["hyperedge_feature"] = tensor( range(1, len(edge_feature_list) + 1) diff --git a/easygraph/datasets/dynamic/hospital_lyon.py b/easygraph/datasets/dynamic/hospital_lyon.py index 6784d8f9..e7f93566 100644 --- a/easygraph/datasets/dynamic/hospital_lyon.py +++ b/easygraph/datasets/dynamic/hospital_lyon.py @@ -10,7 +10,9 @@ class Hospital_Lyon(EasyGraphDataset): _urls = { - "hospital_lyon": "easygraph-data-hospital-lyon/-/raw/main/hospital-lyon.json?ref_type=heads&inline=false", + "hospital_lyon": ( + "easygraph-data-hospital-lyon/-/raw/main/hospital-lyon.json?ref_type=heads&inline=false" + ), } def __init__( @@ -119,8 +121,7 @@ def download(self): self.load_data = data def process(self): - """Loads input data from data directory and transfer to target graph for better analysis - """ + """Loads input data from data directory and transfer to target graph for better analysis""" self._g, edge_feature_list = self.preprocess(self.load_data, is_dynamic=True) self._g.ndata["hyperedge_feature"] = tensor( diff --git a/easygraph/datasets/facebook_ego.py b/easygraph/datasets/facebook_ego.py index 505a594d..33eabf33 100644 --- a/easygraph/datasets/facebook_ego.py +++ b/easygraph/datasets/facebook_ego.py @@ -1,14 +1,14 @@ """Facebook Ego-Net Dataset -This dataset contains a subset of Facebook’s social network collected from -survey participants in the SNAP EgoNet project. Nodes represent users, and +This dataset contains a subset of Facebook’s social network collected from +survey participants in the SNAP EgoNet project. Nodes represent users, and edges indicate friendship links between them. -Each ego network is centered on a user and includes their friend connections -and friend-to-friend connections. 
The `.circles` files contain labeled groups +Each ego network is centered on a user and includes their friend connections +and friend-to-friend connections. The `.circles` files contain labeled groups (i.e., communities) of friends identified by the ego user. -This version processes all ego-nets as a single undirected graph. Node features +This version processes all ego-nets as a single undirected graph. Node features are not provided. Labels (circles) are optional and not included by default. Statistics (based on merged graph): @@ -18,15 +18,19 @@ - Classes: None Reference: -J. McAuley and J. Leskovec, “Learning to Discover Social Circles in Ego Networks,” +J. McAuley and J. Leskovec, “Learning to Discover Social Circles in Ego Networks,” in NIPS, 2012. [https://snap.stanford.edu/data/egonets-Facebook.html] """ import os + import easygraph as eg + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive +from .utils import download +from .utils import extract_archive class FacebookEgoNetDataset(EasyGraphBuiltinDataset): @@ -76,7 +80,7 @@ def process(self): if filename.endswith(".edges"): edge_file = os.path.join(parent_dir, filename) - with open(edge_file, 'r') as f: + with open(edge_file, "r") as f: for line in f: u, v = map(int, line.strip().split()) g.add_edge(u, v) @@ -96,9 +100,10 @@ def __getitem__(self, idx): def __len__(self): return 1 + def download(self): r"""Automatically download data and extract it.""" if self.url is not None: archive_path = os.path.join(self.raw_dir, self.name + ".tar.gz") download(self.url, path=archive_path) - extract_archive(archive_path, self.raw_path) \ No newline at end of file + extract_archive(archive_path, self.raw_path) diff --git a/easygraph/datasets/flickr.py b/easygraph/datasets/flickr.py index 8a226f84..022308a8 100644 --- a/easygraph/datasets/flickr.py +++ b/easygraph/datasets/flickr.py @@ -1,11 +1,16 @@ -import os import json 
+import os + +import easygraph as eg import numpy as np import scipy.sparse as sp -import easygraph as eg + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import tensor, data_type_dict +from .utils import data_type_dict +from .utils import tensor + class FlickrDataset(EasyGraphBuiltinDataset): r"""Flickr dataset for node classification. @@ -42,13 +47,26 @@ class FlickrDataset(EasyGraphBuiltinDataset): >>> print(g.number_of_nodes(), g.number_of_edges(), ds.num_classes) >>> print(g.nodes[0]['feat'].shape, g.nodes[0]['label']) """ - def __init__(self, raw_dir=None, force_reload=False, verbose=False, transform=None, reorder=False): + + def __init__( + self, + raw_dir=None, + force_reload=False, + verbose=False, + transform=None, + reorder=False, + ): name = "flickr" url = self._get_dgl_url("dataset/flickr.zip") self._reorder = reorder - super(FlickrDataset, self).__init__(name=name, url=url, raw_dir=raw_dir, - force_reload=force_reload, - verbose=verbose, transform=transform) + super(FlickrDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) def process(self): # Load adjacency @@ -66,15 +84,16 @@ def process(self): # Load train/val/test splits with open(os.path.join(self.raw_path, "role.json")) as f: role = json.load(f) - train_mask = np.zeros(feats.shape[0], dtype=bool); train_mask[role["tr"]] = True - val_mask = np.zeros(feats.shape[0], dtype=bool); val_mask[role["va"]] = True - test_mask = np.zeros(feats.shape[0], dtype=bool); test_mask[role["te"]] = True + train_mask = np.zeros(feats.shape[0], dtype=bool) + train_mask[role["tr"]] = True + val_mask = np.zeros(feats.shape[0], dtype=bool) + val_mask[role["va"]] = True + test_mask = np.zeros(feats.shape[0], dtype=bool) + test_mask[role["te"]] = True # Attach node data for i in range(feats.shape[0]): - g.add_node(i, - feat=feats[i].astype(np.float32), - 
label=int(labels[i])) + g.add_node(i, feat=feats[i].astype(np.float32), label=int(labels[i])) g.graph["train_mask"] = train_mask g.graph["val_mask"] = val_mask g.graph["test_mask"] = test_mask @@ -83,7 +102,9 @@ def process(self): self._num_classes = int(labels.max() + 1) if self.verbose: print("Loaded Flickr dataset") - print(f" Nodes: {g.number_of_nodes()}, Edges: {g.number_of_edges()}, Features: {feats.shape[1]}, Classes: {self._num_classes}") + print( + f" Nodes: {g.number_of_nodes()}, Edges: {g.number_of_edges()}, Features: {feats.shape[1]}, Classes: {self._num_classes}" + ) def __getitem__(self, idx): assert idx == 0, "FlickrDataset contains only one graph" @@ -104,4 +125,5 @@ def num_classes(self): @staticmethod def _get_dgl_url(path): from .utils import _get_dgl_url + return _get_dgl_url(path) diff --git a/easygraph/datasets/github.py b/easygraph/datasets/github.py index 35f40a80..e0aebda1 100644 --- a/easygraph/datasets/github.py +++ b/easygraph/datasets/github.py @@ -14,18 +14,22 @@ - Classes: 2 Reference: -J. Leskovec et al. "SNAP Datasets: Stanford Large Network Dataset Collection", +J. Leskovec et al. 
"SNAP Datasets: Stanford Large Network Dataset Collection", https://snap.stanford.edu/data/github-social.html """ -import os import csv import json -import numpy as np +import os + import easygraph as eg +import numpy as np + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive +from .utils import download +from .utils import extract_archive class GitHubUsersDataset(EasyGraphBuiltinDataset): @@ -70,12 +74,10 @@ def download(self): download(self.url, path=archive) extract_archive(archive, self.raw_path) - def process(self): g = eg.DiGraph() base_path = os.path.join(self.raw_path, "git_web_ml") - # Load node features with open(os.path.join(base_path, "musae_git_features.json"), "r") as f: features = json.load(f) diff --git a/easygraph/datasets/graph_dataset_base.py b/easygraph/datasets/graph_dataset_base.py index 1077ddf7..b1d831be 100644 --- a/easygraph/datasets/graph_dataset_base.py +++ b/easygraph/datasets/graph_dataset_base.py @@ -1,5 +1,4 @@ -"""Basic EasyGraph Dataset -""" +"""Basic EasyGraph Dataset""" from __future__ import absolute_import @@ -8,6 +7,7 @@ import os import sys import traceback + from ..utils import retry_method_with_fix from .utils import download from .utils import extract_archive diff --git a/easygraph/datasets/ppi.py b/easygraph/datasets/ppi.py index 06c350cb..950a434c 100644 --- a/easygraph/datasets/ppi.py +++ b/easygraph/datasets/ppi.py @@ -1,4 +1,5 @@ -""" PPIDataset for inductive learning. 
""" +"""PPIDataset for inductive learning.""" + import json import os diff --git a/easygraph/datasets/reddit.py b/easygraph/datasets/reddit.py index d15bafde..a5e39493 100644 --- a/easygraph/datasets/reddit.py +++ b/easygraph/datasets/reddit.py @@ -1,11 +1,17 @@ import os -import numpy as np + import easygraph as eg +import numpy as np import scipy.sparse as sp from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive, tensor, data_type_dict +from .utils import data_type_dict +from .utils import download +from .utils import extract_archive +from .utils import tensor + class RedditDataset(EasyGraphBuiltinDataset): r"""Reddit posts graph (Sept 2014) for community (subreddit) classification. @@ -24,21 +30,33 @@ class RedditDataset(EasyGraphBuiltinDataset): Add self-loop edges if True. raw_dir, force_reload, verbose, transform : same as EasyGraphBuiltinDataset """ - def __init__(self, self_loop=False, raw_dir=None, force_reload=False, - verbose=True, transform=None): + + def __init__( + self, + self_loop=False, + raw_dir=None, + force_reload=False, + verbose=True, + transform=None, + ): name = "reddit" url = "https://data.dgl.ai/dataset/reddit.zip" self.self_loop = self_loop - super().__init__(name=name, url=url, raw_dir=raw_dir, - force_reload=force_reload, verbose=verbose, - transform=transform) + super().__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) def process(self): # Expect two files extracted: reddit_data.npz & reddit_graph.npz data = np.load(os.path.join(self.raw_path, "reddit_data.npz")) - feat = data["feature"] # shape [N, 602] - labels = data["label"] # shape [N] - split = data["node_types"] # 1=train,2=val,3=test + feat = data["feature"] # shape [N, 602] + labels = data["label"] # shape [N] + split = data["node_types"] # 1=train,2=val,3=test # Load adjacency adj = 
sp.load_npz(os.path.join(self.raw_path, "reddit_graph.npz")) @@ -55,10 +73,14 @@ def process(self): # Assign node features, labels, and masks for i in range(feat.shape[0]): - g.add_node(i, feat=feat[i], label=int(labels[i]), - train_mask=(split[i] == 1), - val_mask=(split[i] == 2), - test_mask=(split[i] == 3)) + g.add_node( + i, + feat=feat[i], + label=int(labels[i]), + train_mask=(split[i] == 1), + val_mask=(split[i] == 2), + test_mask=(split[i] == 3), + ) self._g = g self._num_classes = int(np.max(labels) + 1) diff --git a/easygraph/datasets/roadnet.py b/easygraph/datasets/roadnet.py index 62e5203f..1d7bfa8a 100644 --- a/easygraph/datasets/roadnet.py +++ b/easygraph/datasets/roadnet.py @@ -12,15 +12,18 @@ - Labels: None Reference: -J. Leskovec and A. Krevl, “SNAP Datasets: Stanford Large Network Dataset Collection,” +J. Leskovec and A. Krevl, “SNAP Datasets: Stanford Large Network Dataset Collection,” https://snap.stanford.edu/data/roadNet-CA.html """ -import os import gzip +import os import shutil + import easygraph as eg + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset from .utils import download @@ -72,17 +75,17 @@ def download(self): if not os.path.exists(self.raw_path): os.makedirs(self.raw_path) - with gzip.open(compressed_path, 'rb') as f_in: - with open(extracted_path, 'wb') as f_out: + with gzip.open(compressed_path, "rb") as f_in: + with open(extracted_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out) def process(self): graph = eg.Graph() # Undirected road network edge_list_path = os.path.join(self.raw_path, self.name + ".txt") - with open(edge_list_path, 'r') as f: + with open(edge_list_path, "r") as f: for line in f: - if line.startswith('#') or line.strip() == "": + if line.startswith("#") or line.strip() == "": continue u, v = map(int, line.strip().split()) graph.add_edge(u, v) diff --git a/easygraph/datasets/twitter_ego.py b/easygraph/datasets/twitter_ego.py index d88b085a..7b631214 100644 
--- a/easygraph/datasets/twitter_ego.py +++ b/easygraph/datasets/twitter_ego.py @@ -1,8 +1,12 @@ import gzip import os + import easygraph as eg + from easygraph.datasets.graph_dataset_base import EasyGraphBuiltinDataset -from easygraph.datasets.utils import download, extract_archive +from easygraph.datasets.utils import download +from easygraph.datasets.utils import extract_archive + class TwitterEgoDataset(EasyGraphBuiltinDataset): r""" @@ -11,9 +15,9 @@ class TwitterEgoDataset(EasyGraphBuiltinDataset): The Twitter dataset was collected from public sources and contains a large ego-network of Twitter users. The combined network includes 81K edges among 81K users. - Source: J. McAuley and J. Leskovec, Stanford SNAP, 2012 - URL: https://snap.stanford.edu/data/egonets-Twitter.html - File used: https://snap.stanford.edu/data/twitter_combined.txt.gz + Source: J. McAuley and J. Leskovec, Stanford SNAP, 2012 + URL: https://snap.stanford.edu/data/egonets-Twitter.html + File used: https://snap.stanford.edu/data/twitter_combined.txt.gz """ def __init__(self): @@ -30,6 +34,7 @@ def download(self): def process(self): import gzip + import easygraph as eg gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz") diff --git a/easygraph/datasets/web_google.py b/easygraph/datasets/web_google.py index 68b5360e..97597299 100644 --- a/easygraph/datasets/web_google.py +++ b/easygraph/datasets/web_google.py @@ -17,13 +17,17 @@ Dataset from SNAP: https://snap.stanford.edu/data/web-Google.html """ +import gzip import os +import shutil + import easygraph as eg + from easygraph.classes.graph import Graph + from .graph_dataset_base import EasyGraphBuiltinDataset -from .utils import download, extract_archive -import gzip -import shutil +from .utils import download +from .utils import extract_archive class WebGoogleDataset(EasyGraphBuiltinDataset): @@ -72,9 +76,9 @@ def process(self): graph = eg.DiGraph() # Web-Google is directed edge_list_path = os.path.join(self.raw_path, self.name + 
".txt") - with open(edge_list_path, 'r') as f: + with open(edge_list_path, "r") as f: for line in f: - if line.startswith('#') or line.strip() == "": + if line.startswith("#") or line.strip() == "": continue u, v = map(int, line.strip().split()) graph.add_edge(u, v) @@ -94,6 +98,7 @@ def __getitem__(self, idx): def __len__(self): return 1 + def download(self): r"""Download and decompress the .txt.gz file.""" if self.url is not None: @@ -108,6 +113,6 @@ def download(self): os.makedirs(self.raw_path) # Decompress manually - with gzip.open(compressed_path, 'rb') as f_in: - with open(extracted_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) \ No newline at end of file + with gzip.open(compressed_path, "rb") as f_in: + with open(extracted_path, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) diff --git a/easygraph/datasets/wiki_topcats.py b/easygraph/datasets/wiki_topcats.py index 5a4085b0..9c337d5f 100644 --- a/easygraph/datasets/wiki_topcats.py +++ b/easygraph/datasets/wiki_topcats.py @@ -16,14 +16,19 @@ Data: https://snap.stanford.edu/data/wiki-topcats.html """ -import os import gzip +import os + import easygraph as eg + from easygraph.datasets.graph_dataset_base import EasyGraphBuiltinDataset -from easygraph.datasets.utils import download, extract_archive +from easygraph.datasets.utils import download +from easygraph.datasets.utils import extract_archive + class WikiTopCatsDataset(EasyGraphBuiltinDataset): """Wikipedia Top Categories Snapshot from 2011 (SNAP)""" + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): super(WikiTopCatsDataset, self).__init__( name="wiki_topcats", @@ -42,8 +47,12 @@ def download(self): # Also download category info and page names cat_url = "https://snap.stanford.edu/data/wiki-topcats-categories.txt.gz" names_url = "https://snap.stanford.edu/data/wiki-topcats-page-names.txt.gz" - download(cat_url, path=os.path.join(self.raw_dir, "wiki-topcats-categories.txt.gz")) - download(names_url, 
path=os.path.join(self.raw_dir, "wiki-topcats-page-names.txt.gz")) + download( + cat_url, path=os.path.join(self.raw_dir, "wiki-topcats-categories.txt.gz") + ) + download( + names_url, path=os.path.join(self.raw_dir, "wiki-topcats-page-names.txt.gz") + ) def process(self): raw = self.raw_dir diff --git a/easygraph/model/hypergraphs/hwnn.py b/easygraph/model/hypergraphs/hwnn.py index 37684c39..980bd39e 100644 --- a/easygraph/model/hypergraphs/hwnn.py +++ b/easygraph/model/hypergraphs/hwnn.py @@ -39,7 +39,7 @@ def __init__( def forward(self, X: torch.Tensor, hgs: list) -> torch.Tensor: r"""The forward function. - + Parameters: ``X`` (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`. ``hg`` (``eg.Hypergraph``): The hypergraph structure that contains :math:`N` vertices. diff --git a/easygraph/nn/convs/hypergraphs/hwnn_conv.py b/easygraph/nn/convs/hypergraphs/hwnn_conv.py index ea7ea563..7c1fa7e8 100644 --- a/easygraph/nn/convs/hypergraphs/hwnn_conv.py +++ b/easygraph/nn/convs/hypergraphs/hwnn_conv.py @@ -44,7 +44,7 @@ def init_parameters(self): def forward(self, X: torch.Tensor, hg: Hypergraph) -> torch.Tensor: r"""The forward function. - + Parameters: X (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`. hg (``eg.Hypergraph``): The hypergraph structure that contains :math:`N` vertices.