some initial changes for numa_pht. Definitely not working

etwest · etwest · commit 6ac8d2575c17 · 2025-06-19T23:45:58.000-04:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -69,7 +69,6 @@ FetchContent_Declare(
 FetchContent_MakeAvailable(googletest GraphZeppelinCommon StreamingUtilities)
 
 add_library(GutterTree
-  src/work_queue.cpp
   include/work_queue.h
   include/guttering_system.h
   src/guttering_configuration.cpp
@@ -84,6 +83,8 @@ add_library(GutterTree
   include/standalone_gutters.h
   src/cache_guttering.cpp
   include/cache_guttering.h
+  src/numa_pht.cpp
+  include/numa_pht.h
   include/types.h)
 add_dependencies(GutterTree GraphZeppelinCommon StreamingUtilities)
 target_link_libraries(GutterTree PUBLIC gtest GraphZeppelinCommon StreamingUtilities)
diff --git a/experiment/cache_exp.cpp b/experiment/cache_exp.cpp
@@ -16,7 +16,7 @@ static std::atomic<size_t> num_updates_processed;
 // queries the guttering system
 // Should be run in a seperate thread
 static void querier(GutteringSystem *gts) {
-  WorkQueue::DataNode *data;
+  WorkQueue<update_batch>::DataNode *data;
   while(true) {
     bool valid = gts->get_data(data);
     if (valid) {
diff --git a/experiment/standalone_exp.cpp b/experiment/standalone_exp.cpp
@@ -11,7 +11,7 @@ static constexpr uint32_t prime = 100000007;
 // queries the guttering system
 // Should be run in a seperate thread
 void querier(GutteringSystem *gts) {
-  WorkQueue::DataNode *data;
+  WorkQueue<update_batch>::DataNode *data;
   while(true) {
     bool valid = gts->get_data(data);
     if(!valid && shutdown)
diff --git a/include/guttering_system.h b/include/guttering_system.h
@@ -7,6 +7,11 @@
 #include "work_queue.h"
 #include "stream_types.h"
 
+struct update_batch {  // element type stored in the guttering system's WorkQueue<update_batch>
+  node_id_t node_idx;  // presumably the graph node these updates belong to -- TODO confirm
+  std::vector<node_id_t> upd_vec;  // update payload; GutteringSystem's constructor reserves its capacity
+};
+
 class GutteringSystem {
  public:
   // Constructor for programmatic configuration
@@ -20,9 +25,18 @@ class GutteringSystem {
         wq_batch_per_elm(conf._wq_batch_per_elm),
         num_nodes(num_nodes),
         leaf_gutter_size(conf._gutter_bytes / sizeof(node_id_t)),
-        wq(workers * queue_factor,
-           page_slots ? leaf_gutter_size + page_size / sizeof(node_id_t) : leaf_gutter_size,
-           wq_batch_per_elm) {
+        wq(workers * queue_factor, wq_batch_per_elm) {
+    // Capacity to reserve for each batch's update vector: the leaf gutter size, plus one
+    // page worth of node ids when page_slots is set.
+    const size_t batch_len =
+        page_slots ? leaf_gutter_size + page_size / sizeof(node_id_t) : leaf_gutter_size;
+
+    // Pre-build the buffers the work queue starts with: one vector of wq_batch_per_elm
+    // batches per queue element, each batch with batch_len ids of capacity reserved.
+    const size_t num_elms = workers * queue_factor;
+    std::vector<std::vector<update_batch>> wq_data;
+    wq_data.reserve(num_elms);
+    for (size_t i = 0; i < num_elms; i++) {
+      wq_data.emplace_back(wq_batch_per_elm);
+      for (update_batch &batch : wq_data.back()) {
+        batch.upd_vec.reserve(batch_len);
+      }
+    }
+    // Pass by move: populate_queue takes its argument by value, and a copied vector is not
+    // required to keep (and in practice does not keep) the capacity reserved above.
+    wq.populate_queue(std::move(wq_data));
+
     std::cout << conf << std::endl;
   }
   virtual ~GutteringSystem(){};
@@ -61,8 +75,8 @@ class GutteringSystem {
   size_t gutter_size() { return leaf_gutter_size * sizeof(node_id_t); }
 
   // get data out of the guttering system either one gutter at a time or in a batched fashion
-  bool get_data(WorkQueue::DataNode *&data) { return wq.peek(data); }
-  void get_data_callback(WorkQueue::DataNode *data) { wq.peek_callback(data); }
+  bool get_data(WorkQueue<update_batch>::DataNode *&data) { return wq.pop(data); }  // forwards to wq.pop(); false means no node was handed out
+  void get_data_callback(WorkQueue<update_batch>::DataNode *data) { wq.pop_callback(data); }  // returns a processed node to the work queue for reuse
   void set_non_block(bool block) { wq.set_non_block(block); }  // set non-blocking calls in wq
  protected:
   // parameters of the GutteringSystem, defined by the GutteringConfiguration param or config file
@@ -75,5 +89,5 @@ class GutteringSystem {
 
   const node_id_t num_nodes;
   const node_id_t leaf_gutter_size;
-  WorkQueue wq;
+  WorkQueue<update_batch> wq;
 };
diff --git a/include/work_queue.h b/include/work_queue.h
@@ -4,83 +4,190 @@
 #include <utility>
 #include <atomic>
 #include <vector>
+#include <exception>
+#include <stdexcept>
+#include <string>
+#include <cstdio>
 #include "types.h"
 
-struct update_batch {
-  node_id_t node_idx;
-  std::vector<node_id_t> upd_vec;
-};
-
+template<class T> // templatized by data type we're storing
 class WorkQueue {
  public:
   class DataNode {
    private:
     // LL next pointer
     DataNode *next = nullptr;
-    std::vector<update_batch> batches;
+    std::vector<T> data_batch;
 
-    DataNode(const size_t batch_per_elm, const size_t vec_size) {
-      batches.resize(batch_per_elm);
-      for (size_t i = 0; i < batch_per_elm; i++) {
-        batches[i].upd_vec.reserve(vec_size);
-      }
-    }
     friend class WorkQueue;
    public:
-    const std::vector<update_batch>& get_batches() { return batches; }
+    const std::vector<T>& get_batches() { return data_batch; }
   };
 
-  /*
+  /**
    * Construct a work queue
-   * The number of elements in the queue is num_batches / batch_per_elm
-   * As a consequence num_batches is rounded up to the nearest multiple of batch_per_elm
-   * @param num_batches     the rough number of batches to have in the queue
-   * @param max_batch_size  the maximum size of a batch
-   * @param batch_per_elm   number of batches per queue element.
+   * @param num_queue_elements   the number of elements (DataNodes) in the queue
+   * @param data_per_elm         number of data items of type T per queue element (push rejects more)
+   */
+  WorkQueue(size_t num_queue_elements, size_t data_per_elm)
+      : len(num_queue_elements), batch_per_elm(data_per_elm), non_block(false) {
+    // Every node starts out on the producer list, i.e. free for push() to fill.
+    // Nodes are created with an empty data vector; nothing is reserved here.
+    for (size_t made = 0; made != len; ++made) {
+      DataNode *fresh = new DataNode();
+      fresh->next = producer_list;  // link in front of the current head
+      producer_list = fresh;        // and make it the new head
+    }
+  }
+  ~WorkQueue() {
+    // Hold both list locks while tearing down so the head pointers we read are current.
+    producer_list_lock.lock();
+    consumer_list_lock.lock();
+
+    // Deletes every node reachable from `head`, leaving `head` null.
+    // Nodes handed out by pop() and not yet returned via pop_callback() are on neither
+    // list and are therefore not freed here.
+    auto free_chain = [](DataNode *&head) {
+      while (head != nullptr) {
+        DataNode *doomed = head;
+        head = doomed->next;
+        delete doomed;
+      }
+    };
+    free_chain(producer_list);
+    free_chain(consumer_list);
+
+    producer_list_lock.unlock();
+    consumer_list_lock.unlock();
+  }
+
+  /**
+   * Replace the (initially empty) data vectors of the queue elements with caller-provided ones
+   * If this function is called, IT MUST be called before performing any operations with the queue
+   * The queue also works without it, so long as it is acceptable that the vectors swapped back
+   * out of push start out empty (i.e. user initializes them after push or does not need them)
+   * @param data_batches   one vector per queue element, each holding exactly data_per_elm items.
+   *                       These start in the queue and are swapped with data that is pushed.
+   *                       Taken by value and moved into the queue; pass an rvalue (std::move)
+   *                       to avoid a deep copy that would drop any reserved capacity.
+   * @throws std::invalid_argument if the outer or any inner vector has the wrong size. All
+   *                       sizes are checked up front, so the queue is unchanged on a throw.
+   */
+  void populate_queue(std::vector<std::vector<T>> data_batches) {
+    if (data_batches.size() != len) {
+      throw std::invalid_argument("WQ: Error number of initialized data batches incorrect");
+    }
+    // validate everything before touching any node so a bad batch cannot leave the
+    // queue half-populated
+    for (const std::vector<T> &batch : data_batches) {
+      if (batch.size() != batch_per_elm) {
+        throw std::invalid_argument("WQ: Error number of data elements per batch incorrect");
+      }
+    }
+
+    // walk the producer list, moving (not copying) one batch into each node
+    DataNode *node = producer_list;  // head of producer list
+    for (std::vector<T> &batch : data_batches) {
+      node->data_batch = std::move(batch);
+      node = node->next;
+    }
+  }
 
-  /* 
+  /**
    * Add a data element to the queue
    * @param upd_vec_batch  vector of graph node id the associated updates
    */
-  void push(std::vector<update_batch> &upd_vec_batch);
+  void push(std::vector<T> &upd_vec_batch) {
+    // reject batches that hold more items than a single queue element allows
+    const size_t incoming = upd_vec_batch.size();
+    if (incoming > batch_per_elm) {
+      throw std::runtime_error("WQ: Too many batches in call to push " +
+        std::to_string(incoming) + " > " + std::to_string(batch_per_elm));
+    }
+
+    // claim a free node: block until the producer list is non-empty, then unlink its head
+    DataNode *slot = nullptr;
+    {
+      std::unique_lock<std::mutex> guard(producer_list_lock);
+      producer_condition.wait(guard, [this]{return !full();});
+      slot = producer_list;
+      producer_list = slot->next;
+    }
+
+    // exchange contents while holding no lock: the node receives the caller's data and
+    // the caller gets the node's previous vector back
+    std::swap(slot->data_batch, upd_vec_batch);
+
+    // publish the filled node at the head of the consumer list, then wake one consumer
+    {
+      std::lock_guard<std::mutex> guard(consumer_list_lock);
+      slot->next = consumer_list;
+      consumer_list = slot;
+    }
+    consumer_condition.notify_one();
+  }
 
-  /* 
+  /**
    * Get data from the queue for processing
    * @param data   where to place the Data
    * @return  true if we were able to get good data, false otherwise
    */
-  bool peek(DataNode *&data);
+  bool pop(DataNode *&data) {
+    // sleep until there is something to consume, or until non-blocking mode is enabled
+    std::unique_lock<std::mutex> guard(consumer_list_lock);
+    consumer_condition.wait(guard, [this]{return non_block || !empty();});
+
+    // woken in non-blocking mode with nothing queued: report that no data is available
+    // (the guard releases the lock on return)
+    if (empty()) {
+      return false;
+    }
+
+    // unlink the head of the consumer list, then drop the lock before handing it out
+    DataNode *head = consumer_list;
+    consumer_list = head->next;
+    guard.unlock();
+
+    data = head;
+    return true;
+  }
+
+  /**
    * After processing data taken from the work queue call this function
    * to mark the node as ready to be overwritten
    * @param data   the LL node that we have finished processing
    */
-  void peek_callback(DataNode *data);
-
-  /*
-   * A batched version of peek_callback that avoids locking on every DataNode
-   */
-  void peek_batch_callback(const std::vector<DataNode *> &node_vec);
+  void pop_callback(DataNode *node) {
+    // put the processed node back at the head of the producer list so push() can reuse it
+    {
+      std::lock_guard<std::mutex> guard(producer_list_lock);
+      node->next = producer_list;
+      producer_list = node;
+    }
+    // signal after the lock is released; wakes one producer waiting in push()
+    producer_condition.notify_one();
+  }
 
-  void set_non_block(bool _block);
+  void set_non_block(bool _block) {
+    // update the flag under the consumer lock, which is where pop()'s wait predicate reads it
+    {
+      std::lock_guard<std::mutex> guard(consumer_list_lock);
+      non_block = _block;
+    }
+    // wake every waiting consumer so each re-evaluates its predicate against the new flag
+    consumer_condition.notify_all();
+  }
 
-  /*
+  /**
    * Function which prints the work queue
    * Used for debugging
    */
-  void print();
+  void print() {
+    std::string listing;
+
+    // Appends one "<index>: <node address>" line per node of the given list and returns
+    // how many nodes were visited. The lists are read without taking their locks.
+    auto dump = [&listing](DataNode *cur) {
+      int count = 0;
+      for (; cur != nullptr; cur = cur->next, ++count) {
+        listing += std::to_string(count) + ": " + std::to_string(reinterpret_cast<uint64_t>(cur)) + "\n";
+      }
+      return count;
+    };
+
+    // producer list first, then consumer list, matching the order of the summary line
+    const int p_size = dump(producer_list);
+    const int c_size = dump(consumer_list);
+    printf("WQ: producer_queue size = %i consumer_queue size = %i\n%s", p_size, c_size, listing.c_str());
+  }
 
   // functions for checking if the queue is empty or full
   inline bool full()    {return producer_list == nullptr;} // if producer queue empty, wq full
@@ -91,7 +198,6 @@ class WorkQueue {
   DataNode *consumer_list = nullptr; // list of nodes with data for reading
 
   const size_t len;            // number of elments in queue
-  const size_t max_batch_size; // maximum batch size
   const size_t batch_per_elm;  // number of batches per work queue element
 
   // locks and condition variables for producer list
@@ -106,16 +212,3 @@ class WorkQueue {
   // or return false on failure (true)
   bool non_block;
 };
-
-class WriteTooBig : public std::exception {
-private:
-  const std::string message;
-
-public:
-  WriteTooBig(std::string message) : 
-    message(message) {}
-
-  virtual const char *what() const throw() {
-    return message.c_str();
-  }
-};
diff --git a/src/work_queue.cpp b/src/work_queue.cpp
diff --git a/test/guttering_systems_test.cpp b/test/guttering_systems_test.cpp