Skip to content

Commit 11dbb6f

Browse files
committed
memory fix, global_skip fix, upload to pyi
1 parent 2e2c4c9 commit 11dbb6f

7 files changed

Lines changed: 125 additions & 31 deletions

File tree

.github/workflows/pypi_release.yml

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
# Builds the source distribution and binary wheels (via cibuildwheel)
# and publishes them to PyPI. Triggered manually or on any pushed tag.
name: Publish distributions to PyPI

on:
  workflow_dispatch:
  push:
    tags:
      - '*'

# Cancel an in-flight run for the same ref when a new one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:

  build_sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4

    - name: Build SDist
      run: pipx run build --sdist

    - name: Check metadata
      run: pipx run twine check dist/*

    - uses: actions/upload-artifact@v4
      with:
        name: dist-source
        path: dist/*.tar.gz


  build_wheels:
    name: Wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]

    steps:
    - uses: actions/checkout@v4

    - uses: pypa/cibuildwheel@v2.16.5
      env:
        # Skip PyPy builds; build universal2 wheels on macOS.
        CIBW_SKIP: "pp*"
        CIBW_ARCHS_MACOS: auto universal2
        CIBW_PRERELEASE_PYTHONS: true

    # Fail if the build modified any tracked files.
    - name: Verify clean directory
      run: git diff --exit-code
      shell: bash

    - name: Upload wheels
      uses: actions/upload-artifact@v4
      with:
        name: dist-wheel-${{ matrix.os }}
        path: wheelhouse/*.whl

  upload_all:
    name: Upload if release
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    #if: github.event_name == 'release'

    steps:
    - uses: actions/setup-python@v5
      name: Set up Python 3.x
      with:
        python-version: "3.8"

    # Collect the sdist and every per-OS wheel artifact into dist/.
    - uses: actions/download-artifact@v4
      name: Download wheels
      with:
        pattern: dist-*
        merge-multiple: true
        path: dist

    - uses: pypa/gh-action-pypi-publish@release/v1
      name: Publish to PyPI
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Include everything in the code folder
2+
recursive-include code *

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ If you use ConTree, please cite our paper:
1212

1313
## Python usage
1414

15+
### Install from PyPI
16+
The `pycontree` python package can be installed from PyPI using `pip`:
17+
18+
```sh
19+
pip install pycontree
20+
```
21+
1522
### Install from source using pip
1623
The `pycontree` python package can be installed from source as follows:
1724

code/Engine/src/specialized_solver.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ void SpecializedSolver::get_best_left_right_scores(const Dataview& dataview, int
108108
left_optimal_dt->make_leaf(left_tree.max_label, left_tree.size - left_tree.classification_score);
109109
} else {
110110
left_optimal_dt->update_split(left_tree.best_feature_index, left_tree.best_threshold, std::make_shared<Tree>(left_tree.best_left_label, -1), std::make_shared<Tree>(left_tree.best_right_label, -1));
111-
RUNTIME_ASSERT(left_tree.best_left_label != -1, "Left tree left label should be initialized.");
112-
RUNTIME_ASSERT(left_tree.best_right_label != -1, "Left tree right label should be initialized.");
111+
//RUNTIME_ASSERT(left_tree.best_left_label != -1, "Left tree left label should be initialized.");
112+
//RUNTIME_ASSERT(left_tree.best_right_label != -1, "Left tree right label should be initialized.");
113113
}
114114
left_optimal_dt->misclassification_score = left_tree.size - left_tree.classification_score;
115115
RUNTIME_ASSERT(left_optimal_dt->misclassification_score >= 0, "LR - Left tree misclassification score should be non-negative.");
@@ -118,8 +118,8 @@ void SpecializedSolver::get_best_left_right_scores(const Dataview& dataview, int
118118
right_optimal_dt->make_leaf(right_tree.max_label, right_tree.size - right_tree.classification_score);
119119
} else {
120120
right_optimal_dt->update_split(right_tree.best_feature_index, right_tree.best_threshold, std::make_shared<Tree>(right_tree.best_left_label, -1), std::make_shared<Tree>(right_tree.best_right_label, -1));
121-
RUNTIME_ASSERT(right_tree.best_left_label != -1, "Right tree left label should be initialized.");
122-
RUNTIME_ASSERT(right_tree.best_right_label != -1, "Right tree right label should be initialized.");
121+
//RUNTIME_ASSERT(right_tree.best_left_label != -1, "Right tree left label should be initialized.");
122+
//RUNTIME_ASSERT(right_tree.best_right_label != -1, "Right tree right label should be initialized.");
123123
}
124124
right_optimal_dt->misclassification_score = right_tree.size - right_tree.classification_score;
125125
RUNTIME_ASSERT(right_optimal_dt->misclassification_score >= 0, "LR - Right tree misclassification score should be non-negative.");
@@ -133,7 +133,6 @@ void SpecializedSolver::process_depth_one_feature(const Dataview& dataview,
133133
const std::vector<Dataset::FeatureElement>& current_feature = dataview.get_sorted_dataset_feature(current_feature_index);
134134
const int class_number = dataview.get_class_number();
135135
const int dataset_size = dataview.get_dataset_size();
136-
int global_skip = 0;
137136

138137
left_tree.reset_label_frequency();
139138
right_tree.reset_label_frequency();
@@ -156,10 +155,9 @@ void SpecializedSolver::process_depth_one_feature(const Dataview& dataview,
156155
continue;
157156
}
158157

159-
global_skip--;
160158
tree.can_skip--;
161159

162-
if (current_feature_data.unique_value_index == tree.previous_unique_value_index || tree.can_skip > 0 || global_skip > 0) {
160+
if (current_feature_data.unique_value_index == tree.previous_unique_value_index || tree.can_skip > 0){
163161
tree.current_element_count++;
164162
tree.current_label_frequency[current_feature_data.label]++;
165163
tree.previous_value = current_feature_data.value;
@@ -196,7 +194,6 @@ void SpecializedSolver::process_depth_one_feature(const Dataview& dataview,
196194
tree.best_right_label = right_label;
197195
} else {
198196
tree.can_skip = tree.classification_score - (left_classification_score + right_classification_score);
199-
global_skip = std::max(global_skip, upper_bound - (dataset_size - left_tree.classification_score - right_tree.classification_score));
200197
}
201198

202199
int remaining_size = tree.size - tree.current_element_count;

code/Utilities/include/dynamic_bitset.h

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,19 @@
1010
#include <algorithm>
1111
#include <cstddef>
1212
#include <cstring>
13+
#include <climits>
1314

1415
struct dynamic_bitset {
1516
using base_type = unsigned long;
17+
static constexpr size_t BITS_PER_ELEMENT = sizeof(base_type) * CHAR_BIT;
18+
static constexpr size_t BYTES_PER_ELEMENT = sizeof(base_type);
1619

1720
base_type* bitset;
1821
size_t elements;
1922

2023
dynamic_bitset(size_t size) {
21-
elements = (size - 1) / sizeof(base_type) + 1;
24+
RUNTIME_ASSERT(size > 0, "Cannot create an empty bitset");
25+
elements = (size - 1) / BITS_PER_ELEMENT + 1;
2226
bitset = new base_type[elements];
2327
std::fill(bitset, bitset + elements, 0);
2428
}
@@ -31,7 +35,7 @@ struct dynamic_bitset {
3135

3236
dynamic_bitset(const dynamic_bitset& other) : elements(other.elements) {
3337
bitset = new base_type[elements];
34-
std::memcpy(bitset, other.bitset, elements * sizeof(base_type));
38+
std::memcpy(bitset, other.bitset, elements * BYTES_PER_ELEMENT);
3539
}
3640

3741
dynamic_bitset& operator=(const dynamic_bitset& other) {
@@ -40,7 +44,7 @@ struct dynamic_bitset {
4044

4145
elements = other.elements;
4246
base_type* new_bitset = new base_type[elements];
43-
std::memcpy(new_bitset, other.bitset, elements * sizeof(base_type));
47+
std::memcpy(new_bitset, other.bitset, elements * BYTES_PER_ELEMENT);
4448
delete[] bitset;
4549
bitset = new_bitset;
4650
return *this;
@@ -58,20 +62,23 @@ struct dynamic_bitset {
5862
}
5963

6064
void set_bit(size_t index) {
61-
size_t element = index / sizeof(base_type);
62-
size_t bit_index = index % sizeof(base_type);
65+
size_t element = index / BITS_PER_ELEMENT;
66+
RUNTIME_ASSERT(element <= elements, "set_bit - Writing beyond the bitset size: byte index " << element << ", while max is " << elements);
67+
size_t bit_index = index % BITS_PER_ELEMENT;
6368
bitset[element] |= 1UL << bit_index;
6469
}
6570

6671
void clear_bit(size_t index) {
67-
size_t element = index / sizeof(base_type);
68-
size_t bit_index = index % sizeof(base_type);
72+
size_t element = index / BITS_PER_ELEMENT;
73+
RUNTIME_ASSERT(element <= elements, "clear_bit - Writing beyond the bitset size: byte index " << element << ", while max is " << elements);
74+
size_t bit_index = index % BITS_PER_ELEMENT;
6975
bitset[element] &= 1UL << bit_index;
7076
}
7177

7278
void toggle_bit(size_t index) {
73-
size_t element = index / sizeof(base_type);
74-
size_t bit_index = index % sizeof(base_type);
79+
size_t element = index / BITS_PER_ELEMENT;
80+
RUNTIME_ASSERT(element <= elements, "toggle_bit - Writing beyond the bitset size: byte index " << element << ", while max is " << elements);
81+
size_t bit_index = index % BITS_PER_ELEMENT;
7582
bitset[element] ^= 1UL << bit_index;
7683
}
7784

pyproject.toml

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
[build-system]
22
requires = [
3-
"setuptools>=42",
3+
"setuptools>=42, <72.2.0",
44
"pybind11>=2.12.0"
55
]
66
build-backend = "setuptools.build_meta"
7-
target-version = "py38"
87

98
[project]
109
name = "pycontree"
11-
version = "1.0"
10+
version = "1.0.4"
1211
description = "Python Wrapper ConTree: Optimal Decision Trees for Continuous Feature Data"
1312
license= {file = "LICENSE"}
13+
readme = "README.md"
1414
authors = [
1515
{name = "Cătălin E. Briţa", email="C.E.Brita@student.tudelft.nl"},
1616
{name = "Jacobus G. M. van der Linden", email="J.G.M.vanderLinden@tudelft.nl"},
@@ -32,13 +32,8 @@ classifiers = [
3232
"License :: OSI Approved :: MIT License",
3333
"Operating System :: OS Independent", ]
3434

35-
[project.optional-dependencies]
36-
dev = []
35+
[tool.setuptools.packages.find]
36+
exclude = ["datasets*", "train-datasets*", "examples*"]
3737

38-
[tool.pytest.ini_options]
39-
pythonpath = [
40-
"."
41-
]
42-
43-
[tool.setuptools.packages]
44-
find = {} # Scanning implicit namespaces is active by default
38+
[project.urls]
39+
Repository = "https://github.com/consol-lab/contree"

setup.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# Define package metadata
77
package_name = 'pycontree'
88
extension_name = 'ccontree'
9-
__version__ = "1.0"
9+
__version__ = "1.0.4"
1010

1111
ext_modules = [
1212
Pybind11Extension(package_name + '.' + extension_name,
@@ -23,5 +23,7 @@
2323
version=__version__,
2424
ext_modules=ext_modules,
2525
dev_requires=[],
26-
install_requires=['pandas', 'numpy']
26+
install_requires=['pandas', 'numpy'],
27+
long_description=open('README.md').read(),
28+
long_description_content_type='text/markdown'
2729
)

0 commit comments

Comments
 (0)