patchdiff/benchmarks/benchmark.py at 024d2f87d9465d0518b7327c5f3530c98e3cc9eb · fork-tongue/patchdiff · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
"""
Benchmark suite for patchdiff performance testing using pytest-benchmark.

Run benchmarks:
    uv run pytest benchmarks/benchmark.py --benchmark-only

Save baseline:
    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave

Compare against baseline:
    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001

Fail if performance degrades >5%:
    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001 --benchmark-compare-fail=mean:5%
"""

import random

import pytest

from patchdiff import apply, diff
from patchdiff.pointer import Pointer

# Set seed for reproducibility
random.seed(42)


def generate_random_list(size: int, value_range: int = 1000) -> list[int]:
    """Generate a random list of integers."""
    return [random.randint(0, value_range) for _ in range(size)]


def generate_similar_lists(
    size: int, change_ratio: float = 0.1
) -> tuple[list[int], list[int]]:
    """
    Generate two similar lists with specified change ratio.

    Args:
        size: Size of the lists
        change_ratio: Ratio of elements that differ (0.0 to 1.0)
    """
    list_a = generate_random_list(size)
    list_b = list_a.copy()

    num_changes = int(size * change_ratio)

    # Make some replacements
    for _ in range(num_changes // 3):
        idx = random.randint(0, size - 1)
        list_b[idx] = random.randint(0, 1000)

    # Make some insertions
    for _ in range(num_changes // 3):
        idx = random.randint(0, len(list_b))
        list_b.insert(idx, random.randint(0, 1000))

    # Make some deletions
    for _ in range(num_changes // 3):
        if list_b:
            idx = random.randint(0, len(list_b) - 1)
            del list_b[idx]

    return list_a, list_b


def generate_nested_dict(depth: int, breadth: int) -> dict | int:
    """Generate a nested dictionary structure."""
    if depth == 0:
        return random.randint(0, 1000)

    result = {}
    for i in range(breadth):
        key = f"key_{i}"
        if random.random() > 0.3:
            result[key] = generate_nested_dict(depth - 1, breadth)
        else:
            result[key] = random.randint(0, 1000)
    return result


# ========================================
# List Diff Benchmarks
# ========================================


@pytest.mark.benchmark(group="list-diff")
def test_list_diff_small_10pct(benchmark):
    """Benchmark: 50 element list with 10% changes."""
    a, b = generate_similar_lists(50, 0.1)
    benchmark(diff, a, b)


@pytest.mark.benchmark(group="list-diff")
@pytest.mark.parametrize("change_ratio", [0.05, 0.1, 0.5])
def test_list_diff_medium(benchmark, change_ratio):
    """Benchmark: 1000 element list with varying change ratios."""
    a, b = generate_similar_lists(1000, change_ratio)
    benchmark(diff, a, b)


@pytest.mark.benchmark(group="list-diff-edge")
def test_list_diff_completely_different(benchmark):
    """Benchmark: Two completely different 1000 element lists."""
    a = generate_random_list(1000)
    b = generate_random_list(1000)
    benchmark(diff, a, b)


@pytest.mark.benchmark(group="list-diff-edge")
def test_list_diff_identical(benchmark):
    """Benchmark: Two identical 10000 element lists."""
    a = generate_random_list(10000)
    b = a.copy()
    benchmark(diff, a, b)


# ========================================
# Dict Diff Benchmarks
# ========================================


@pytest.mark.benchmark(group="dict-diff")
def test_dict_diff_flat_500_keys(benchmark):
    """Benchmark: Flat dict with 500 keys, 10% changed."""
    a = {f"key_{i}": i for i in range(500)}
    b = a.copy()
    # Change 10%
    for i in range(50):
        b[f"key_{i}"] = i + 500

    benchmark(diff, a, b)


@pytest.mark.benchmark(group="dict-diff")
def test_dict_diff_nested(benchmark):
    """Benchmark: Nested dict with depth=3, breadth=5."""
    a = generate_nested_dict(3, 5)
    b = generate_nested_dict(3, 5)
    benchmark(diff, a, b)


# ========================================
# Set Diff Benchmarks
# ========================================


@pytest.mark.benchmark(group="set-diff")
def test_set_diff_1000_elements(benchmark):
    """Benchmark: Sets with 1000 elements, 10% difference."""
    a = set(generate_random_list(1000, 2000))
    b = a.copy()
    # Remove 5%
    a_list = list(a)
    for i in range(50):
        a.remove(a_list[i])
    # Add 5%
    for i in range(50):
        b.add(2000 + i)

    benchmark(diff, a, b)


# ========================================
# Mixed Structure Benchmarks
# ========================================


@pytest.mark.benchmark(group="mixed")
def test_mixed_dict_with_list_values(benchmark):
    """Benchmark: Dict with 50 keys, each containing a 100-element list."""
    a = {f"key_{i}": generate_random_list(100) for i in range(50)}
    b = {f"key_{i}": generate_random_list(100) for i in range(50)}
    benchmark(diff, a, b)


# ========================================
# Apply Benchmarks
# ========================================


@pytest.mark.benchmark(group="apply")
def test_apply_list_1000_elements(benchmark):
    """Benchmark: Apply patch to 1000 element list with 10% changes."""
    a, b = generate_similar_lists(1000, 0.1)
    ops, _ = diff(a, b)

    benchmark(apply, a, ops)


# ========================================
# Pointer Evaluate Benchmarks
# ========================================


@pytest.mark.benchmark(group="pointer-evaluate")
def test_pointer_evaluate_deep_dict(benchmark):
    """Benchmark: Evaluate pointer on deeply nested structure."""
    depth = 100
    obj = 42
    for i in range(depth - 1, -1, -1):
        obj = {f"key_{i}": obj}
    ptr = Pointer([f"key_{i}" for i in range(depth)])

    benchmark(ptr.evaluate, obj)


@pytest.mark.benchmark(group="pointer-evaluate")
def test_pointer_evaluate_deep_list(benchmark):
    """Benchmark: Evaluate pointer on deep lists."""
    # Build nested lists 100 levels deep; innermost value is 42.
    depth = 100
    nested = 42
    for _ in range(depth):
        nested = [nested]
    obj = nested
    ptr = Pointer([0] * depth)

    benchmark(ptr.evaluate, obj)


# ========================================
# Pointer Append Benchmarks
# ========================================


@pytest.mark.benchmark(group="pointer-append")
def test_pointer_append(benchmark):
    """Benchmark: Append token to pointer."""
    ptr = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j")

    benchmark(ptr.append, "k")