Skip to content

Commit 4468c7c

Browse files
committed
docs: update code generation
The documentation for code generation has gone out of date, and the provided code was throwing errors. Update the documentation to the most recent changes (from commit tinygrad/tinygrad@2893feb).
1 parent 34437f2 commit 4468c7c

1 file changed

Lines changed: 16 additions & 22 deletions

File tree

20241231_intro.md

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -170,23 +170,20 @@ progressively "lowered" into a form that can be used for actual code generation.
170170
## Code generation
171171

172172
The UOp used for code generation contains much more detail and is lower level. Here I have built an example that you can use
173-
to play around. If things go out of date, commit id is: ae00fa3b2833dbe0595d54d5fb0b679e1731ae01
173+
to play around. If things go out of date, please refer to commit `tinygrad/tinygrad@2893feb9f6f3c7eed825494e51a9a9e84c6b8a2e`.
174174

175175
Suppose we just want to add two numbers:
176176

177177
```python
178178
from tinygrad.renderer.cstyle import MetalRenderer
179-
from tinygrad.ops import UOp, Ops
180-
from tinygrad import dtypes
179+
from tinygrad.uop import Ops
180+
from tinygrad import UOp, dtypes
181181

182182
const = UOp(Ops.CONST, dtypes.float, arg=1.0)
183183
add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None)
184184

185185
print(add)
186-
print(MetalRenderer().render("example", [
187-
const,
188-
add
189-
]))
186+
print(MetalRenderer().render([const, add]))
190187
```
191188

192189
The `add` variable shows something like:
@@ -203,7 +200,7 @@ let's see the rendered code:
203200
```c++
204201
#include <metal_stdlib>
205202
using namespace metal;
206-
kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
203+
kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
207204
float alu0 = (1.0f+1.0f);
208205
}
209206
```
@@ -212,18 +209,15 @@ Let me show you the CUDA version also, where you would replace the import:
212209
213210
```python
214211
from tinygrad.renderer.cstyle import CUDARenderer
215-
from tinygrad.ops import UOp, Ops
216-
from tinygrad import dtypes
212+
from tinygrad.uop import Ops
213+
from tinygrad import UOp, dtypes
217214
218215
const = UOp(Ops.CONST, dtypes.float, arg=1.0)
219216
add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None)
220217
221218
print(add)
222219
223-
print(CUDARenderer("sm_50").render("example", [
224-
const,
225-
add
226-
]))
220+
print(CUDARenderer("sm_50").render([const, add]))
227221
```
228222

229223
Note that you have to pass in the "architecture" as an argument; it affects the compiler. This value is set automatically
@@ -232,7 +226,7 @@ by querying `cuDeviceComputeCapability`, for our render purpose, pass in just tw
232226
```c++
233227
#define INFINITY (__int_as_float(0x7f800000))
234228
#define NAN (__int_as_float(0x7fffffff))
235-
extern "C" __global__ void __launch_bounds__(1) example() {
229+
extern "C" __global__ void __launch_bounds__(1) test() {
236230
float alu0 = (1.0f+1.0f);
237231
}
238232
```
@@ -242,15 +236,13 @@ two constants is "folded" before the render stage, so you get the value 2, inste
242236
optimization techniques. Let's see another example that renders the thread position:
243237
244238
```python
245-
MetalRenderer().render("example", [
246-
UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16))
247-
])
239+
print(MetalRenderer().render([UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16))]))
248240
```
249241

250242
```c++
251243
#include <metal_stdlib>
252244
using namespace metal;
253-
kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
245+
kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
254246
int gidx0 = gid.x; /* 16 */
255247
}
256248
```
@@ -260,7 +252,7 @@ On CUDA:
260252
```c++
261253
#define INFINITY (__int_as_float(0x7f800000))
262254
#define NAN (__int_as_float(0x7fffffff))
263-
extern "C" __global__ void __launch_bounds__(1) example() {
255+
extern "C" __global__ void __launch_bounds__(1) test() {
264256
int gidx0 = blockIdx.x; /* 16 */
265257
}
266258
```
@@ -270,14 +262,16 @@ also handle the count, so it renders `.x` `.y` automatically if you pass more tha
270262

271263

272264
```python
273-
print(CUDARenderer("sm_50").render("example", [
265+
print(CUDARenderer("sm_50").render([
274266
UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16)),
275267
UOp(Ops.SPECIAL, dtypes.int, arg=("gidx1", 16))
276268
]))
277269
```
278270

279271
```c++
280-
extern "C" __global__ void __launch_bounds__(1) example() {
272+
#define INFINITY (__int_as_float(0x7f800000))
273+
#define NAN (__int_as_float(0x7fffffff))
274+
extern "C" __global__ void __launch_bounds__(1) test() {
281275
int gidx0 = blockIdx.x; /* 16 */
282276
int gidx1 = blockIdx.y; /* 16 */
283277
}

0 commit comments

Comments
 (0)