File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 77
88; BC:UDE:UHL = ((uint128_t)BC:UDE:UHL * (uint128_t)(SP64)) >> 64
99__llmulhu:
; NOTE(review): this text appears to be a scraped diff. The leading digits and the +/- markers
; look like diff gutter residue: lines marked "-" seem to be the previous implementation
; (four calls to __llmulu combined by .L.__llmulhu_i72add) and lines marked "+" the
; replacement (one shift-and-add loop). The comments added below describe the "+" lines.
10+ ; modified version of __llmulu that uses exx to obtain the upper 64 bits of the result.
11+ ; __llmulhu runs slightly faster than two calls to __llmulu, and is much faster
12+ ; than the naive implementation of __llmulhu that calls __llmulu four times.
13+ push af ; keep the caller's AF; restored by the last pop af before ret
14+ ld a, i ; presumably latches the interrupt-enable state into P/V (tested by jp po in the epilogue)
15+ di ; the shadow registers and the .s stack pointer are used below
16+ push af ; keep those flags for the epilogue
17+
1018 push ix
1119 push iy
12- ld ix, -36
13- add ix, sp
14- ld sp, ix
15- lea ix, ix + 36
16-
17- ld (ix - 3), bc
18- ld (ix - 6), de
19- ld (ix - 9), hl
20-
21- ld bc, 0
22- ld (ix - 10), b
23- ld (ix - 13), bc
24- ld (ix - 30), bc
25- ld c, (ix + 12)
26- ld (ix - 33), bc
27- ld iy, (ix + 9)
28- ld (ix - 36), iy
29-
30- ; x_lo * y_lo
31- ld c, b
32- ld d, b
33- inc de
34- dec.s de
35- call __llmulu
36- inc bc
37- dec.s bc
38- ld (ix - 16), bc
39- ld (ix - 19), de
40- ld b, 0
41- ld c, b
42-
43- ; x_hi * y_lo
44- inc.s de
45- ld d, b
46- ld e, (ix - 2)
47- ld hl, (ix - 5)
48- call __llmulu
49- inc bc
50- dec.s bc
51- ld (ix - 21), bc
52- ld (ix - 24), de
53- ld (ix - 27), hl
54-
55- ld c, (ix + 16)
56- ld (ix - 33), c
57- ld iy, (ix + 13)
58- ld (ix - 36), iy
59-
60- ; x_lo * y_hi
61- ld b, 0
62- ld c, b
63- inc.s de
64- ld d, b
65- ld e, (ix - 6)
66- ld hl, (ix - 9)
67- call __llmulu
68- inc bc
69- dec.s bc
70- lea iy, ix - 27
71- call .L.__llmulhu_i72add
72- lea iy, ix - 18
73- call .L.__llmulhu_i72add
74- ld (ix - 16), bc
75- ld (ix - 19), de
76- ld bc, 0
77-
78- ; x_hi * y_hi
79- inc.s de
80- ld d, b
81- ld e, (ix - 2)
82- ld hl, (ix - 5)
83- call __llmulu
84- inc bc
85- dec.s bc
86- lea iy, ix - 18
87- call .L.__llmulhu_i72add
88- ld sp, ix
89- pop iy
90- pop ix
91- ret
9220
93- .L.__llmulhu_i72add:
94- ; similar to __lladd, except iy points to the stack and is destroyed
95- push bc
96- ld bc, (iy + 0)
97- add hl, bc
; --- setup: stash the register operand where the add step can reach it ---
21+ ld ix, 0 ; ix starts at zero; it is the first link of the shift chain below
22+ lea iy, ix - 6
23+ add iy, sp ; cf=1
; iy = sp - 6, so the two pushes below land at (iy + 3) and (iy + 0)
24+
25+ push de ; read back as (iy + 3) in the add step
26+ push hl ; read back as (iy) in the add step
27+ ld l, c
28+ ld h, b
29+ ld.s sp, hl ; BC is parked in the .s stack pointer; added back with adc.s hl, sp
30+
; --- copy the 8 bytes at iy + 21 onto the stack, one AF push per byte ---
; presumably iy + 21 is the first byte of the 64-bit stack argument (past the saved
; iy, ix, both AF pushes and the return address, assuming 3-byte stack slots) - TODO confirm
31+ lea hl, iy + 21
32+ ld b, 8
33+ .L.push_loop:
; the first push stores the carry left set by add iy, sp (end marker for the pop af
; below); every later push stores a byte that was just read, with carry clear
34+ push af
35+ ld a, (hl)
36+ inc hl
37+ or a, a ; cf=0
38+ djnz .L.push_loop
; here A holds the eighth byte read (highest address); it is processed first
39+
; --- clear the rest of the accumulator (carry is still clear from or a, a) ---
40+ sbc hl, hl
41+ ld e, l
42+ ld d, h
43+
44+ exx
45+ sbc hl, hl
46+ ex de, hl
47+ sbc hl, hl
48+ ld c, l
49+ ld b, l
50+ exx
51+
52+ .L.byte_loop:
; shift the top bit of A into carry and insert a 1 at bit 0 as an end-of-byte marker
53+ scf
54+ adc a, a
55+
56+ .L.bit_loop:
57+ ex af, af' ; park A and the current bit (in carry) while the accumulator is shifted
58+
; shift the whole accumulator left by one bit along the chain
; ix -> hl -> de (via the .s form) -> shadow hl -> shadow de -> shadow c -> shadow b
59+ add ix, ix
60+ adc hl, hl
9861 ex de, hl
62+ adc.s hl, hl
63+ ex de, hl
64+
65+ exx
66+ adc hl, hl
67+ ex de, hl
68+ adc hl, hl
69+ ex de, hl
70+ rl c
71+ rl b
72+ exx
73+
74+ ex af, af' ; restore A and the current bit
75+
76+ jr nc, .L.add_end ; bit clear: nothing to add this round
; bit set: add the stashed operand, (iy) -> ix, (iy + 3) -> hl, .s stack pointer -> de
77+ ld bc, (iy)
78+ add ix, bc
9979 ld bc, (iy + 3)
10080 adc hl, bc
10181 ex de, hl
102- pop bc
103- jr nc, .L.no_carry48
82+ adc.s hl, sp
83+ ex de, hl
84+ jr nc, .L.add_end ; no carry out of the main-register part
; carry out: increment the shadow part, rippling hl -> de -> bc only while each wraps to zero
85+ exx
86+ inc hl
; add / or / sbc leaves hl unchanged and sets Z only when hl is zero
87+ add hl, de
88+ or a, a
89+ sbc hl, de
90+ jr nz, .L.add_end_exx
91+ inc de
; hl is zero here, so sbc sets Z only when de is zero; the add puts hl back
; (presumably without touching Z - TODO confirm against the CPU manual)
92+ sbc hl, de
93+ add hl, de
94+ jr nz, .L.add_end_exx
10495 inc bc
105- .L.no_carry48:
106- ld iy, (iy + 6)
107- add iy, bc
108- lea bc, iy + 0
109- ret
96+ .L.add_end_exx:
97+ exx
98+ .L.add_end:
99+
; move the next bit of A into carry; A reaches zero once the marker bit has been
; shifted out, which ends this byte
100+ add a, a
101+ jr nz, .L.bit_loop
102+
; fetch the next saved byte; carry is set only on the entry pushed first, ending the loop
103+ pop af
104+ jr nc, .L.byte_loop
105+
106+ ; ld b, d
107+ ; ld c, e
108+ ; ex de, hl
109+ ; lea hl, ix + 0
110+ ; BC:UDE:UHL = lower 64 bits
111+ ; shadow BC:UDE:UHL = upper 64 bits
112+ exx ; bring the shadow set (upper 64 bits, per the note above) into BC:UDE:UHL
110113
111- .extern __llmulu
; the two pops below discard the hl and de copies pushed during setup
114+ pop af ; reset SP
115+ pop af ; reset SP
116+ pop iy
117+ pop ix
118+
; restore the interrupt state saved on entry, then the caller's AF
119+ pop af
120+ jp po, .L.skipEI
121+ ei
122+ .L.skipEI:
123+ pop af
124+ ret
You can’t perform that action at this time.
0 commit comments