forked from SJTU-YONGFU-RESEARCH-GRP/core
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfigurable_kogge_stone_adder.v
More file actions
executable file
·208 lines (191 loc) · 10.1 KB
/
configurable_kogge_stone_adder.v
File metadata and controls
executable file
·208 lines (191 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/* verilator lint_off DECLFILENAME */
/* verilator lint_off UNOPTFLAT */
/* verilator lint_off EOFNEWLINE */
/* verilator lint_off UNUSEDGENVAR */
/**
* Configurable Kogge-Stone Adder
*
* This module implements a parameterized Kogge-Stone prefix tree adder, which is
* one of the fastest parallel prefix adders. It uses a complete binary tree structure
* to compute all carries in parallel with minimal delay.
*
* Algorithm Overview:
* The Kogge-Stone adder uses a parallel prefix computation to calculate all carry
* signals simultaneously. It builds a complete binary tree where each stage computes
* prefix operations over larger and larger ranges, eventually computing all carries
* in O(log n) time.
*
* Key Features:
* - Configurable data width via DATA_WIDTH parameter
* - Logarithmic delay complexity: O(log n) where n is DATA_WIDTH
* - Fastest parallel prefix adder (optimal delay)
* - Higher area cost than Brent-Kung but faster
* - Uses generate-propagate (GP) logic with prefix tree computation
*
* Design Trade-offs:
* - Maximum speed: Best delay performance of all prefix adders
* - Area cost: More area than Brent-Kung due to complete tree structure
* - Fan-out: Higher fan-out in later stages (may require buffering)
*
* How It Works:
* 1. Compute initial generate (g) and propagate (p) signals for each bit
* 2. Build prefix tree in log2(n) stages:
* - Stage 0: Initial GP signals
* - Stage i: Compute GP over ranges of size 2^i
* - Each stage doubles the range of prefix computation
* 3. After final stage, all carries can be computed in parallel
* 4. Compute final sum using XOR of propagate and carry signals
*
* Prefix Tree Structure:
* - Each stage computes: (g, p) over range [j-2^i, j]
* - Uses operator: (g1, p1) o (g2, p2) = (g1 | (p1 & g2), p1 & p2)
* - Final stage has GP signals covering entire range [0, j] for all j
*
* @param DATA_WIDTH Width of input operands and output sum (default: 32 bits)
*
* @input a[DATA_WIDTH-1:0] First operand to be added
* @input b[DATA_WIDTH-1:0] Second operand to be added
* @input cin Carry-in bit (allows chaining multiple adders)
* @output sum[DATA_WIDTH-1:0] Sum of a + b + cin
* @output cout Carry-out bit (indicates overflow when DATA_WIDTH bits are insufficient)
*/
module configurable_kogge_stone_adder #(
parameter DATA_WIDTH = 32 // Width of the operands
) (
input wire [DATA_WIDTH-1:0] a, // First operand
input wire [DATA_WIDTH-1:0] b, // Second operand
input wire cin, // Carry-in
output wire [DATA_WIDTH-1:0] sum, // Sum output
output wire cout // Carry-out
);
// ============================================================================
// Parameter Calculations
// ============================================================================
// The Kogge-Stone prefix tree requires log2(DATA_WIDTH) stages to compute
// all prefix operations. Each stage doubles the range of the prefix computation.
localparam STAGES = $clog2(DATA_WIDTH);
// ============================================================================
// Initial Generate and Propagate Signal Generation
// ============================================================================
// These are the initial GP signals computed from the input operands.
// They represent the GP properties at each individual bit position.
wire [DATA_WIDTH-1:0] g_init; // Initial generate signals
wire [DATA_WIDTH-1:0] p_init; // Initial propagate signals
// Compute initial GP signals for each bit position
genvar i;
generate
for (i = 0; i < DATA_WIDTH; i = i + 1) begin : init_pg
// Generate: A carry is generated at this bit if both inputs are 1
// This means a carry will be produced regardless of any incoming carry
assign g_init[i] = a[i] & b[i];
// Propagate: A carry will propagate through this bit if the inputs differ
// XOR is used because: if a[i] != b[i], any incoming carry will pass through
// Note: For Kogge-Stone, we use XOR (not OR) to ensure correct prefix computation
assign p_init[i] = a[i] ^ b[i];
end
endgenerate
// ============================================================================
// Prefix Tree Signal Storage
// ============================================================================
// We need to store GP signals for each stage of the prefix tree.
// p[stage][bit] and g[stage][bit] represent the GP signals at a given stage.
// After the final stage, g[STAGES][i] represents the generate signal covering
// the range [0, i], which allows us to compute carry[i+1] directly.
wire [DATA_WIDTH-1:0] p [STAGES:0]; // Propagate signals for each stage
wire [DATA_WIDTH-1:0] g [STAGES:0]; // Generate signals for each stage
// ============================================================================
// Stage 0 Initialization
// ============================================================================
// Initialize the first stage (stage 0) with the initial GP signals.
// This represents GP properties at individual bit positions.
generate
for (i = 0; i < DATA_WIDTH; i = i + 1) begin : init_stage
assign p[0][i] = p_init[i]; // Initial propagate at bit i
assign g[0][i] = g_init[i]; // Initial generate at bit i
end
endgenerate
// ============================================================================
// Kogge-Stone Prefix Tree Construction
// ============================================================================
// This is the heart of the Kogge-Stone algorithm. We build a complete binary
// tree where each stage computes prefix operations over larger ranges.
//
// Prefix Operator: (g1, p1) o (g2, p2) = (g1 | (p1 & g2), p1 & p2)
// This operator is associative, allowing us to compute prefixes in parallel.
//
// At stage i, we compute GP over ranges of size 2^i:
// - Stage 0: GP over range [i, i] (single bit)
// - Stage 1: GP over range [i-1, i] (2 bits)
// - Stage 2: GP over range [i-3, i] (4 bits)
// - ...
// - Stage STAGES: GP over range [0, i] (all bits up to i)
genvar j;
generate
for (i = 0; i < STAGES; i = i + 1) begin : prefix_stage
// Step size for this stage: 2^i
// This determines how far back we look when computing the prefix
localparam step = 1 << i;
// For each bit position, compute the prefix operation
for (j = 0; j < DATA_WIDTH; j = j + 1) begin : prefix_bit
if (j >= step) begin : use_prefix
// Compute prefix: combine current GP with GP from step positions back
// This extends the range of the prefix computation
// Generate update: g_new = g_current | (p_current & g_previous)
// This means: generate at current position OR (propagate through current
// AND generate from previous range)
assign g[i+1][j] = g[i][j] | (p[i][j] & g[i][j-step]);
// Propagate update: p_new = p_current & p_previous
// This means: propagate through both the current position AND previous range
assign p[i+1][j] = p[i][j] & p[i][j-step];
end
else begin : pass_through
// For bits j < step, we can't look back step positions
// These bits just pass through their current GP values
assign g[i+1][j] = g[i][j];
assign p[i+1][j] = p[i][j];
end
end
end
endgenerate
// ============================================================================
// Carry Computation
// ============================================================================
// After building the prefix tree, we can compute all carries in parallel.
// The final stage GP signals cover the entire range [0, i], allowing us to
// compute carry[i+1] directly without waiting for lower-order carries.
wire [DATA_WIDTH:0] carries; // Carry signals: carries[i] is the carry into bit position i
assign carries[0] = cin; // Initialize with external carry-in
// Compute all carries using the final stage GP signals
generate
for (i = 0; i < DATA_WIDTH; i = i + 1) begin : carry_gen
// Carry equation: c[i+1] = g[STAGES][i] | (p[STAGES][i] & c[i])
// g[STAGES][i] represents generate over range [0, i]
// p[STAGES][i] represents propagate over range [0, i]
// This allows us to compute the carry without sequential propagation
assign carries[i+1] = g[STAGES][i] | (p[STAGES][i] & carries[i]);
end
endgenerate
// ============================================================================
// Sum Computation
// ============================================================================
// Once all carries are computed, the sum is simply the XOR of propagate and carry.
// We use p_init (not p[STAGES]) because sum needs the bit-level propagate, not
// the range-level propagate from the prefix tree.
generate
for (i = 0; i < DATA_WIDTH; i = i + 1) begin : sum_gen
// Sum = a XOR b XOR carry_in
// Since p_init[i] = a[i] ^ b[i], we can use it directly
assign sum[i] = p_init[i] ^ carries[i];
end
endgenerate
// ============================================================================
// Output Assignment
// ============================================================================
// The final carry-out is the carry into the most significant bit position.
// This indicates overflow when the result exceeds DATA_WIDTH bits.
assign cout = carries[DATA_WIDTH];
endmodule
/* verilator lint_on DECLFILENAME */
/* verilator lint_on UNOPTFLAT */
/* verilator lint_on EOFNEWLINE */
/* verilator lint_on UNUSEDGENVAR */