// Source: core/libraries/adders/configurable_carry_skip_adder.v (universal-verification-methodology/core, branch main)
/* verilator lint_off DECLFILENAME */
/* verilator lint_off UNOPTFLAT */
/* verilator lint_off EOFNEWLINE */

/**
 * Configurable Carry-Skip Adder (CSKA)
 *
 * This module implements a carry-skip (also called carry-bypass) adder, which
 * detects when all bits in a block propagate carries and allows the carry to
 * "skip" over the block, reducing propagation delay.
 *
 * Algorithm Overview:
 * The adder is divided into blocks. Each block computes its sum normally using
 * a ripple-carry adder. However, if all bits in a block have propagate signals
 * set (meaning they all pass carries through), the carry can skip the entire
 * block and go directly to the next block, bypassing the ripple delay.
 *
 * Key Features:
 * - Configurable data width via DATA_WIDTH parameter
 * - Configurable block size via BLOCK_SIZE parameter
 * - Faster than pure ripple-carry when many blocks have all-propagate conditions
 * - Area overhead is minimal (just skip detection logic)
 *
 * Design Trade-offs:
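 * - Larger BLOCK_SIZE: Each skip bypasses more bits, but the all-propagate
 *   condition is met less often and the ripple path inside a block is longer
 * - Smaller BLOCK_SIZE: The all-propagate condition is met more often, but the
 *   carry passes through more skip multiplexers in series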
 * - Typical BLOCK_SIZE: 4-8 bits
 *
 * How It Works:
 * 1. Compute propagate signals for each bit (p[i] = a[i] XOR b[i])
 * 2. For each block, check if all bits propagate (AND of all p signals)
 * 3. If all propagate, carry skips the block (bypasses ripple delay)
 * 4. Otherwise, carry propagates normally through the block
 *
 * @param DATA_WIDTH Width of input operands and output sum (default: 32 bits)
 * @param BLOCK_SIZE Number of bits per carry-skip block (default: 4 bits)
 *                   Typical values: 4, 8, or 16.
 *
 * @input a[DATA_WIDTH-1:0] First operand to be added
 * @input b[DATA_WIDTH-1:0] Second operand to be added
 * @input cin Carry-in bit (allows chaining multiple adders)
 * @output sum[DATA_WIDTH-1:0] Sum of a + b + cin
 * @output cout Carry-out bit (indicates overflow when DATA_WIDTH bits are insufficient)
 */
module configurable_carry_skip_adder #(
    parameter DATA_WIDTH = 32,    // Bit width of a, b and sum
    parameter BLOCK_SIZE = 4      // Bits handled by each carry-skip block
) (
    input wire [DATA_WIDTH-1:0] a,    // Addend
    input wire [DATA_WIDTH-1:0] b,    // Addend
    input wire cin,                   // Carry into bit 0
    output wire [DATA_WIDTH-1:0] sum, // a + b + cin, truncated to DATA_WIDTH bits
    output wire cout                  // Carry out of the most significant block
);

    // ------------------------------------------------------------------------
    // Derived constants
    // ------------------------------------------------------------------------
    // Ceiling division: a trailing partial block is counted as a block too.
    localparam NUM_BLOCKS = (DATA_WIDTH + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // ------------------------------------------------------------------------
    // Bitwise propagate vector
    // ------------------------------------------------------------------------
    // prop[k] is 1 when exactly one of a[k], b[k] is 1. It is used both as the
    // half-sum term and as the per-bit input to the block skip detector.
    wire [DATA_WIDTH-1:0] prop = a ^ b;

    // ------------------------------------------------------------------------
    // Inter-block carry links
    // ------------------------------------------------------------------------
    // link[n] feeds block n; link[n+1] is what block n hands to its successor.
    wire [NUM_BLOCKS:0] link;
    assign link[0] = cin;
    assign cout    = link[NUM_BLOCKS];

    // ------------------------------------------------------------------------
    // One generate iteration per carry-skip block
    // ------------------------------------------------------------------------
    genvar blk;
    genvar pos;
    generate
        for (blk = 0; blk < NUM_BLOCKS; blk = blk + 1) begin : blocks
            // Bit range covered by this block. Only the final block can be
            // narrower than BLOCK_SIZE (when DATA_WIDTH is not a multiple).
            localparam LO    = blk * BLOCK_SIZE;
            localparam WIDTH = (LO + BLOCK_SIZE <= DATA_WIDTH) ?
                               BLOCK_SIZE :
                               DATA_WIDTH - LO;
            localparam HI    = LO + WIDTH - 1;

            // Ripple chain local to the block: chain[0] is the block carry-in,
            // chain[WIDTH] is the carry produced by rippling through every bit.
            wire [WIDTH:0] chain;
            assign chain[0] = link[blk];

            // High only when every bit of the block propagates.
            wire skip = &prop[HI:LO];

            for (pos = 0; pos < WIDTH; pos = pos + 1) begin : rca
                // Full-adder sum: (a ^ b) ^ carry_in.
                assign sum[LO + pos] = prop[LO + pos] ^ chain[pos];

                // Full-adder carry written as a majority of the three inputs:
                // the carry-out is 1 when at least two of a, b, carry_in are 1.
                assign chain[pos + 1] = (a[LO + pos] & b[LO + pos]) |
                                        (a[LO + pos] & chain[pos])  |
                                        (b[LO + pos] & chain[pos]);
            end

            // Skip multiplexer: when the whole block propagates, the carry-in
            // is forwarded unchanged; otherwise the rippled carry is used.
            assign link[blk + 1] = skip ? link[blk] : chain[WIDTH];
        end
    endgenerate

endmodule

/* verilator lint_on DECLFILENAME */
/* verilator lint_on UNOPTFLAT */
/* verilator lint_on EOFNEWLINE */