Skip to content

Commit 8e2eff7

Browse files
committed
Add parameter to disable input buffer fall through
1 parent 6a83b17 commit 8e2eff7

2 files changed

Lines changed: 76 additions & 70 deletions

File tree

README.md

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,47 +5,48 @@ The implementation is tailored to cv32e40p (formerly known as RI5CY). This means
55
The coprocessor's main module is named `fpu_ss` and can be found in [fpu_ss.sv](src/fpu_ss.sv "fpu_ss.sv"). Below, the instantiation template is given and the parameters are described.
66
### Integration Template
77
fpu_ss #(
8-
.PULP_ZFINX ( 0 ),
9-
.INPUT_BUFFER_DEPTH ( 0 ),
10-
.OUT_OF_ORDER ( 1 ),
11-
.FORWARDING ( 1 ),
12-
.FPU_FEATURES ( ),
13-
.FPU_IMPLEMENTATION ( )
8+
.PULP_ZFINX ( 0 ),
9+
.INPUT_BUFFER_DEPTH ( 0 ),
10+
.INPUT_BUFFER_FALL_THROUGH ( 1 ),
11+
.OUT_OF_ORDER ( 1 ),
12+
.FORWARDING ( 1 ),
13+
.FPU_FEATURES ( ),
14+
.FPU_IMPLEMENTATION ( )
1415
) fpu_ss_i (
1516
// clock and reset
16-
.clk_i (),
17-
.rst_ni (),
17+
.clk_i (),
18+
.rst_ni (),
1819

1920
// Compressed Interface
20-
.x_compressed_valid_i (),
21-
.x_compressed_ready_o (),
22-
.x_compressed_req_i (),
23-
.x_compressed_resp_o (),
21+
.x_compressed_valid_i (),
22+
.x_compressed_ready_o (),
23+
.x_compressed_req_i (),
24+
.x_compressed_resp_o (),
2425

2526
// Issue Interface
26-
.x_issue_valid_i (),
27-
.x_issue_ready_o (),
28-
.x_issue_req_i (),
29-
.x_issue_resp_o (),
27+
.x_issue_valid_i (),
28+
.x_issue_ready_o (),
29+
.x_issue_req_i (),
30+
.x_issue_resp_o (),
3031

3132
// Commit Interface
32-
.x_commit_valid_i (),
33-
.x_commit_i (),
33+
.x_commit_valid_i (),
34+
.x_commit_i (),
3435

3536
// Memory Request/Response Interface
36-
.x_mem_valid_o (),
37-
.x_mem_ready_i (),
38-
.x_mem_req_o (),
39-
.x_mem_resp_i (),
37+
.x_mem_valid_o (),
38+
.x_mem_ready_i (),
39+
.x_mem_req_o (),
40+
.x_mem_resp_i (),
4041

4142
// Memory Result Interface
42-
.x_mem_result_valid_i (),
43-
.x_mem_result_i (),
43+
.x_mem_result_valid_i (),
44+
.x_mem_result_i (),
4445

4546
// Result Interface
46-
.x_result_valid_o (),
47-
.x_result_ready_i (),
48-
.x_result_o ()
47+
.x_result_valid_o (),
48+
.x_result_ready_i (),
49+
.x_result_o ()
4950
);
5051

5152
### Dependencies
@@ -58,14 +59,15 @@ to load the FPnew or use
5859
to clone the repository together with FPnew.
5960
### Parameters
6061

61-
| Parameter Name | Values | Description | Default |
62-
| -------------------- | ----------------------------------------------------------------------------------------------- | -------------------------------------------- | ------- |
63-
| `PULP_ZFINX` | {0,1} | Use F or zfinx extension | 0 |
64-
| `INPUT_BUFFER_DEPTH` | {0, ... , 2^32-1} | Input buffer depth | 0 |
65-
| `OUT_OF_ORDER` | {0, 1} | Enabling out-of-order execution | 1 |
66-
| `FORWARDING` | {0, 1} | Enabling forwarding inside the fpu subsystem | 1 |
67-
| `FPU_FEATURES` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - |
68-
| `FPU_IMPLEMENTATION` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - |
62+
| Parameter Name | Values | Description | Default |
63+
| --------------------------- | ----------------------------------------------------------------------------------------------- | -------------------------------------------- | ------- |
64+
| `PULP_ZFINX` | {0,1} | Use F or zfinx extension | 0 |
65+
| `INPUT_BUFFER_DEPTH` | {0, ... , 2^32-1} | Input buffer depth | 0 |
66+
| `INPUT_BUFFER_FALL_THROUGH` | {0,1} | Input buffer fall through enable | 1 |
67+
| `OUT_OF_ORDER` | {0, 1} | Enabling out-of-order execution | 1 |
68+
| `FORWARDING` | {0, 1} | Enabling forwarding inside the fpu subsystem | 1 |
69+
| `FPU_FEATURES` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - |
70+
| `FPU_IMPLEMENTATION` | see [FPnew](https://github.com/pulp-platform/fpnew/tree/develop/docs/README.md#parameters) docs | - | - |
6971

7072

7173
#### Extended parameter descriptions

src/fpu_ss.sv

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,53 +10,57 @@
1010
//
1111
// Description: Top level Module of the FPU subsystem
1212
//
13-
// Parameters: PULP_ZFINX: Enable support for "Zfinx" standard extension (and thereby removing support for
14-
// "F" standard extension)
13+
// Parameters: PULP_ZFINX: Enable support for "Zfinx" standard extension (and thereby removing support for
14+
// "F" standard extension)
1515
//
16-
// INPUT_BUFFER_DEPTH: Set depth of the FIFO input buffer. If parameter is set to 0, no buffer will be
17-
// instantiated
16+
// INPUT_BUFFER_DEPTH: Set depth of the FIFO input buffer. If parameter is set to 0, no buffer will be
17+
// instantiated
1818
//
19-
// OUT_OF_ORDER: Enable out-of-order execution for instructions that go through
20-
// the FPnew.
21-
// For example with OUT_OF_ORDER = 1
22-
// fdiv.s fa1, fa2, fa3 // suppose takes 3 cycles
23-
// fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
24-
// fmul.s fa2, fa5, fa6 // suppose takes 1 cycles
25-
// fmul.s fa3, fa5, fa6 // suppose takes 1 cycles
26-
// --> This sequence takes 4 clock cycles
27-
// With OUT_OF_ORDER this instruction sequence would take 5 clock cycles
28-
// Possible values for this parameter are 0 and 1
19+
// OUT_OF_ORDER: Enable out-of-order execution for instructions that go through
20+
// the FPnew.
21+
// For example with OUT_OF_ORDER = 1
22+
// fdiv.s fa1, fa2, fa3 // suppose takes 3 cycles
23+
// fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
24+
// fmul.s fa2, fa5, fa6 // suppose takes 1 cycles
25+
// fmul.s fa3, fa5, fa6 // suppose takes 1 cycles
26+
// --> This sequence takes 4 clock cycles
27+
// With OUT_OF_ORDER = 0 this instruction sequence would take 5 clock cycles
28+
// Possible values for this parameter are 0 and 1
2929
//
30-
// FORWARDING: Enable forwarding of floating-point results in the subsystem.
31-
// For examle take this sequence:
32-
// fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
33-
// fmul.s fa1, fa4, fa6 // suppose takes 1 cycles
34-
// There is a source register dependency for the second instruction on the
35-
// first instructions result. With FORWARDING = 1 this sequence takes 2 clock cycles
36-
// while with FORWARDING = 0 this sequence takes 3 clock cycles.
30+
// FORWARDING: Enable forwarding of floating-point results in the subsystem.
31+
// For example, take this sequence:
32+
// fmul.s fa4, fa5, fa6 // suppose takes 1 cycles
33+
// fmul.s fa1, fa4, fa6 // suppose takes 1 cycles
34+
// There is a source register dependency for the second instruction on the
35+
// first instruction's result. With FORWARDING = 1 this sequence takes 2 clock cycles
36+
// while with FORWARDING = 0 this sequence takes 3 clock cycles.
3737
//
38-
// FPU_FEATURES: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
39-
// https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
40-
// Other configurations might not work
38+
// FPU_FEATURES: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
39+
// https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
40+
// Other configurations might not work
4141
//
42-
// FPU_IMPLEMENTATION: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
43-
// https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
44-
// Other configurations might not work
42+
// FPU_IMPLEMENTATION: Parameter to configure the FPnew. The subsystem was designed for the configuration found here:
43+
// https://github.com/moimfeld/cv32e40p/blob/x-interface/example_tb/core/fpu_ss/fpu_ss_pkg.sv
44+
// Other configurations might not work
45+
//
46+
// INPUT_BUFFER_FALL_THROUGH: Enable fall-through mode of the FIFO input buffer. If INPUT_BUFFER_DEPTH is set to 0, this
47+
// parameter doesn't have any effect
4548
//
4649
// Contributor: Moritz Imfeld <moimfeld@student.ethz.ch>
4750
// Davide Schiavone <davide@openhwgroup.org>
4851

4952
module fpu_ss
5053
import fpu_ss_pkg::*;
5154
#(
52-
parameter PULP_ZFINX = 0,
53-
parameter INPUT_BUFFER_DEPTH = 0,
54-
parameter OUT_OF_ORDER = 1,
55-
parameter FORWARDING = 1,
55+
parameter PULP_ZFINX = 0,
56+
parameter INPUT_BUFFER_DEPTH = 0,
57+
parameter bit INPUT_BUFFER_FALL_THROUGH = 1,
58+
parameter OUT_OF_ORDER = 1,
59+
parameter FORWARDING = 1,
5660
// PulpDivsqrt = 0 enables the T-head-based DivSqrt unit. Supported only for FP32-only instances of FPnew
57-
parameter logic PulpDivsqrt = 1'b0,
58-
parameter fpnew_pkg::fpu_features_t FPU_FEATURES = fpu_ss_pkg::FPU_FEATURES,
59-
parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = fpu_ss_pkg::FPU_IMPLEMENTATION
61+
parameter logic PulpDivsqrt = 1'b0,
62+
parameter fpnew_pkg::fpu_features_t FPU_FEATURES = fpu_ss_pkg::FPU_FEATURES,
63+
parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = fpu_ss_pkg::FPU_IMPLEMENTATION
6064
) (
6165
// Clock and Reset
6266
input logic clk_i,
@@ -287,7 +291,7 @@ module fpu_ss
287291
generate
288292
if (INPUT_BUFFER_DEPTH > 0) begin : gen_input_stream_fifo
289293
stream_fifo #(
290-
.FALL_THROUGH(1),
294+
.FALL_THROUGH(INPUT_BUFFER_FALL_THROUGH),
291295
.DATA_WIDTH (32),
292296
.DEPTH (INPUT_BUFFER_DEPTH),
293297
.T (offloaded_data_t)

0 commit comments

Comments
 (0)