hdk/common/verif/models/sh_bfm/sh_bfm.sv (1,877 lines of code) (raw):
// ============================================================================
// Amazon FPGA Hardware Development Kit
//
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Amazon Software License (the "License"). You may not use
// this file except in compliance with the License. A copy of the License is
// located at
//
// http://aws.amazon.com/asl/
//
// or in the "license" file accompanying this file. This file is distributed on
// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
// implied. See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================
// TB_TOP is the hierarchical name of the testbench top used by this model to
// reach host memory and the interrupt handler; it defaults to `tb` unless the
// build defines it first.
`ifndef TB_TOP
`define TB_TOP tb
`endif
// NOTE(review): HBM_STAT_BUS_CL is not referenced in the part of the file
// reviewed here; presumably an address used by HBM stat accesses - confirm.
`define HBM_STAT_BUS_CL 64'h0300
// Shell (SH) bus functional model. The port list is pulled in from
// cl_ports_sh_bfm.vh so that it always matches the CL port definition.
// NUM_PCIE sizes the cfg_max_payload / cfg_max_read_req initialisation loop;
// NOTE(review): the other NUM_* parameters are not referenced in the part of
// the file reviewed here - confirm whether they are still needed.
module sh_bfm #(
parameter NUM_HMC = 4,
parameter NUM_QSFP = 4,
parameter NUM_PCIE = 1,
parameter NUM_GTY = 4,
parameter NUM_I2C = 2,
parameter NUM_POWER = 4
)(
//---------------------------------------------------------------------------
// cl_ports_sh_bfm.vh is generated from cl_ports.vh in $(HDK_SHELL_DESIGN_DIR)/interfaces.
// This is to ensure that there is no integration issues.
//---------------------------------------------------------------------------
`include "cl_ports_sh_bfm.vh"
);
`include "axi_bfm_defines.svh"
import tb_type_defines_pkg::*;
// ---------------------------------------------------------------------------
// Transaction queues used by the channel drivers/monitors below.
// ---------------------------------------------------------------------------
// PCIS write commands waiting to be driven on the sh->cl AW channel.
AXI_Command sh_cl_wr_cmds[$];
// PCIS write data beats waiting to be driven on the sh->cl W channel.
AXI_Data sh_cl_wr_data[$];
// PCIS read commands waiting to be driven on the sh->cl AR channel.
AXI_Command sh_cl_rd_cmds[$];
// PCIS read data beats captured from the CL R channel.
AXI_Data cl_sh_rd_data[$];
// PCIM write commands accepted from the CL that still need a B response.
AXI_Command sh_cl_b_resps[$];
// PCIM write commands accepted from the CL (popped when the B response is taken).
AXI_Command cl_sh_wr_cmds[$];
// PCIM write data beats captured from the CL, consumed by the host-memory writer.
AXI_Data cl_sh_wr_data[$];
// PCIM read commands accepted from the CL (popped when the read burst completes).
AXI_Command cl_sh_rd_cmds[$];
// PCIM read commands whose data still has to be returned to the CL.
AXI_Command sh_cl_rd_data[$];
// PCIS B responses captured from the CL.
AXI_Command cl_sh_b_resps[$];
// ---------------------------------------------------------------------------
// Clocks and resets generated inside the model.
// ---------------------------------------------------------------------------
logic clk_core;
logic clk_extra_a1;
// Held at 0 by an initial block (not used for non-SDAccel simulations).
logic kernel_rst_n;
// rst_n_i re-timed onto clk_core.
logic rst_n;
// Stages of the reset synchronizer that produces sync_rst_n.
logic pre_sync0_rst_n;
logic pre_sync1_rst_n;
logic pre_sync2_rst_n;
logic pre_sync3_rst_n;
logic sync_rst_n;
// sync_rst_n gated with the inverse of sh_cl_flr_assert.
logic intf_sync_rst_n;
// NOTE(review): of the ddr_* signals only ddr_user_rst_n is assigned in the
// part of the file reviewed here; the rest are presumably driven further down.
logic ddr_user_clk;
logic ddr_user_rst;
logic ddr_user_rst_n;
logic ddr_is_ready;
logic ddr_is_ready_presync;
logic ddr_is_ready_sync;
// Enables the DEBUG $display messages in the channel processes.
bit debug;
// When set, the clock checkers measure each clock period and count mismatches.
logic chk_clk_freq = 1'b0;
// Driven to a constant selected by the ECC_DIRECT_EN / RND_ECC_EN defines.
logic ECC_EN;
// Returned by get_ecc_err_cnt().
int ecc_err_cnt=0;
// One DMA descriptor: a buffer address, a length and a CL address.
// NOTE(review): `buffer` is presumably the host-side address - confirm.
typedef struct {
logic [63:0] buffer;
logic [27:0] len;
logic [63:0] cl_addr;
} DMA_OP;
// Per-channel (0..3) DMA bookkeeping.
DMA_OP h2c_dma_list[0:3][$];
// Count used by the PCIS W driver to derive h2c_dma_done; decremented on wlast.
int h2c_dma_wr_cmd_cnt[0:3];
DMA_OP c2h_dma_list[0:3][$];
DMA_OP c2h_data_dma_list[0:3][$];
logic [3:0] h2c_dma_started;
logic [3:0] c2h_dma_started;
logic [3:0] c2h_dma_done;
logic [3:0] h2c_dma_done;
logic [7:0] read_data_buffer[];
// Clock half-periods: each clock toggles every *_DLY (see the clock generators).
real MAIN_A0_DLY = 4ns;
real CORE_DLY = 4ns;
real EXTRA_A1_DLY = 8ns;
real HBM_DLY = 5ns;
// Scratch values used by the clock frequency checkers.
real main_rising_edge;
real core_rising_edge;
real extra_a1_rising_edge;
real hbm_rising_edge;
real main_clk_period;
real core_clk_period;
real extra_a1_clk_period;
real hbm_clk_period;
// Status outputs of the four AXI protocol checker instances.
logic [96:0] pcis_pc_status;
logic pcis_pc_asserted;
logic [96:0] pcim_pc_status;
logic pcim_pc_asserted;
logic [96:0] ocl_pc_status;
logic ocl_pc_asserted;
logic [96:0] sda_pc_status;
logic sda_pc_asserted;
// Error counters reported by chk_prot_err_stat() / chk_clk_err_cnt().
int prot_err_count;
int clk_err_count;
int prot_x_count;
int counter;
// Free-running counters: glcount0 on rising, glcount1 on falling clk_core edges.
logic [63:0] glcount0, glcount1;
//-------------------------------------------------------------------------------------------------------------
// Xilinx AXI Protocol Checker Instance (for CL_SH_DMA_PCIS*).
// Protocol checker checks for protocol violations on the interface where protocol checker
// is instantiated. This will help the CL designers in catching protocol violations before
// testing with real system. Refer to hdk/common/verif/models/xilinx_axi_pc/axi_protocol_checker_v1_1_vl_rfs.v
// for more details about each PC_STATUS bit.
//------------------------------------------------------------------------------------------------------------
// Protocol checker on the PCIS (sh->cl DMA) interface.
// It observes the sh_cl_dma_pcis_* / cl_sh_dma_pcis_* signals on clk_main_a0 and
// reports through pcis_pc_status / pcis_pc_asserted. Burst, lock, cache, prot,
// qos, region and user inputs are tied to constants rather than taken from the bus.
// NOTE(review): C_PC_MESSAGE_LEVEL is 2 here but 0 on the other three checker
// instances in this file - confirm the difference is intentional.
axi_protocol_checker_v1_1_12_top #(
.C_AXI_PROTOCOL (0),
.C_AXI_ID_WIDTH (16),
.C_AXI_DATA_WIDTH (512),
.C_AXI_ADDR_WIDTH (64),
.C_AXI_AWUSER_WIDTH (1),
.C_AXI_ARUSER_WIDTH (1),
.C_AXI_WUSER_WIDTH (1),
.C_AXI_RUSER_WIDTH (1),
.C_AXI_BUSER_WIDTH (1),
.C_PC_MAXRBURSTS (32),
.C_PC_MAXWBURSTS (32),
.C_PC_EXMON_WIDTH (0),
.C_PC_AW_MAXWAITS (`MAXWAITS),
.C_PC_AR_MAXWAITS (`MAXWAITS),
.C_PC_W_MAXWAITS (`MAXWAITS),
.C_PC_R_MAXWAITS (`MAXWAITS),
.C_PC_B_MAXWAITS (`MAXWAITS),
.C_PC_MESSAGE_LEVEL (2),
.C_PC_SUPPORTS_NARROW_BURST(1),
.C_PC_MAX_BURST_LENGTH (256),
.C_PC_HAS_SYSTEM_RESET (1),
.C_PC_STATUS_WIDTH (97)
) axi_pc_mstr_inst_pcis (
.pc_status (pcis_pc_status),
.pc_asserted (pcis_pc_asserted),
.system_resetn (rst_main_n),
.aclk (clk_main_a0),
.aresetn (rst_main_n),
// Write address channel (shell drives, CL accepts)
.pc_axi_awid (sh_cl_dma_pcis_awid),
.pc_axi_awaddr (sh_cl_dma_pcis_awaddr),
.pc_axi_awlen (sh_cl_dma_pcis_awlen),
.pc_axi_awsize (sh_cl_dma_pcis_awsize),
.pc_axi_awburst (2'b01),
.pc_axi_awlock (1'b0),
.pc_axi_awcache (4'b0000),
.pc_axi_awprot (3'b000),
.pc_axi_awqos (4'b0000),
.pc_axi_awregion (4'b0000),
.pc_axi_awuser (1'h0),
.pc_axi_awvalid (sh_cl_dma_pcis_awvalid),
.pc_axi_awready (cl_sh_dma_pcis_awready),
// Write data channel
.pc_axi_wid (16'h0000), // AXI3 only
.pc_axi_wlast (sh_cl_dma_pcis_wlast),
.pc_axi_wdata (sh_cl_dma_pcis_wdata),
.pc_axi_wstrb (sh_cl_dma_pcis_wstrb),
.pc_axi_wuser (1'h0),
.pc_axi_wvalid (sh_cl_dma_pcis_wvalid),
.pc_axi_wready (cl_sh_dma_pcis_wready),
// Write response channel (CL drives)
.pc_axi_bid (cl_sh_dma_pcis_bid),
.pc_axi_bresp (cl_sh_dma_pcis_bresp),
.pc_axi_buser (1'h0),
.pc_axi_bvalid (cl_sh_dma_pcis_bvalid),
.pc_axi_bready (sh_cl_dma_pcis_bready),
// Read address channel
.pc_axi_arid (sh_cl_dma_pcis_arid),
.pc_axi_araddr (sh_cl_dma_pcis_araddr),
.pc_axi_arlen (sh_cl_dma_pcis_arlen),
.pc_axi_arsize (sh_cl_dma_pcis_arsize),
.pc_axi_arburst (2'b01),
.pc_axi_arlock (1'b0),
.pc_axi_arcache (4'b0000),
.pc_axi_arprot (3'b000),
.pc_axi_arqos (4'b0000),
.pc_axi_arregion (4'b0000),
.pc_axi_aruser (1'h0),
.pc_axi_arvalid (sh_cl_dma_pcis_arvalid),
.pc_axi_arready (cl_sh_dma_pcis_arready),
// Read data channel (CL drives)
.pc_axi_rid (cl_sh_dma_pcis_rid),
.pc_axi_rlast (cl_sh_dma_pcis_rlast),
.pc_axi_rdata (cl_sh_dma_pcis_rdata),
.pc_axi_rresp (cl_sh_dma_pcis_rresp),
.pc_axi_ruser (1'h0),
.pc_axi_rvalid (cl_sh_dma_pcis_rvalid),
.pc_axi_rready (sh_cl_dma_pcis_rready)
);
//----------------------------------------------------------------
// Xilinx AXI Protocol Checker Instance (for CL_SH_PCIM*)
//----------------------------------------------------------------
// Protocol checker on the PCIM (cl->sh) interface.
// It observes the cl_sh_pcim_* / sh_cl_pcim_* signals on clk_main_a0 and reports
// through pcim_pc_status / pcim_pc_asserted. Burst, lock, cache, prot, qos,
// region and user inputs are tied to constants rather than taken from the bus.
axi_protocol_checker_v1_1_12_top #(
.C_AXI_PROTOCOL (0),
.C_AXI_ID_WIDTH (16),
.C_AXI_DATA_WIDTH (512),
.C_AXI_ADDR_WIDTH (64),
.C_AXI_AWUSER_WIDTH (1),
.C_AXI_ARUSER_WIDTH (1),
.C_AXI_WUSER_WIDTH (1),
.C_AXI_RUSER_WIDTH (1),
.C_AXI_BUSER_WIDTH (1),
.C_PC_MAXRBURSTS (32),
.C_PC_MAXWBURSTS (32),
.C_PC_EXMON_WIDTH (0),
.C_PC_AW_MAXWAITS (`MAXWAITS),
.C_PC_AR_MAXWAITS (`MAXWAITS),
.C_PC_W_MAXWAITS (`MAXWAITS),
.C_PC_R_MAXWAITS (`MAXWAITS),
.C_PC_B_MAXWAITS (`MAXWAITS),
.C_PC_MESSAGE_LEVEL (0),
.C_PC_SUPPORTS_NARROW_BURST(1),
.C_PC_MAX_BURST_LENGTH (256),
.C_PC_HAS_SYSTEM_RESET (1),
.C_PC_STATUS_WIDTH (97)
) axi_pc_mstr_inst_pcim (
.pc_status (pcim_pc_status),
.pc_asserted (pcim_pc_asserted),
.system_resetn (rst_main_n),
.aclk (clk_main_a0),
.aresetn (rst_main_n),
// Write address channel (CL drives, shell accepts)
.pc_axi_awid (cl_sh_pcim_awid),
.pc_axi_awaddr (cl_sh_pcim_awaddr),
.pc_axi_awlen (cl_sh_pcim_awlen),
.pc_axi_awsize (cl_sh_pcim_awsize),
.pc_axi_awburst (2'b01),
.pc_axi_awlock (1'b0),
.pc_axi_awcache (4'b0000),
.pc_axi_awprot (3'b000),
.pc_axi_awqos (4'b0000),
.pc_axi_awregion (4'b0000),
.pc_axi_awuser (1'H0),
.pc_axi_awvalid (cl_sh_pcim_awvalid),
.pc_axi_awready (sh_cl_pcim_awready),
// Write data channel
.pc_axi_wid (16'h0000), // AXI3 only
.pc_axi_wlast (cl_sh_pcim_wlast),
.pc_axi_wdata (cl_sh_pcim_wdata),
.pc_axi_wstrb (cl_sh_pcim_wstrb),
.pc_axi_wuser (1'H0),
.pc_axi_wvalid (cl_sh_pcim_wvalid),
.pc_axi_wready (sh_cl_pcim_wready),
// Write response channel (shell drives)
.pc_axi_bid (sh_cl_pcim_bid),
.pc_axi_bresp (sh_cl_pcim_bresp),
.pc_axi_buser (1'H0),
.pc_axi_bvalid (sh_cl_pcim_bvalid),
.pc_axi_bready (cl_sh_pcim_bready),
// Read address channel
.pc_axi_arid (cl_sh_pcim_arid),
.pc_axi_araddr (cl_sh_pcim_araddr),
.pc_axi_arlen (cl_sh_pcim_arlen),
.pc_axi_arsize (cl_sh_pcim_arsize),
.pc_axi_arburst (2'b01),
.pc_axi_arlock (1'b0),
.pc_axi_arcache (4'b0000),
.pc_axi_arprot (3'b000),
.pc_axi_arqos (4'b0000),
.pc_axi_arregion (4'b0000),
.pc_axi_aruser (1'H0),
.pc_axi_arvalid (cl_sh_pcim_arvalid),
.pc_axi_arready (sh_cl_pcim_arready),
// Read data channel (shell drives)
.pc_axi_rid (sh_cl_pcim_rid),
.pc_axi_rlast (sh_cl_pcim_rlast),
.pc_axi_rdata (sh_cl_pcim_rdata),
.pc_axi_rresp (sh_cl_pcim_rresp),
.pc_axi_ruser (1'H0),
.pc_axi_rvalid (sh_cl_pcim_rvalid),
.pc_axi_rready (cl_sh_pcim_rready)
);
//-------------------------------------------------------------------------
// [axi_pc] Xilinx AXI Protocol Checker Instance (for OCL AXL interface)
//-------------------------------------------------------------------------
// Protocol checker on the OCL AXI4-Lite interface.
// It observes the ocl_cl_* / cl_ocl_* signals on clk_main_a0 and reports through
// ocl_pc_status / ocl_pc_asserted. ID, length, size, burst, last and the other
// sideband inputs are tied to constants.
axi_protocol_checker_v1_1_12_top #(
.C_AXI_PROTOCOL (2), // 2 = AXI4-Lite
.C_AXI_DATA_WIDTH (32),
.C_AXI_ADDR_WIDTH (32),
.C_AXI_AWUSER_WIDTH (1), // Actually, these are all 0
.C_AXI_ARUSER_WIDTH (1),
.C_AXI_WUSER_WIDTH (1),
.C_AXI_RUSER_WIDTH (1),
.C_AXI_BUSER_WIDTH (1),
.C_PC_MAXRBURSTS (8), // Technically, up to 8, but must be in-order - no use of IDs
.C_PC_MAXWBURSTS (8),
.C_PC_EXMON_WIDTH (0),
.C_PC_AW_MAXWAITS (`MAXWAITS),
.C_PC_AR_MAXWAITS (`MAXWAITS),
.C_PC_W_MAXWAITS (`MAXWAITS), // These three are don't care because "ready" signals on master behave properly (or are tied)
.C_PC_R_MAXWAITS (`MAXWAITS),
.C_PC_B_MAXWAITS (`MAXWAITS),
.C_PC_MESSAGE_LEVEL (0),
.C_PC_SUPPORTS_NARROW_BURST(0),
.C_PC_MAX_BURST_LENGTH (1),
.C_PC_HAS_SYSTEM_RESET (1),
.C_PC_STATUS_WIDTH (97)
) axl_pc_ocl_slv_inst (
.pc_status (ocl_pc_status),
.pc_asserted (ocl_pc_asserted),
.system_resetn (rst_main_n),
.aclk (clk_main_a0),
.aresetn (rst_main_n),
// Write address channel
.pc_axi_awid (1'h0),
.pc_axi_awaddr (ocl_cl_awaddr),
.pc_axi_awlen (8'd0),
.pc_axi_awsize (3'd0),
.pc_axi_awburst (2'b01),
.pc_axi_awlock (1'b0),
.pc_axi_awcache (4'b0000),
.pc_axi_awprot (3'b000),
.pc_axi_awqos (4'b0000),
.pc_axi_awregion (4'b0000),
.pc_axi_awuser (1'H0),
.pc_axi_awvalid (ocl_cl_awvalid),
.pc_axi_awready (cl_ocl_awready),
// Write data channel
.pc_axi_wid (1'b0), // AXI3 only
.pc_axi_wlast (1'd1),
.pc_axi_wdata (ocl_cl_wdata),
.pc_axi_wstrb (ocl_cl_wstrb),
.pc_axi_wuser (1'H0),
.pc_axi_wvalid (ocl_cl_wvalid),
.pc_axi_wready (cl_ocl_wready),
// Write response channel
.pc_axi_bid (1'h0),
.pc_axi_bresp (cl_ocl_bresp),
.pc_axi_buser (1'H0),
.pc_axi_bvalid (cl_ocl_bvalid),
.pc_axi_bready (ocl_cl_bready),
// Read address channel
.pc_axi_arid (1'h0),
.pc_axi_araddr (ocl_cl_araddr),
.pc_axi_arlen (8'd0),
.pc_axi_arsize (3'd0),
.pc_axi_arburst (2'b01),
.pc_axi_arlock (1'b0),
.pc_axi_arcache (4'b0000),
.pc_axi_arprot (3'b000),
.pc_axi_arqos (4'b0000),
.pc_axi_arregion (4'b0000),
.pc_axi_aruser (1'H0),
.pc_axi_arvalid (ocl_cl_arvalid),
.pc_axi_arready (cl_ocl_arready),
// Read data channel
.pc_axi_rid (1'h0),
.pc_axi_rlast (1'd1),
.pc_axi_rdata (cl_ocl_rdata),
.pc_axi_rresp (cl_ocl_rresp),
.pc_axi_ruser (1'H0),
.pc_axi_rvalid (cl_ocl_rvalid),
.pc_axi_rready (ocl_cl_rready)
);
//-------------------------------------------------------------------------
// [axi_pc] Xilinx AXI Protocol Checker Instance (for SDA AXL interface)
//-------------------------------------------------------------------------
// Protocol checker on the SDA AXI4-Lite interface.
// It observes the sda_cl_* / cl_sda_* signals on clk_main_a0 and reports through
// sda_pc_status / sda_pc_asserted. ID, length, size, burst, last and the other
// sideband inputs are tied to constants.
axi_protocol_checker_v1_1_12_top #(
.C_AXI_PROTOCOL (2), // 2 = AXI4-Lite
.C_AXI_DATA_WIDTH (32),
.C_AXI_ADDR_WIDTH (32),
.C_AXI_AWUSER_WIDTH (1), // Actually, these are all 0
.C_AXI_ARUSER_WIDTH (1),
.C_AXI_WUSER_WIDTH (1),
.C_AXI_RUSER_WIDTH (1),
.C_AXI_BUSER_WIDTH (1),
.C_PC_MAXRBURSTS (8), // Technically, up to 8, but must be in-order - no use of IDs
.C_PC_MAXWBURSTS (8),
.C_PC_EXMON_WIDTH (0),
.C_PC_AW_MAXWAITS (`MAXWAITS),
.C_PC_AR_MAXWAITS (`MAXWAITS),
.C_PC_W_MAXWAITS (`MAXWAITS), // These three are don't care because "ready" signals on master behave properly (or are tied)
.C_PC_R_MAXWAITS (`MAXWAITS),
.C_PC_B_MAXWAITS (`MAXWAITS),
.C_PC_MESSAGE_LEVEL (0),
.C_PC_SUPPORTS_NARROW_BURST(0),
.C_PC_MAX_BURST_LENGTH (1),
.C_PC_HAS_SYSTEM_RESET (1),
.C_PC_STATUS_WIDTH (97)
) axl_pc_sda_slv_inst (
.pc_status (sda_pc_status),
.pc_asserted (sda_pc_asserted),
.system_resetn (rst_main_n),
.aclk (clk_main_a0),
.aresetn (rst_main_n),
// Write address channel
.pc_axi_awid (1'b0),
.pc_axi_awaddr (sda_cl_awaddr),
.pc_axi_awlen (8'd0),
.pc_axi_awsize (3'd0),
.pc_axi_awburst (2'b01),
.pc_axi_awlock (1'b0),
.pc_axi_awcache (4'b0000),
.pc_axi_awprot (3'b000),
.pc_axi_awqos (4'b0000),
.pc_axi_awregion (4'b0000),
.pc_axi_awuser (1'H0),
.pc_axi_awvalid (sda_cl_awvalid),
.pc_axi_awready (cl_sda_awready),
// Write data channel
.pc_axi_wid (1'b0), // AXI3 only
.pc_axi_wlast (1'd1),
.pc_axi_wdata (sda_cl_wdata),
.pc_axi_wstrb (sda_cl_wstrb),
.pc_axi_wuser (1'H0),
.pc_axi_wvalid (sda_cl_wvalid),
.pc_axi_wready (cl_sda_wready),
// Write response channel
.pc_axi_bid (1'h0),
.pc_axi_bresp (cl_sda_bresp),
.pc_axi_buser (1'H0),
.pc_axi_bvalid (cl_sda_bvalid),
.pc_axi_bready (sda_cl_bready),
// Read address channel
.pc_axi_arid (1'h0),
.pc_axi_araddr (sda_cl_araddr),
.pc_axi_arlen (8'd0),
.pc_axi_arsize (3'd0),
.pc_axi_arburst (2'b01),
.pc_axi_arlock (1'b0),
.pc_axi_arcache (4'b0000),
.pc_axi_arprot (3'b000),
.pc_axi_arqos (4'b0000),
.pc_axi_arregion (4'b0000),
.pc_axi_aruser (1'H0),
.pc_axi_arvalid (sda_cl_arvalid),
.pc_axi_arready (cl_sda_arready),
// Read data channel
.pc_axi_rid (1'h0),
.pc_axi_rlast (1'd1),
.pc_axi_rdata (cl_sda_rdata),
.pc_axi_rresp (cl_sda_rresp),
.pc_axi_ruser (1'H0),
.pc_axi_rvalid (cl_sda_rvalid),
.pc_axi_rready (sda_cl_rready)
);
// Debug tracing of the channel processes is disabled at start-up.
// TODO: once plusarg support is enabled, select it from the command line:
//   if ($test$plusargs("DEBUG")) debug = 1'b1; else debug = 1'b0;
initial
  debug = 1'b0;
// ECC_EN is a compile-time constant: 1 when either ECC_DIRECT_EN or RND_ECC_EN
// is defined, 0 otherwise.
`ifdef ECC_DIRECT_EN
  assign ECC_EN = 1'b1;
`elsif RND_ECC_EN
  assign ECC_EN = 1'b1;
`else
  assign ECC_EN = 1'b0;
`endif
// Free-running clock generators. Each clock starts low and toggles every *_DLY,
// so *_DLY is the half-period. The delay variable is re-read on every toggle.
initial begin
  clk_core = 1'b0;
  forever begin
    #CORE_DLY;
    clk_core = ~clk_core;
  end
end

initial begin
  clk_main_a0 = 1'b0;
  forever begin
    #MAIN_A0_DLY;
    clk_main_a0 = ~clk_main_a0;
  end
end

initial begin
  clk_extra_a1 = 1'b0;
  forever begin
    #EXTRA_A1_DLY;
    clk_extra_a1 = ~clk_extra_a1;
  end
end

initial begin
  clk_hbm_ref = 1'b0;
  forever begin
    #HBM_DLY;
    clk_hbm_ref = ~clk_hbm_ref;
  end
end
// Raw reset requests written by power_up(); they are re-timed below.
logic rst_n_i;
logic rst_main_n_i = 1'b0;
logic rst_xtra_n_i;

// Re-time the core reset request onto clk_core.
always @(posedge clk_core) begin
  rst_n <= rst_n_i;
end

// Re-time the main reset request onto clk_main_a0.
always @(posedge clk_main_a0) begin
  rst_main_n <= rst_main_n_i;
end

// kernel reset is not used for non-SDAccel simulations.
initial
  kernel_rst_n = 1'b0;
// Reset synchronizer: rst_n clears the whole chain asynchronously; after release
// a 1 is shifted through pre_sync0..pre_sync3 and reaches sync_rst_n five
// clk_core rising edges later.
always_ff @(posedge clk_core or negedge rst_n) begin
  if (!rst_n) begin
    {sync_rst_n, pre_sync3_rst_n, pre_sync2_rst_n, pre_sync1_rst_n, pre_sync0_rst_n} <= 5'b00000;
  end
  else begin
    {sync_rst_n, pre_sync3_rst_n, pre_sync2_rst_n, pre_sync1_rst_n, pre_sync0_rst_n}
      <= {pre_sync3_rst_n, pre_sync2_rst_n, pre_sync1_rst_n, pre_sync0_rst_n, 1'b1};
  end
end
// Shell status/control outputs that the model holds at fixed start-up values.
assign sh_cl_pwr_state = 2'b00;

initial begin
  sh_cl_ctl0        <= 32'h0;
  sh_cl_ctl1        <= 32'h0;
  sh_cl_flr_assert  <= 1'b0;
  sh_cl_status_vdip <= 32'h0;
end
// Interface reset: cleared asynchronously with sync_rst_n; otherwise follows the
// inverse of sh_cl_flr_assert on each clk_core rising edge.
always_ff @(negedge sync_rst_n or posedge clk_core) begin
  if (!sync_rst_n) begin
    intf_sync_rst_n <= 1'b0;
  end
  else begin
    intf_sync_rst_n <= ~sh_cl_flr_assert;
  end
end
// glcount0 counts rising clk_core edges while rst_n is high.
always_ff @(posedge clk_core or negedge rst_n) begin
  if (!rst_n)
    glcount0 <= '0;
  else
    glcount0 <= glcount0 + 1;
end

// glcount1 counts falling clk_core edges while rst_n is high.
always_ff @(negedge clk_core or negedge rst_n) begin
  if (!rst_n)
    glcount1 <= '0;
  else
    glcount1 <= glcount1 + 1;
end

// Both counters are presented to the CL re-registered on clk_main_a0.
always_ff @(posedge clk_main_a0) begin
  sh_cl_glcount1 <= glcount1;
  sh_cl_glcount0 <= glcount0;
end
// Advertise the same fixed PCIe settings on every PCIe instance.
initial begin
  for (int pcie = 0; pcie < NUM_PCIE; pcie++) begin
    cfg_max_read_req[pcie] <= 3'b001; // 256 bytes
    cfg_max_payload[pcie]  <= 2'b01;  // 256 bytes
  end
end
// Active-low view of the DDR user reset.
assign ddr_user_rst_n = ~ddr_user_rst;

// TODO: Connect up DDR stats interfaces if needed
// Until then the DDR stat bus is parked idle (no read, no write).
initial begin
  sh_cl_ddr_stat_rd    = 1'b0;
  sh_cl_ddr_stat_wr    = 1'b0;
  sh_cl_ddr_stat_addr  = 8'h00;
  sh_cl_ddr_stat_wdata = 32'h0;
  sh_cl_ddr_stat_user  = 3'b0;
end
//=================================================
//
// sh->cl PCIeS Interface
//
//=================================================
// initial various counts for DMA operations
// Time-zero values for the DMA write counters and for the PCIS/PCIM signals
// driven by the shell model. Everything is zero except the burst type.
initial begin
  // Outstanding H2C write counters, one per DMA channel.
  h2c_dma_wr_cmd_cnt = '{default: 0};

  // PCIS write address channel.
  sh_cl_dma_pcis_awid    = '0;
  sh_cl_dma_pcis_awaddr  = '0;
  sh_cl_dma_pcis_awlen   = '0;
  sh_cl_dma_pcis_awsize  = '0;
  sh_cl_dma_pcis_awburst = 2'b01; // INCR - the only burst mode used
  sh_cl_dma_pcis_awlock  = '0;
  sh_cl_dma_pcis_awcache = '0;
  sh_cl_dma_pcis_awprot  = '0;
  sh_cl_dma_pcis_awqos   = '0;
  sh_cl_dma_pcis_awuser  = '0;

  // PCIS write data channel.
  sh_cl_dma_pcis_wid     = '0;
  sh_cl_dma_pcis_wdata   = '0;
  sh_cl_dma_pcis_wstrb   = '0;
  sh_cl_dma_pcis_wlast   = '0;
  sh_cl_dma_pcis_wuser   = '0;

  // PCIS read address channel.
  sh_cl_dma_pcis_araddr  = '0;
  sh_cl_dma_pcis_arlen   = '0;
  sh_cl_dma_pcis_arsize  = '0;
  sh_cl_dma_pcis_arburst = 2'b01; // INCR - the only burst mode used
  sh_cl_dma_pcis_arlock  = '0;
  sh_cl_dma_pcis_arcache = '0;
  sh_cl_dma_pcis_arprot  = '0;
  sh_cl_dma_pcis_arqos   = '0;
  sh_cl_dma_pcis_aruser  = '0;

  // PCIM response channels driven by the shell.
  sh_cl_pcim_bid   = '0;
  sh_cl_pcim_bresp = '0;
  sh_cl_pcim_rid   = '0;
  sh_cl_pcim_rdata = '0;
  sh_cl_pcim_rresp = '0;
  sh_cl_pcim_ruser = '0;
end
//
// sh->cl Address Write Channel
//
// Drives the head of sh_cl_wr_cmds onto the PCIS AW channel.
// awvalid is raised while a command is queued and dropped for one cycle after
// each handshake; the command is popped on the handshake itself. awsize is
// always driven as 3'h6, not taken from the queued command.
always @(posedge clk_core) begin
  if (sh_cl_wr_cmds.size() == 0) begin
    sh_cl_dma_pcis_awvalid <= 1'b0;
  end
  else begin
    sh_cl_dma_pcis_awid    <= sh_cl_wr_cmds[0].id;
    sh_cl_dma_pcis_awaddr  <= sh_cl_wr_cmds[0].addr;
    sh_cl_dma_pcis_awlen   <= sh_cl_wr_cmds[0].len;
    sh_cl_dma_pcis_awsize  <= 3'h6;
    // Stay valid unless this cycle completes a handshake.
    sh_cl_dma_pcis_awvalid <= !(sh_cl_dma_pcis_awvalid && cl_sh_dma_pcis_awready);
    if (cl_sh_dma_pcis_awready && sh_cl_dma_pcis_awvalid) begin
      if (debug)
        $display("[%t] : DEBUG popping sh to cl write cmd fifo - %d", $realtime, sh_cl_wr_cmds.size());
      sh_cl_wr_cmds.pop_front();
    end
  end
end
//
// write Data Channel
//
//
// sh->cl data Write Channel
//
// Drives the head of sh_cl_wr_data onto the PCIS W channel.
// wvalid is raised while a beat is queued and dropped for one cycle after each
// handshake; the beat is popped on the handshake itself.
always @(posedge clk_core) begin
if (sh_cl_wr_data.size() != 0) begin
sh_cl_dma_pcis_wdata <= sh_cl_wr_data[0].data;
sh_cl_dma_pcis_wstrb <= sh_cl_wr_data[0].strb;
sh_cl_dma_pcis_wlast <= sh_cl_wr_data[0].last;
// Stay valid unless this cycle completes a handshake.
sh_cl_dma_pcis_wvalid <= !sh_cl_dma_pcis_wvalid ? 1'b1 :
!cl_sh_dma_pcis_wready ? 1'b1 : 1'b0;
if (cl_sh_dma_pcis_wready && sh_cl_dma_pcis_wvalid) begin
if (debug) begin
$display("[%t] : DEBUG popping wr data fifo - %d", $realtime, sh_cl_wr_data.size());
end
// Only the decrement is conditional on wlast; the done flag below is
// recomputed on every accepted beat.
// NOTE(review): the beat's id is used as an index into 4-entry arrays -
// presumably ids are limited to 0..3; confirm against the code that fills the queue.
if (sh_cl_dma_pcis_wlast)
h2c_dma_wr_cmd_cnt[sh_cl_wr_data[0].id]--;
h2c_dma_done[sh_cl_wr_data[0].id] = (h2c_dma_wr_cmd_cnt[sh_cl_wr_data[0].id] == 0);
sh_cl_wr_data.pop_front();
end
end
else
sh_cl_dma_pcis_wvalid <= 1'b0;
end
//
// cl->sh B Response Channel
//
// PCIS B channel: the shell is always ready for write responses and records the
// id/resp of every accepted response in cl_sh_b_resps.
always @(posedge clk_core) begin
  AXI_Command b_rsp;
  sh_cl_dma_pcis_bready <= 1'b1;
  if (cl_sh_dma_pcis_bvalid & sh_cl_dma_pcis_bready) begin
    b_rsp.id   = cl_sh_dma_pcis_bid;
    b_rsp.resp = cl_sh_dma_pcis_bresp;
    cl_sh_b_resps.push_back(b_rsp);
  end
end
//
// sh->cl Address Read Channel
//
// Drives the head of sh_cl_rd_cmds onto the PCIS AR channel.
// arvalid is raised while a command is queued and dropped for one cycle after
// each handshake; with nothing queued both arvalid and arid are driven to zero.
// arsize is always driven as 3'h6, not taken from the queued command.
always @(posedge clk_core) begin
  if (sh_cl_rd_cmds.size() == 0) begin
    sh_cl_dma_pcis_arvalid <= 1'b0;
    sh_cl_dma_pcis_arid    <= 16'b0;
  end
  else begin
    sh_cl_dma_pcis_arid    <= sh_cl_rd_cmds[0].id;
    sh_cl_dma_pcis_araddr  <= sh_cl_rd_cmds[0].addr;
    sh_cl_dma_pcis_arlen   <= sh_cl_rd_cmds[0].len;
    sh_cl_dma_pcis_arsize  <= 3'h6;
    // Stay valid unless this cycle completes a handshake.
    sh_cl_dma_pcis_arvalid <= !(sh_cl_dma_pcis_arvalid && cl_sh_dma_pcis_arready);
    if (cl_sh_dma_pcis_arready && sh_cl_dma_pcis_arvalid) begin
      if (debug)
        $display("[%t] : DEBUG popping sh to cl read cmd fifo - %d", $realtime, sh_cl_rd_cmds.size());
      sh_cl_rd_cmds.pop_front();
    end
  end
end
//
// cl->sh Read Data Channel
//
// Back-pressure: read data is accepted only while fewer than 16 beats are
// waiting in cl_sh_rd_data.
always @(posedge clk_core) begin
  sh_cl_dma_pcis_rready <= (cl_sh_rd_data.size() < 16);
end

// Capture every accepted PCIS read beat (data, id, last) into cl_sh_rd_data.
always @(posedge clk_core) begin
  AXI_Data rd_beat;
  if (cl_sh_dma_pcis_rvalid & sh_cl_dma_pcis_rready) begin
    if (debug) begin
      // Dump the beat one 32-bit word at a time.
      for (int dw = 0; dw < 16; dw++)
        $display("[%t] - DEBUG read data [%2d]: 0x%08h", $realtime, dw, cl_sh_dma_pcis_rdata[(dw*32)+:32]);
    end
    rd_beat.data = cl_sh_dma_pcis_rdata;
    rd_beat.id   = cl_sh_dma_pcis_rid;
    rd_beat.last = cl_sh_dma_pcis_rlast;
    cl_sh_rd_data.push_back(rd_beat);
  end
end
//=================================================
//
// cl->sh PCIeM Interface
//
//=================================================
// Host-memory writer for PCIM writes.
// While a write command is pending in host_mem_wr_que and a captured data beat
// is available in cl_sh_wr_data, one beat per clk_core edge is merged into host
// memory, one 32-bit word at a time, honouring the byte strobes.
// NOTE(review): host_memory_addr is not used in the part of the file reviewed here.
logic [63:0] host_memory_addr = 0;
AXI_Command host_mem_wr_que[$];
// 1 when the next beat is the first of a burst, so wr_addr must be reloaded.
logic first_wr_beat = 1;
// Number of wlast beats captured and not yet answered on the B channel.
int wr_last_cnt = 0;
logic [63:0] wr_addr, wr_addr_t;
always @(posedge clk_core) begin
if (host_mem_wr_que.size() > 0) begin
if (first_wr_beat == 1) begin
// Start of a burst: take the address from the pending command.
wr_addr = host_mem_wr_que[0].addr;
first_wr_beat = 1'b0;
end
if (cl_sh_wr_data.size() > 0) begin
if (debug) begin
$display("[%t] - DEBUG fb: %1d 0x%0128x 0x%016x", $realtime, first_wr_beat, cl_sh_wr_data[0].data, cl_sh_wr_data[0].strb);
end
// Walk the 32-bit words of the beat starting at the word selected by
// wr_addr[5:2]; wr_addr advances by 4 for every word processed.
for(int i=wr_addr[5:2]; i<16; i++) begin
logic [31:0] word;
// Read the current contents of the word so unstrobed bytes are preserved.
if (!`TB_TOP.use_c_host_memory)
if (`TB_TOP.sv_host_memory.exists({wr_addr[63:2], 2'b00}))
word = `TB_TOP.sv_host_memory[{wr_addr[63:2], 2'b00}];
else
word = 32'hffff_ffff; // return a default value
else begin
wr_addr_t = {wr_addr[63:2], 2'b00};
// Assemble the word from four byte reads, lowest address in bits [7:0].
for(int k=0; k<4; k++) begin
byte t;
t = `TB_TOP.host_memory_getc(wr_addr_t + k);
word = {t, word[31:8]};
end
end
// Overwrite only the bytes whose strobe bit is set.
for(int j=0; j<4; j++) begin
logic [7:0] c;
int index;
index = j + (i * 4);
if (cl_sh_wr_data[0].strb[index]) begin
c = cl_sh_wr_data[0].data >> (index * 8);
//FIX partial DW order word = {c, word[31:8]};
word[8*j+:8] = c;
end
end // for (int j=0; j<4; j++)
// Write the merged word back to whichever host memory model is in use.
if (!`TB_TOP.use_c_host_memory)
begin
`TB_TOP.sv_host_memory[{wr_addr[63:2], 2'b00}] = word;
end
else begin
wr_addr_t = {wr_addr[63:2], 2'b00};
for(int k=0; k<4; k++) begin
byte t;
t = word[7:0];
`TB_TOP.host_memory_putc(wr_addr_t + k, t);
word = word >> 8;
end
end
wr_addr += 4;
end
// Last beat of the burst: retire the command and re-arm for the next one.
if (cl_sh_wr_data[0].last == 1) begin
first_wr_beat = 1'b1;
host_mem_wr_que.pop_front();
if (debug) begin
$display("[%t] - DEBUG reseting...", $realtime);
end
end
cl_sh_wr_data.pop_front();
end // if (cl_sh_wr_data.size() > 0)
end // if (host_mem_wr_que.size() > 0)
end
//
// cl->sh Write Address Channel
//
// PCIM AW channel: records every accepted write command in three queues
// (cl_sh_wr_cmds, sh_cl_b_resps and host_mem_wr_que) and drives awready.
// Only awsize == 6 is supported; anything else ends the simulation.
always @(posedge clk_core) begin
AXI_Command cmd;
// NOTE(review): under QUESTA_SIM the counter is declared automatic, so its
// initialiser (0) is applied on every entry to this block and the
// `awready_cnt == 0` term below is then always true; with the other
// declaration the value presumably persists between clock edges. Confirm the
// intended awready behaviour on Questa.
`ifdef QUESTA_SIM
automatic int awready_cnt = 0;
`else
int awready_cnt = 0;
`endif
if (cl_sh_pcim_awvalid && sh_cl_pcim_awready) begin
cmd.addr = cl_sh_pcim_awaddr;
cmd.id = cl_sh_pcim_awid;
cmd.len = cl_sh_pcim_awlen;
cmd.size = cl_sh_pcim_awsize;
cmd.last = 0;
if(cl_sh_pcim_awsize != 6) begin
$display("FATAL ERROR: AwSize other than 6 are not supported");
$finish;
end
cl_sh_wr_cmds.push_back(cmd);
sh_cl_b_resps.push_back(cmd);
host_mem_wr_que.push_back(cmd);
end
// Ready while fewer than 4 commands are outstanding or when the countdown has
// reached zero; each time ready is asserted a new random count (0..80) is drawn.
if ((cl_sh_wr_cmds.size() < 4) || (awready_cnt == 0)) begin
sh_cl_pcim_awready <= 1'b1;
awready_cnt = $urandom_range(0, 80);
end
else begin
sh_cl_pcim_awready <= 1'b0;
awready_cnt--;
end
end
//
// cl->sh write Data Channel
//
// PCIM W channel: captures every accepted write beat into cl_sh_wr_data, counts
// wlast beats in wr_last_cnt, and drives wready with random stalls.
always @(posedge clk_core) begin
AXI_Data wr_data;
// NOTE(review): under QUESTA_SIM these are declared automatic, so their
// initialisers (0) are applied on every entry to this block and the
// `wready_cnt > 0` term below is then never true; with the other declaration
// the values presumably persist between clock edges. Confirm the intended
// wready behaviour on Questa.
`ifdef QUESTA_SIM
automatic int wready_cnt = 0;
automatic int wready_nonzero_wait = 0;
`else
int wready_cnt = 0;
int wready_nonzero_wait = 0;
`endif
if (sh_cl_pcim_wready && cl_sh_pcim_wvalid) begin
wr_data.data = cl_sh_pcim_wdata;
wr_data.strb = cl_sh_pcim_wstrb;
wr_data.last = cl_sh_pcim_wlast;
cl_sh_wr_data.push_back(wr_data);
if (wr_data.last == 1)
wr_last_cnt += 1;
end // if (sh_cl_pcim_wready && cl_sh_pcim_wvalid)
// Stall while more than 64 beats are queued or a stall countdown is running.
if ((cl_sh_wr_data.size() > 64) || (wready_cnt > 0)) begin
sh_cl_pcim_wready <= 1'b0;
wready_cnt--;
end
else begin
sh_cl_pcim_wready <= 1'b1;
// Draw a stall length (0..10) but keep it only when the second draw (0..8)
// is zero; otherwise no stall is scheduled.
wready_cnt = $urandom_range(10, 0);
wready_nonzero_wait = $urandom_range(8, 0);
wready_cnt = (wready_nonzero_wait == 0) ? wready_cnt : 0;
end
end
//
// cl->sh B Response Channel
//
// PCIM B channel: returns an OKAY (2'b00) response for the oldest accepted write
// command once at least one wlast beat has been captured (wr_last_cnt != 0).
// bvalid drops for one cycle after each handshake; on the handshake the command
// is retired from both sh_cl_b_resps and cl_sh_wr_cmds.
always @(posedge clk_core) begin
  if (sh_cl_b_resps.size() == 0) begin
    sh_cl_pcim_bvalid <= 1'b0;
  end
  else begin
    if (debug)
      $display("[%t] : DEBUG resp.size %2d ", $realtime, sh_cl_b_resps.size());
    if (wr_last_cnt != 0) begin
      sh_cl_pcim_bid    <= sh_cl_b_resps[0].id;
      sh_cl_pcim_bresp  <= 2'b00;
      // Stay valid unless this cycle completes a handshake.
      sh_cl_pcim_bvalid <= !(sh_cl_pcim_bvalid && cl_sh_pcim_bready);
      if (cl_sh_pcim_bready && sh_cl_pcim_bvalid) begin
        wr_last_cnt--;
        sh_cl_b_resps.pop_front();
        cl_sh_wr_cmds.pop_front();
      end
    end
  end
end
//
// sh->cl Address Read Channel
//
// PCIM AR channel: records every accepted read command in cl_sh_rd_cmds and
// sh_cl_rd_data and drives arready.
// Only arsize == 6 is supported; anything else ends the simulation.
always @(posedge clk_core) begin
AXI_Command cmd;
// NOTE(review): under QUESTA_SIM the counter is declared automatic, so its
// initialiser (0) is applied on every entry to this block and the
// `arready_cnt == 0` term below is then always true; with the other
// declaration the value presumably persists between clock edges. Confirm the
// intended arready behaviour on Questa.
`ifdef QUESTA_SIM
automatic int arready_cnt = 0;
`else
int arready_cnt = 0;
`endif
if (cl_sh_pcim_arvalid && sh_cl_pcim_arready) begin
cmd.addr = cl_sh_pcim_araddr;
cmd.id = cl_sh_pcim_arid;
cmd.len = cl_sh_pcim_arlen;
cmd.size = cl_sh_pcim_arsize;
cmd.last = 0;
if(cl_sh_pcim_arsize != 6) begin
$display("FATAL ERROR: ArSize other than 6 are not supported");
$finish;
end
cl_sh_rd_cmds.push_back(cmd);
sh_cl_rd_data.push_back(cmd);
end
// Ready while fewer than 4 commands are outstanding or when the countdown has
// reached zero; each time ready is asserted a new random count (0..80) is drawn.
if ((cl_sh_rd_cmds.size() < 4) || (arready_cnt == 0)) begin
sh_cl_pcim_arready <= 1'b1;
arready_cnt = $urandom_range(0, 80);
end
else begin
sh_cl_pcim_arready <= 1'b0;
arready_cnt--;
end
end
//
// sh->cl Read Data Channel
//
// PCIM R channel: returns read data for the oldest command in sh_cl_rd_data.
// Each beat is assembled from host memory one 32-bit word at a time; the `len`
// field of the queued command is used as the remaining-beat counter.
// 1 when the next beat is the first of a burst, so rd_addr must be reloaded.
logic first_rd_beat;
logic [63:0] rd_addr, rd_addr_t;
always @(posedge clk_core) begin
AXI_Command rd_cmd;
logic [511:0] beat;
if (sh_cl_rd_data.size() != 0) begin
sh_cl_pcim_rid <= sh_cl_rd_data[0].id;
sh_cl_pcim_rresp <= 2'b00;
// rvalid stays high during a burst and drops only after the last beat has
// been accepted.
sh_cl_pcim_rvalid <= !sh_cl_pcim_rvalid ? 1'b1 :
!cl_sh_pcim_rready ? 1'b1 :
!sh_cl_pcim_rlast ? 1'b1 : 1'b0;
// rlast is set for a single-beat command, or when one beat remains and the
// current beat is being accepted.
sh_cl_pcim_rlast <= (sh_cl_rd_data[0].len == 0) ? 1'b1 :
(sh_cl_rd_data[0].len == 1) && sh_cl_pcim_rvalid && cl_sh_pcim_rready ? 1'b1 : 1'b0;
if (first_rd_beat == 1'b1) begin
// Start of a burst: take the address from the pending command.
rd_addr = sh_cl_rd_data[0].addr;
first_rd_beat = 1'b0;
end
// Words that are not filled in below keep this all-ones pattern.
beat = {512{1'b1}};
if (cl_sh_pcim_rready) begin
// Fetch words starting at the one selected by rd_addr[5:2]; each word is
// shifted in from the top and rd_addr advances by 4 per word.
for(int i=rd_addr[5:2]; i<16; i++) begin
logic [31:0] c;
if (debug) begin
$display("[%t] : DEBUG reading addr 0x%016x", $realtime, rd_addr);
end
if (!`TB_TOP.use_c_host_memory)
if (`TB_TOP.sv_host_memory.exists({rd_addr[63:2], 2'b00}))
c = `TB_TOP.sv_host_memory[{rd_addr[63:2], 2'b00}];
else
c = 32'hffffffff;
else begin
rd_addr_t = {rd_addr[63:2], 2'b00};
// Assemble the word from four byte reads, lowest address in bits [7:0].
for(int k=0; k<4; k++) begin
byte t;
t = `TB_TOP.host_memory_getc(rd_addr_t + k);
c = {t, c[31:8]};
end
end
beat = {c, beat[511:32]};
rd_addr +=4;
end
if (debug) begin
$display("[%t] : DEBUG beat 0x%0128x", $realtime, beat);
end
sh_cl_pcim_rdata <= beat;
end //if(cl_sh_pcim_rready)
end
else begin
// Nothing to return: idle the channel and re-arm the address load.
sh_cl_pcim_rvalid <= 1'b0;
sh_cl_pcim_rlast <= 1'b0;
first_rd_beat = 1'b1;
end
// On an accepted beat either retire the command (no beats left) or count
// down the remaining beats.
if (cl_sh_pcim_rready && sh_cl_pcim_rvalid && (sh_cl_rd_data.size() != 0)) begin
if (sh_cl_rd_data[0].len == 0) begin
sh_cl_rd_data.pop_front();
cl_sh_rd_cmds.pop_front();
first_rd_beat = 1'b1;
end
else
sh_cl_rd_data[0].len--;
end
end
//=================================================
//
// Interrupt handling
//
//=================================================
// One bit per interrupt: int_pend records requests seen from the CL, int_ack
// holds the acknowledges to send on the next clk_core edge.
logic [15:0] int_pend;
logic [15:0] int_ack;

initial begin
  int_pend = 16'h0000;
  int_ack  = 16'h0000;
end

// Latch incoming requests, log and forward the pending acknowledges, then clear
// int_ack so that each acknowledge is presented for a single cycle.
always @(posedge clk_core) begin
  for (int irq = 0; irq < 16; irq++) begin
    if (cl_sh_apppf_irq_req[irq] == 1'b1)
      int_pend[irq] = 1'b1;
  end
  for (int irq = 0; irq < 16; irq++) begin
    if (int_ack[irq] == 1'b1)
      $display("[%t] : Sending ack for interrupt %2d", $realtime, irq);
  end
  sh_cl_apppf_irq_ack <= int_ack;
  int_ack = 16'h0000;
end

// Hand every pending interrupt to the testbench handler.
always @(posedge clk_core) begin
  for (int irq = 0; irq < 16; irq++) begin
    if (int_pend[irq] == 1'b1)
      `TB_TOP.int_handler(irq);
  end
end
// AXI4-Lite BFM instance connected to the SDA interface (sda_cl_* / cl_sda_*).
// It runs on clk_core with sync_rst_n as its reset.
// NOTE(review): the SDA protocol checker samples the same signals on
// clk_main_a0; the two clocks are given equal half-periods wherever this file
// sets them - confirm they are meant to stay frequency-locked.
axil_bfm sda_axil_bfm(
.axil_clk (clk_core),
.axil_rst_n (sync_rst_n),
.axil_awvalid (sda_cl_awvalid),
.axil_awaddr (sda_cl_awaddr),
.axil_awready (cl_sda_awready),
.axil_wvalid (sda_cl_wvalid),
.axil_wdata (sda_cl_wdata),
.axil_wstrb (sda_cl_wstrb),
.axil_wready (cl_sda_wready),
.axil_bvalid (cl_sda_bvalid),
.axil_bresp (cl_sda_bresp),
.axil_bready (sda_cl_bready),
.axil_arvalid (sda_cl_arvalid),
.axil_araddr (sda_cl_araddr),
.axil_arready (cl_sda_arready),
.axil_rvalid (cl_sda_rvalid),
.axil_rdata (cl_sda_rdata),
.axil_rresp (cl_sda_rresp),
.axil_rready (sda_cl_rready));
// AXI4-Lite BFM instance connected to the OCL interface (ocl_cl_* / cl_ocl_*).
// It runs on clk_core with sync_rst_n as its reset.
// NOTE(review): the OCL protocol checker samples the same signals on
// clk_main_a0; the two clocks are given equal half-periods wherever this file
// sets them - confirm they are meant to stay frequency-locked.
axil_bfm ocl_axil_bfm(
.axil_clk (clk_core),
.axil_rst_n (sync_rst_n),
.axil_awvalid (ocl_cl_awvalid),
.axil_awaddr (ocl_cl_awaddr),
.axil_awready (cl_ocl_awready),
.axil_wvalid (ocl_cl_wvalid),
.axil_wdata (ocl_cl_wdata),
.axil_wstrb (ocl_cl_wstrb),
.axil_wready (cl_ocl_wready),
.axil_bvalid (cl_ocl_bvalid),
.axil_bresp (cl_ocl_bresp),
.axil_bready (ocl_cl_bready),
.axil_arvalid (ocl_cl_arvalid),
.axil_araddr (ocl_cl_araddr),
.axil_arready (cl_ocl_arready),
.axil_rvalid (cl_ocl_rvalid),
.axil_rdata (cl_ocl_rdata),
.axil_rresp (cl_ocl_rresp),
.axil_rready (ocl_cl_rready));
// Check core clock frequency when chk_clk_freq is set.
// Measures the time between two consecutive rising edges of clk_core and
// compares it with the expected period, i.e. twice the half-period CORE_DLY.
// Every mismatch increments clk_err_count and is reported.
always @(posedge clk_core)
begin
  if (chk_clk_freq) begin
    // $realtime keeps the fractional part of the edge time; $time would round
    // each edge to a whole time unit and can misreport fractional periods.
    core_rising_edge = $realtime;
    @(posedge clk_core)
    core_clk_period = $realtime - core_rising_edge;
    // Reals are compared with a small tolerance rather than with '!='.
    if (((core_clk_period - (CORE_DLY * 2)) > 0.001) ||
        (((CORE_DLY * 2) - core_clk_period) > 0.001)) begin
      clk_err_count++;
      $display("Error - core clk frequency check failed. Expected %0f Actual %0f", CORE_DLY * 2, core_clk_period);
    end
  end
end
// Check main clock frequency when chk_clk_freq is set.
// Measures the time between two consecutive rising edges of clk_main_a0 and
// compares it with the expected period, i.e. twice the half-period MAIN_A0_DLY.
// Every mismatch increments clk_err_count and is reported.
always @(posedge clk_main_a0)
begin
  if (chk_clk_freq) begin
    // $realtime keeps the fractional part of the edge time; $time would round
    // each edge to a whole time unit and can misreport fractional periods.
    main_rising_edge = $realtime;
    @(posedge clk_main_a0)
    main_clk_period = $realtime - main_rising_edge;
    // Reals are compared with a small tolerance rather than with '!='.
    if (((main_clk_period - (MAIN_A0_DLY * 2)) > 0.001) ||
        (((MAIN_A0_DLY * 2) - main_clk_period) > 0.001)) begin
      clk_err_count++;
      $display("Error - main a0 clk frequency check failed. Expected %0f Actual %0f", MAIN_A0_DLY * 2, main_clk_period);
    end
  end
end
// Check extra a1 clock frequency when chk_clk_freq is set.
// Measures the time between two consecutive rising edges of clk_extra_a1 and
// compares it with the expected period, i.e. twice the half-period EXTRA_A1_DLY.
// Every mismatch increments clk_err_count and is reported.
always @(posedge clk_extra_a1)
begin
  if (chk_clk_freq) begin
    // $realtime keeps the fractional part of the edge time; $time would round
    // each edge to a whole time unit and can misreport fractional periods.
    extra_a1_rising_edge = $realtime;
    @(posedge clk_extra_a1)
    extra_a1_clk_period = $realtime - extra_a1_rising_edge;
    // Reals are compared with a small tolerance rather than with '!='.
    if (((extra_a1_clk_period - (EXTRA_A1_DLY * 2)) > 0.001) ||
        (((EXTRA_A1_DLY * 2) - extra_a1_clk_period) > 0.001)) begin
      clk_err_count++;
      $display("Error - extra a1 clk frequency check failed. Expected %0f Actual %0f", EXTRA_A1_DLY * 2, extra_a1_clk_period);
    end
  end
end
// Check clk_hbm_ref clock frequency when chk_clk_freq is set.
// Measures the time between two consecutive rising edges of clk_hbm_ref and
// compares it with the expected period, i.e. twice the half-period HBM_DLY.
// Every mismatch increments clk_err_count and is reported.
always @(posedge clk_hbm_ref)
begin
  if (chk_clk_freq) begin
    // $realtime keeps the fractional part of the edge time; $time would round
    // each edge to a whole time unit and can misreport fractional periods.
    hbm_rising_edge = $realtime;
    @(posedge clk_hbm_ref)
    hbm_clk_period = $realtime - hbm_rising_edge;
    // Reals are compared with a small tolerance rather than with '!='.
    if (((hbm_clk_period - (HBM_DLY * 2)) > 0.001) ||
        (((HBM_DLY * 2) - hbm_clk_period) > 0.001)) begin
      clk_err_count++;
      $display("Error - hbm clk frequency check failed. Expected %0f Actual %0f", HBM_DLY * 2, hbm_clk_period);
    end
  end
end
//=================================================
//
// power_up
//
// Description: asserts and deasserts various resets
// Outputs: None
//
//=================================================
// Selects the clock half-periods for the requested A recipe, then holds all
// three reset requests low for 5000ns, releases them and waits another 1000ns.
// An unknown A recipe ends the simulation.
// clk_recipe_b and clk_recipe_c are accepted but not used by this task.
task power_up(input ClockRecipe::A_RECIPE clk_recipe_a = ClockRecipe::A0,
              ClockRecipe::B_RECIPE clk_recipe_b = ClockRecipe::B0,
              ClockRecipe::C_RECIPE clk_recipe_c = ClockRecipe::C0);
  // Half-periods per recipe: main a0, core, extra a1.
  case (clk_recipe_a)
    ClockRecipe::A0: begin MAIN_A0_DLY = 4ns;    CORE_DLY = 4ns;    EXTRA_A1_DLY = 8ns;    end
    ClockRecipe::A1: begin MAIN_A0_DLY = 2ns;    CORE_DLY = 2ns;    EXTRA_A1_DLY = 4ns;    end
    ClockRecipe::A2: begin MAIN_A0_DLY = 32ns;   CORE_DLY = 32ns;   EXTRA_A1_DLY = 32ns;   end
    ClockRecipe::A3: begin MAIN_A0_DLY = 8ns;    CORE_DLY = 8ns;    EXTRA_A1_DLY = 16ns;   end
    ClockRecipe::A4: begin MAIN_A0_DLY = 2.22ns; CORE_DLY = 2.22ns; EXTRA_A1_DLY = 4.44ns; end
    ClockRecipe::A5: begin MAIN_A0_DLY = 2.5ns;  CORE_DLY = 2.5ns;  EXTRA_A1_DLY = 5ns;    end
    default: begin
      $display("Error - Invalid Clock Profile Selected.");
      $finish;
    end
  endcase

  // Assert every reset request, hold, then release.
  {rst_n_i, rst_main_n_i, rst_xtra_n_i} = 3'b000;
  #5000ns;
  {rst_n_i, rst_main_n_i, rst_xtra_n_i} = 3'b111;
  #1000ns;
endtask // power_up
// Re-evaluated whenever one of the four pc_asserted flags changes:
// counts an error if any flag is 1, otherwise counts an X if any flag is X.
always @* begin
  if ((|{pcis_pc_asserted, pcim_pc_asserted, ocl_pc_asserted, sda_pc_asserted}) === 1'b1) begin
    prot_err_count++;
  end
  else if ((pcis_pc_asserted === 1'bx) ||
           (pcim_pc_asserted === 1'bx) ||
           (ocl_pc_asserted  === 1'bx) ||
           (sda_pc_asserted  === 1'bx)) begin
    prot_x_count++;
  end
end
//=================================================
//
// set_chk_clk_freq
//
// Description: Starts checking clock frequency
// Outputs: None
//
//=================================================
// Enables (default) or disables the clock frequency checkers.
//   chk_freq : 1'b1 to start checking, any other value to stop.
// The log message reflects the requested state; previously "Start checking"
// was printed even when the checkers were being disabled.
function void set_chk_clk_freq(logic chk_freq = 1'b1);
  if (chk_freq === 1'b1)
    $display("[%t] : Start checking clock frequency...", $realtime);
  else
    $display("[%t] : Stop checking clock frequency...", $realtime);
  chk_clk_freq = chk_freq;
endfunction // set_chk_clk_freq
//=================================================
//
// chk_prot_err_stat
//
// Description: Checks if there is a protocol checker violation
// Outputs: 1'b1 if a protocol violation or an 'X' on a pc_asserted flag was recorded, 1'b0 otherwise
//
//=================================================
// Reports the accumulated protocol checker status.
// Returns 1'b1 when a violation was counted, or - if there were none - when an
// 'X' was seen on a pc_asserted flag; returns 1'b0 when both counters are zero.
// Violations take precedence: the 'X' message is printed only without them.
function logic chk_prot_err_stat();
  $display("[%t] : Checking protocol checker error status...", $realtime);
  if (prot_err_count > 0) begin
    $display("[%t] : *** Protocol Checker Violations Detected. Refer to log file for details about each specific error ***", $realtime);
    return 1'b1;
  end
  if (prot_x_count > 0) begin
    $display("[%t] : *** 'X' propagation detected in protocol checker status bits. Please dump waves and look at pc_status bits for more information***", $realtime);
    return 1'b1;
  end
  return 1'b0;
endfunction // chk_prot_err_stat
//=================================================
//
// chk_clk_err_cnt
//
// Description: Checks if there are clock errors
// Outputs: 1'b1 if clk_err_count is greater than zero, 1'b0 otherwise
//
//=================================================
function logic chk_clk_err_cnt();
   // Returns 1'b1 when clk_err_count is non-zero, 1'b0 otherwise.
   $display("[%t] : Checking clock error status...", $realtime);

   if (clk_err_count > 0) begin
      $display("[%t] : *** Clock Frequency Errors Detected. Refer to log file for details about each specific error ***", $realtime);
      return 1'b1;
   end

   return 1'b0;
endfunction : chk_clk_err_cnt
//=================================================
//
// get_ecc_err_cnt
//
// Description: Returns the current value of the ECC error counter
// Outputs: ecc_err_cnt
//
//=================================================
function int get_ecc_err_cnt();
   // Plain accessor for the module-level ECC error counter.
   get_ecc_err_cnt = ecc_err_cnt;
endfunction : get_ecc_err_cnt
//=================================================
//
// nsec_delay
//
// Description: sets a delay in nsec
// Outputs: None
//
//=================================================
task nsec_delay(int dly = 10000);
   // Block the caller for dly nanoseconds (10000 ns when no argument is given).
   #(1ns * dly);
endtask : nsec_delay
//=================================================
//
// set_virtual_dip_switch
//
// Description: writes virtual dip switches
// Outputs: None
//
//=================================================
function void set_virtual_dip_switch(int dip_switch);
   // Only the low 16 bits of the argument are driven onto sh_cl_status_vdip.
   sh_cl_status_vdip = dip_switch[15:0];
endfunction : set_virtual_dip_switch
//=================================================
//
// get_virtual_dip_switch
//
// Description: reads virtual dip switch status
// Outputs: dip_status
//
//=================================================
function logic[15:0] get_virtual_dip_switch();
   // Read back the value currently driven on sh_cl_status_vdip.
   get_virtual_dip_switch = sh_cl_status_vdip;
endfunction : get_virtual_dip_switch
//=================================================
//
// get_virtual_led
//
// Description: reads virtual led status
// Outputs: led status
//
//=================================================
function logic[15:0] get_virtual_led();
   // Sample the cl_sh_status_vled signal.
   get_virtual_led = cl_sh_status_vled;
endfunction : get_virtual_led
//=================================================
//
// get_global_counter_0
//
// Description: reads global counter 0 value;
// Outputs: 64 bit counter
//
//=================================================
function logic[63:0] get_global_counter_0();
   // Sample the 64-bit sh_cl_glcount0 signal.
   get_global_counter_0 = sh_cl_glcount0;
endfunction : get_global_counter_0
//=================================================
//
// get_global_counter_1
//
// Description: reads global counter 1 value;
// Outputs: 64 bit counter
//
//=================================================
function logic[63:0] get_global_counter_1();
   // Sample the 64-bit sh_cl_glcount1 signal.
   get_global_counter_1 = sh_cl_glcount1;
endfunction : get_global_counter_1
//=================================================
//
// kernel_reset
//
// Description: drives kernel_rst_n with the supplied value (default 1)
// Outputs: None
//
//=================================================
function void kernel_reset(input logic d = 1);
   // d is written straight to kernel_rst_n; the default argument drives it to 1.
   kernel_rst_n = d;
endfunction : kernel_reset
//=================================================
//
// power_down
//
// Description: drives the rst_n_i and rst_main_n_i resets to 0, with a 50ns delay before and after
// Outputs: None
//
//=================================================
// Drives rst_n_i and rst_main_n_i to 0, with a 50ns delay on each side of the change.
task power_down;
// Wait before changing the resets.
#50ns;
rst_n_i = 1'b0;
rst_main_n_i = 1'b0;
// NOTE(review): rst_xtra_n_i is toggled together with these two signals in
// power_up but is not driven here - confirm that this is intended.
#50ns;
endtask // power_down
//=================================================
//
// issue_flr
//
// Description: issue a FLR command
// Outputs: None
//
//=================================================
task issue_flr();
   // Raise the FLR request and hold it until cl_sh_flr_done reads 1.
   sh_cl_flr_assert = 1'b1;
   wait (cl_sh_flr_done == 1);
   // Request acknowledged: drop it again.
   sh_cl_flr_assert = 1'b0;
endtask : issue_flr
//=================================================
//
// map_host_memory
//
// Description: used to connect C host memory to simulation memory.
// Outputs: None
//
//=================================================
task map_host_memory(input logic [63:0] addr);
   // Optional trace of the mapping request.
   if (debug)
      $display("[%t] : DEBUG mapping host memory to 0x%16x", $realtime, addr);

   // Record the base address and set the testbench-level use_c_host_memory flag.
   host_memory_addr = addr;
   `TB_TOP.use_c_host_memory = 1'b1;
endtask : map_host_memory
//=================================================
//
// set_ack_bit
//
// Description: used to acknowledge an interrupt and clear pending bit
// Outputs: None
//
//=================================================
function void set_ack_bit(input int int_num);
   // Set the ack bit for interrupt int_num ...
   int_ack[int_num] = 1'b1;
   // ... and clear its pending bit.
   int_pend[int_num] = 1'b0;
endfunction : set_ack_bit
//=================================================
//
// poke
//
// Description: used to write a single beat of data at addr into one of the three CL ports listed below, selected by intf.
// Intf
// 0 = PCIS
// 1 = SDA
// 2 = OCL
//
// id - AXI bus ID
//
// Size
// 0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes (32 bits), 3 = 8 bytes (64 bits);
// DataSize::UINT128, UINT256 and UINT512 (16, 32 and 64 bytes) are also accepted
//
// Outputs: None
//
//=================================================
task poke(input logic [63:0] addr,
          logic [511:0] data,
          logic [5:0] id = 6'h0,
          DataSize::DATA_SIZE size = DataSize::UINT32,
          AxiPort::AXI_PORT intf = AxiPort::PORT_DMA_PCIS);
   // Byte-enable mask for an access that starts at byte lane 0; it is shifted
   // to the real lane further down.
   logic [63:0] strb;

   case (size)
      DataSize::UINT8  : strb = 64'h0000_0000_0000_0001;
      DataSize::UINT16 : strb = 64'h0000_0000_0000_0003;
      DataSize::UINT32 : strb = 64'h0000_0000_0000_000F;
      DataSize::UINT64 : strb = 64'h0000_0000_0000_00FF;
      DataSize::UINT128: strb = 64'h0000_0000_0000_FFFF;
      DataSize::UINT256: strb = 64'h0000_0000_FFFF_FFFF;
      DataSize::UINT512: strb = 64'hFFFF_FFFF_FFFF_FFFF;
      default: begin
         $display("FATAL ERROR - Invalid size specified");
         $finish;
      end
   endcase // case (size)

   case (intf)
      AxiPort::PORT_DMA_PCIS: begin
         AXI_Command wr_cmd;
         AXI_Data    wr_beat;
         logic [1:0] bresp;

         // Single-beat write command.
         wr_cmd.id   = id;
         wr_cmd.size = size;
         wr_cmd.len  = 0;
         wr_cmd.addr = addr;
         sh_cl_wr_cmds.push_back(wr_cmd);

         // Move data and strobes onto the byte lanes selected by addr[5:0].
         wr_beat.last = 1'b1;
         wr_beat.id   = id;
         wr_beat.strb = strb << addr[5:0];
         wr_beat.data = data << (addr[5:0] * 8);

         // The data beat is queued 20ns after the command.
         #20ns;
         sh_cl_wr_data.push_back(wr_beat);

         // Poll every 20ns until a write response is available, then consume it.
         while (cl_sh_b_resps.size() == 0) begin
            #20ns;
         end
         bresp = cl_sh_b_resps[0].resp;
         cl_sh_b_resps.pop_front();
      end

      // The SDA and OCL ports are handled by their own BFM instances.
      AxiPort::PORT_SDA: begin
         sda_axil_bfm.poke(addr, data);
      end
      AxiPort::PORT_OCL: begin
         ocl_axil_bfm.poke(addr, data);
      end

      default: begin
         $display("FATAL ERROR - Invalid CL port specified");
         $finish;
      end
   endcase // case (intf)
endtask : poke
//=================================================
// poke_pcis
//
// Description: single-beat PCIS write where the caller supplies the full
//              512-bit data word and the 64-bit byte-strobe mask directly
//              (no lane shifting is applied, unlike poke).
//   addr - AXI address placed in the write command
//   data - 512-bit write data, used as-is
//   strb - byte strobes, used as-is
//   id   - AXI bus ID
// Outputs: None
//=================================================
task poke_pcis(input logic [63:0] addr,
               logic [511:0] data,
               logic [63:0] strb,
               logic [5:0] id = 6'h0);
   AXI_Command axi_cmd;
   AXI_Data axi_data;
   logic [1:0] resp;

   axi_cmd.addr = addr;
   axi_cmd.len = 0;
   // The size field was previously left unassigned for this command. The beat
   // spans the whole 64-byte data bus (the strobes select the bytes), so use
   // size 6, the same value the DMA write path puts in its commands.
   axi_cmd.size = 6;
   axi_cmd.id = id;
   sh_cl_wr_cmds.push_back(axi_cmd);

   axi_data.data = data;
   axi_data.strb = strb;
   axi_data.id = id;
   axi_data.last = 1'b1;
   // The data beat is queued 20ns after the command.
   #20ns sh_cl_wr_data.push_back(axi_data);

   // Poll every 20ns until a write response is available, then consume it.
   while (cl_sh_b_resps.size() == 0)
      #20ns;
   resp = cl_sh_b_resps[0].resp;
   cl_sh_b_resps.pop_front();
endtask // poke_pcis
//===========================================================================
//
// poke_pcis_wc
//
// Description: Write combine version of poke (will only work on PCIS Intf)
// id - AXI bus ID
// addr - Address for transfer
// data[$][31:0] - Queue of DWs
// size - AXI size
// Outputs: None
//
//==========================================================================
task poke_pcis_wc(input logic [63:0] addr,
                  logic [31:0] data [$],
                  logic [5:0] id = 6'h0,
                  logic [2:0] size = 3'd6
                  );
   // Packs a queue of 32-bit words into one PCIS write burst of 64-byte beats
   // and waits for the write response.
   AXI_Command axi_cmd;
   AXI_Data axi_data;
   logic [1:0] resp;
   logic [31:0] dw_idx;        // DWs consumed from the queue so far
   logic [31:0] slice_dw_idx;  // DW position inside the current beat
   logic [31:0] total_bytes;
   logic [31:0] max_bytes;
   logic [31:0] span_bytes;    // bytes from the start of the first beat to the end of the data

   total_bytes = data.size() * 4;

   // An empty queue with a 64-byte-aligned address would make the burst
   // length below evaluate to -1, so reject it up front.
   if (total_bytes == 0) begin
      $display("FATAL ERROR: poke_pcis_wc:: data queue is empty");
      $finish;
   end

   // NOTE(review): the message says "DW aligned" but the check requires
   // addr[5:0] == 0 - confirm which of the two is intended.
   if (size == 3'd2 && ((total_bytes != 4) || (addr[5:0] != 6'd0))) begin
      $display("FATAL ERROR: poke_pcis_wc:: Size = 2. DW count should be equal to 1 and addr should be DW aligned");
      $finish;
   end
   if (size != 3'd6 && size != 3'd2) begin
      $display("FATAL ERROR: poke_pcis_wc:: Only Size = 2 or 6 supported");
      $finish;
   end

   // Limit on the burst payload, counted from the start of the first beat.
   max_bytes = 4096 - addr[5:0];
   if (total_bytes > max_bytes) begin
      $display("FATAL ERROR: poke_pcis_wc:: AXI transaction is more than 4096 bytes");
      $finish;
   end

   // len is "number of beats - 1": round the byte span up to whole 64-byte beats.
   span_bytes = total_bytes + addr[5:0];
   axi_cmd.addr = addr;
   axi_cmd.len = (span_bytes % 64) ? (span_bytes >> 6) : (span_bytes >> 6) - 1;
   axi_cmd.size = size;
   axi_cmd.id = id;
   sh_cl_wr_cmds.push_back(axi_cmd);

   dw_idx = 0;
   for (int idx = 0; idx <= axi_cmd.len; idx++) begin
      axi_data.id = id;
      // '0 clears each field at its own width (the strobe field is narrower than the data field).
      axi_data.data = '0;
      axi_data.strb = '0;
      // Only the first beat starts part-way through the 16-DW beat.
      slice_dw_idx = idx == 0 ? addr[5:2] : 0;
      while ((slice_dw_idx < 16) && (dw_idx < total_bytes/4)) begin
         assert(data.size() > 0) else
            begin
               $display("FATAL ERROR: poke_pcis_wc:: Something went wrong. data queue already empty");
               $finish;
            end;
         axi_data.data[slice_dw_idx*32 +: 32] = data.pop_front();
         axi_data.strb[slice_dw_idx*4 +: 4] = 4'hf;
         dw_idx++;
         slice_dw_idx++;
      end
      axi_data.last = (axi_cmd.len == idx);
      sh_cl_wr_data.push_back(axi_data);
   end // for (idx = 0; idx <= len; idx++)

   // Poll every 20ns until a write response is available, then consume it.
   while (cl_sh_b_resps.size() == 0)
      #20ns;
   resp = cl_sh_b_resps[0].resp;
   cl_sh_b_resps.pop_front();
endtask // poke_pcis_wc
//=================================================
//
// peek
//
// Description: used to read a single beat of data at addr from one of the three CL ports listed below, selected by intf.
// Intf
// 0 = PCIS
// 1 = SDA
// 2 = OCL
//
// id - AXI bus ID
//
// Size
// 0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes (32 bits), 3 = 8 bytes (64 bits);
// in general 2**size bytes are returned on the PCIS port
//
// Outputs: Read Data Value
//
//=================================================
task peek(input logic [63:0] addr,
          output logic [511:0] data,
          input logic [5:0] id = 6'h0,
          DataSize::DATA_SIZE size = DataSize::UINT32,
          AxiPort::AXI_PORT intf = AxiPort::PORT_DMA_PCIS);
   // Bytes that are not filled in below read back as zero.
   data = 0;

   case (intf)
      AxiPort::PORT_DMA_PCIS : begin
         AXI_Command rd_cmd;
         int lane;       // byte lane of addr inside the 64-byte beat
         int bit_base;   // bit offset of that lane

         // Single-beat read command.
         rd_cmd.id   = id;
         rd_cmd.size = size;
         rd_cmd.len  = 0;
         rd_cmd.addr = addr;
         sh_cl_rd_cmds.push_back(rd_cmd);

         lane     = addr[5:0];
         bit_base = lane * 8;

         // Poll every 20ns until read data is available.
         while (cl_sh_rd_data.size() == 0) begin
            #20ns;
         end

         // Copy 2**size bytes, starting at the addressed lane, down to bit 0 of data.
         for (int b = 0; b < 2**size; b++) begin
            data[(b*8)+:8] = cl_sh_rd_data[0].data[(bit_base+(b*8))+:8];
         end
         cl_sh_rd_data.pop_front();
      end

      // The SDA and OCL ports are handled by their own BFM instances.
      AxiPort::PORT_SDA : begin
         sda_axil_bfm.peek(addr, data);
      end
      AxiPort::PORT_OCL : begin
         ocl_axil_bfm.peek(addr, data);
      end

      default: begin
         $display("FATAL ERROR - Invalid CL port specified");
         $finish;
      end
   endcase // case (intf)
endtask : peek
//=================================================
// peek_pcis
//
// Description: single-beat PCIS read that returns the whole 512-bit beat
//              unmodified (no lane extraction, unlike peek).
//   addr - AXI address placed in the read command
//   id   - AXI bus ID
// Outputs: data - the full read-data beat
//=================================================
task peek_pcis(input logic [63:0] addr,
               output logic [511:0] data,
               input logic [5:0] id = 6'h0);
   AXI_Command axi_cmd;

   axi_cmd.addr = addr;
   axi_cmd.len = 0;
   // The size field was previously left unassigned for this command. The whole
   // 64-byte beat is returned, so use size 6, the same value the DMA read path
   // puts in its commands.
   axi_cmd.size = 6;
   axi_cmd.id = id;
   sh_cl_rd_cmds.push_back(axi_cmd);

   // Poll every 20ns until read data is available, then consume it.
   while (cl_sh_rd_data.size() == 0)
      #20ns;
   data = cl_sh_rd_data[0].data;
   cl_sh_rd_data.pop_front();
endtask // peek_pcis
//=================================================
//
// dma_buffer_to_cl
//
// Description: queues a transfer of a host data buffer to the CL on one of four channels.
// The transfer is issued on the PCIS AXI interface once the channel has been started with start_dma_to_cl.
//
// chan = 0-3 channel number
// src_addr = host buffer address the data is read from
// cl_addr = starting CL AXI addr
// len = length of the transfer in bytes
//
// Outputs: None
//
//=================================================
function void dma_buffer_to_cl(input logic [1:0] chan, logic [63:0] src_addr, logic [63:0] cl_addr, logic [27:0] len);
   // Describe the transfer and append it to the channel's host-to-CL list.
   DMA_OP h2c_op;
   h2c_op.len     = len;
   h2c_op.cl_addr = cl_addr;
   h2c_op.buffer  = src_addr;
   h2c_dma_list[chan].push_back(h2c_op);
endfunction : dma_buffer_to_cl
function automatic void dma_cl_to_buffer(input logic [1:0] chan, logic [63:0] dst_addr, input [63:0] cl_addr, logic [27:0] len);
   // Describe the transfer and append it to the channel's CL-to-host list.
   DMA_OP c2h_op;
   c2h_op.len     = len;
   c2h_op.cl_addr = cl_addr;
   c2h_op.buffer  = dst_addr;
   c2h_dma_list[chan].push_back(c2h_op);
endfunction : dma_cl_to_buffer
function void start_dma_to_cl(input int chan);
   // Mark the host-to-CL channel as started and clear its done flag.
   h2c_dma_started[chan] = 1'b1;
   h2c_dma_done[chan]    = 1'b0;
endfunction : start_dma_to_cl
function void start_dma_to_buffer(input int chan);
   // Mark the CL-to-host channel as started and clear its done flag.
   c2h_dma_started[chan] = 1'b1;
   c2h_dma_done[chan]    = 1'b0;
endfunction : start_dma_to_buffer
function bit is_dma_to_cl_done(input int chan); // 1 = done
   // Report the done flag of the selected host-to-CL channel.
   is_dma_to_cl_done = h2c_dma_done[chan];
endfunction : is_dma_to_cl_done
function bit is_dma_to_buffer_done(input int chan); // 1 = done
   // Report the done flag of the selected CL-to-host channel.
   is_dma_to_buffer_done = c2h_dma_done[chan];
endfunction : is_dma_to_buffer_done
function bit is_ddr_ready(); // 1 = ready
   // Report the module-level ddr_is_ready flag.
   is_ddr_ready = ddr_is_ready;
endfunction : is_ddr_ready
//=================================================
//
// sh->cl xdma Interface
//
//=================================================
// Host-to-CL DMA engine. On every clk_core edge, each started channel that has
// a queued DMA_OP gets that operation expanded into AXI write commands
// (sh_cl_wr_cmds) and 64-byte data beats (sh_cl_wr_data), with the payload
// read byte by byte from host memory through `TB_TOP.hm_get_byte.
always @(negedge rst_n or posedge clk_core) begin
   if (!rst_n) begin
      h2c_dma_started <= 4'b0;
      c2h_dma_started <= 4'b0;
   end
   else begin
      AXI_Command axi_cmd;
      AXI_Data axi_data;
      DMA_OP dop;
      int num_of_data_beats;      // total 64-byte beats needed for the operation
      int byte_cnt;               // bytes fetched from the host buffer so far
      int num_bytes;              // valid bytes in the beat being built
      logic [63:0] aligned_addr;  // cl_addr rounded down to a 64-byte boundary
      bit last_beat;              // 1 when the final beat is only partially filled
      logic [5:0] start_addr;     // byte lane at which the first beat starts
      bit aligned;
      bit last_data_beat;

      num_of_data_beats = 0;
      last_data_beat = 0;
      byte_cnt = 0;
      num_bytes = 0;
      aligned_addr = 0;
      last_beat = 0;
      start_addr = 0;
      aligned = 0;

      for (int chan = 0; chan < 4; chan++) begin
         if ((h2c_dma_started[chan] != 1'b0) && (h2c_dma_list[chan].size() > 0)) begin
            dop = h2c_dma_list[chan].pop_front();

            // Unaligned CL addresses are rejected. $fatal takes the finish
            // number as its first argument, followed by the format string.
            if (dop.cl_addr[5:0] !== 6'h00) begin
               $fatal(1, "Address in a SH->CL transfer should be aligned to 64 byte boundary for address %x \n", dop.cl_addr);
            end

            aligned_addr = {dop.cl_addr[63:6], 6'h00};
            num_of_data_beats = ((dop.len + dop.cl_addr[5:0] - 1)/64) + 1;
            byte_cnt = 0;
            last_beat = ((dop.len + dop.cl_addr[5:0])%64 > 0);
            start_addr = dop.cl_addr[5:0];
            aligned = (aligned_addr == dop.cl_addr);

            // burst_cnt counts data beats; it is advanced inside the beat loop below.
            for(int burst_cnt=0; burst_cnt < num_of_data_beats; ) begin
               if(burst_cnt == 0) begin // if first data beat
                  axi_cmd.addr = dop.cl_addr;
                  axi_cmd.len = (num_of_data_beats==1) ? 0 :
                                aligned ? (num_of_data_beats - 1 - last_beat) : 0;
                  // handle the condition if addr is crossing 4k page boundary
                  if(dop.cl_addr[11:0] + ((axi_cmd.len + 1) * 64) > 4095) begin
                     axi_cmd.len = ((4096 - dop.cl_addr[11:0])/64) - 1;
                  end
               end
               else if((num_of_data_beats - 1) - burst_cnt == 0) begin // last data beat
                  axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                  axi_cmd.len = 0;
               end
               else begin // intermediate data beats
                  axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                  axi_cmd.len = num_of_data_beats - last_beat - burst_cnt - 1;
                  // handle the condition if addr is crossing 4k page boundary;
                  // the message is printed only when the burst is actually shortened
                  if( (axi_cmd.addr[11:0] + ((axi_cmd.len + 1) * 64)) > 4095) begin
                     $display("Address is going to cross 4K boundary \n");
                     axi_cmd.len = ((4096 - axi_cmd.addr[11:0])/64) - 1;
                  end
               end

               axi_cmd.id = chan;
               axi_cmd.size = 6;
               sh_cl_wr_cmds.push_back(axi_cmd);
               h2c_dma_wr_cmd_cnt[chan]++;

               // loop to do multiple data beats
               for(int j = 0; j <= axi_cmd.len; j++) begin
                  axi_data.data = 0;
                  axi_data.strb = 64'b0;
                  axi_data.id = chan;
                  last_data_beat = (((num_of_data_beats - 1) - burst_cnt) == 0) ? 1 : 0;
                  num_bytes = last_beat ? (dop.len + dop.cl_addr[5:0])%64 : 64;
                  axi_data.last = (j == axi_cmd.len) ? 1 : 0;

                  if(num_of_data_beats == 1) begin
                     // Whole transfer fits in one beat.
                     num_bytes = (dop.len == 64) ? 64 : (dop.len)%64;
                     for(int i=start_addr[5:0]; i < (num_bytes+start_addr[5:0]); i++) begin
                        axi_data.data = axi_data.data | `TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)) << 8*i;
                        axi_data.strb = axi_data.strb | 1 << i;
                        byte_cnt++;
                     end
                  end
                  else if(last_data_beat) begin
                     // Final beat: only num_bytes lanes carry data.
                     for(int i=0; i < num_bytes; i++) begin
                        axi_data.data = axi_data.data | `TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)) << 8*i;
                        axi_data.strb = axi_data.strb | 1 << i;
                        byte_cnt++;
                     end
                  end
                  else begin
                     // Full beat: each byte is shifted in from the top of the word.
                     for(int i=start_addr[5:0]; i < 64; i++) begin
                        axi_data.data = {`TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)), axi_data.data[511:8]};
                        axi_data.strb = {1'b1, axi_data.strb[63:1]};
                        byte_cnt++;
                     end
                  end

                  sh_cl_wr_data.push_back(axi_data);
                  start_addr = 0;
                  burst_cnt++;
               end // for(int j = 0; j <= axi_cmd.len; j++) begin
            end // for(int burst_cnt=0; burst_cnt < num_of_data_beats; )
         end // if ((h2c_dma_started[chan] != 1'b0) && (h2c_dma_list[chan].size() > 0))
      end // for (int chan = 0; chan < 4; chan++)
   end // else
end // always
//=================================================
//
// cl->sh xdma data Interface
//
//=================================================
// CL-to-host DMA data handler. On each clk_core edge it looks at the head of
// cl_sh_rd_data and, when its id matches a started channel, copies the beat
// into host memory through `TB_TOP.hm_put_byte and updates the channel's
// done/started flags.
always @(negedge rst_n or posedge clk_core) begin
if (!rst_n) begin
c2h_dma_done <= 1'b0;
end
else begin
DMA_OP dop;
// Per-channel count of bytes written to the host buffer for the current operation.
static int byte_cnt[4];
for (int chan = 0; chan < 4; chan++) begin
if((cl_sh_rd_data.size() > 0) && (c2h_dma_started[chan] != 1'b0)) begin
// Only the head of the read-data queue is examined, and only if it carries this channel's id.
if(chan == cl_sh_rd_data[0].id) begin
// NOTE(review): c2h_data_dma_list is popped without checking that it is non-empty - confirm a per-beat entry always exists here.
dop = c2h_data_dma_list[chan].pop_front();
// Copy bytes from lane cl_addr[5:0] up to lane 63 into the host buffer.
for (int i = dop.cl_addr[5:0]; i < 64 ; i++) begin
`TB_TOP.hm_put_byte(.addr(dop.buffer + byte_cnt[chan]), .d(cl_sh_rd_data[0].data[(i*8)+:8]));
if (debug) begin
// NOTE(review): dop.buffer[i] prints a single bit of the buffer address, not a buffer byte - confirm intent.
$display("[%t] - DEBUG read data dop.buffer[%2d]: %0x read_que data: %0x",
$realtime, i, dop.buffer[i], cl_sh_rd_data[0].data[(i*8)+:8]);
end
byte_cnt[chan]++;
end
// The channel is done once no per-beat entries remain for it.
c2h_dma_done[chan] = (c2h_data_dma_list[chan].size() == 0);
if ((c2h_dma_done[chan]) && (cl_sh_rd_data[0].last == 1)) c2h_dma_started[chan] = 0;
if ((cl_sh_rd_data[0].last == 1) && (byte_cnt[chan] >= dop.len)) // end of current DMA op, reset byte count
byte_cnt[chan] = 0;
// The response is sampled from the cl_sh_dma_pcis_rresp signal, not from the queued beat.
if (cl_sh_dma_pcis_rresp == 2'b10) begin
if (ECC_EN == 1) begin
ecc_err_cnt++;
$display("ECC error detected in the read data from CL. A SLVERR response is returned\n");
end
else begin
// With ECC_EN not equal to 1 the counter is cleared instead of incremented.
ecc_err_cnt = 0;
$display("CL returned SLVERR on READ Response \n");
end
end
// The beat has been handled: remove it from the read-data queue.
cl_sh_rd_data.pop_front();
end // if (chan == cl_sh_rd_data[0].id)
end
end
end
end
//=================================================
//
// cl->sh xdma Interface
//
//=================================================
// CL-to-host DMA command generator. On every clk_core edge, each started
// channel that has a queued DMA_OP gets that operation expanded into AXI read
// commands (sh_cl_rd_cmds) plus one c2h_data_dma_list entry per expected data
// beat.
always @(negedge rst_n or posedge clk_core) begin
   if (!rst_n) begin
      h2c_dma_started <= 4'b0;
      c2h_dma_started <= 4'b0;
   end
   else begin
      AXI_Command axi_cmd;
      AXI_Data axi_data;
      DMA_OP dop;
      DMA_OP data_dop;            // per-beat entry pushed to c2h_data_dma_list
      int num_of_data_beats;      // total 64-byte beats needed for the operation
      bit aligned;
      logic [63:0] aligned_addr;  // cl_addr rounded down to a 64-byte boundary
      bit last_beat;              // 1 when the final beat is only partially filled

      num_of_data_beats = 0;
      aligned = 0;
      aligned_addr = 0;
      last_beat = 0;

      for (int chan = 0; chan < 4; chan++) begin
         if ((c2h_dma_started[chan] != 1'b0) && (c2h_dma_list[chan].size() > 0)) begin
            dop = c2h_dma_list[chan].pop_front();

            // Unaligned CL addresses are rejected. $fatal takes the finish
            // number as its first argument, followed by the message.
            if (dop.cl_addr[5:0] !== 6'h00) begin
               $fatal(1, "Address in a CL->SH transfer should be aligned to 64 byte boundary");
            end

            num_of_data_beats = ((dop.len + dop.cl_addr[5:0] - 1)/64) + 1;
            aligned_addr = {dop.cl_addr[63:6], 6'h00};
            aligned = (aligned_addr == dop.cl_addr);
            last_beat = ((dop.len + dop.cl_addr[5:0])%64 > 0);

            // burst_cnt counts data beats; it is advanced inside the per-beat loop below.
            for(int burst_cnt=0; burst_cnt < num_of_data_beats; ) begin
               if(burst_cnt == 0) begin // if first data beat
                  axi_cmd.addr = dop.cl_addr;
                  axi_cmd.len = (num_of_data_beats==1) ? 0 :
                                aligned ? (num_of_data_beats - 1 - last_beat) : 0;
                  // handle the condition if addr is crossing 4k page boundary
                  if(aligned && (dop.cl_addr[11:0] + ((axi_cmd.len + 1) * 64) > 4095)) begin
                     axi_cmd.len = ((4096 - dop.cl_addr[11:0])/64) - 1;
                  end
                  axi_cmd.id = chan;
               end
               else if((num_of_data_beats - 1) - burst_cnt == 0) begin // last data beat
                  axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                  axi_cmd.len = 0;
                  axi_cmd.id = chan;
               end
               else begin // intermediate data beats
                  axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                  axi_cmd.len = num_of_data_beats - last_beat - burst_cnt - 1;
                  // handle the condition if addr is crossing 4k page boundary
                  if( (axi_cmd.addr[11:0] + ((axi_cmd.len + 1) * 64)) > 4095) begin
                     axi_cmd.len = ((4096 - axi_cmd.addr[11:0])/64) - 1;
                  end
                  axi_cmd.id = chan;
               end

               axi_cmd.size = 6;
               sh_cl_rd_cmds.push_back(axi_cmd);

               // One bookkeeping entry per beat of this read command.
               for(int i = 0; i <= axi_cmd.len; i++) begin
                  data_dop.buffer = dop.buffer;
                  data_dop.cl_addr = (axi_cmd.addr + (i*64));
                  data_dop.len = dop.len;
                  c2h_data_dma_list[chan].push_back(data_dop);
                  burst_cnt++;
               end // for(int i = 0; i <= axi_cmd.len; i++)
            end // for(int burst_cnt=0; burst_cnt < num_of_data_beats; )
         end // if ((c2h_dma_started[chan] != 1'b0) && (c2h_dma_list[chan].size() > 0))
      end // for (int chan = 0; chan < 4; chan++)
   end // else begin
end // always
//=================================================
// poke_stat
//
// Description: writes data to a stats interface selected by the intf string.
//   "ddr" - drives the sh_cl_ddr_stat_* signals on clk_main_a0 and waits for
//           cl_sh_ddr_stat_ack
//   "hbm" - forwards to the testbench poke_ocl task at `HBM_STAT_BUS_CL
//           (addr is not used on this path)
// Outputs: None
//=================================================
task poke_stat(input logic [7:0] addr, string intf, logic[31:0] data);
   case (intf)
      "ddr": begin
         // Present the write for one clock.
         @ (posedge clk_main_a0);
         sh_cl_ddr_stat_wr <= 1;
         sh_cl_ddr_stat_addr <= addr;
         sh_cl_ddr_stat_wdata <= data;
         sh_cl_ddr_stat_rd <= 0;
         @ (posedge clk_main_a0);
         sh_cl_ddr_stat_wr <= 0;
         // Wait for the acknowledge, then put X on the write data.
         @ (posedge clk_main_a0);
         while (cl_sh_ddr_stat_ack !== 1'b1)
            @ (posedge clk_main_a0);
         sh_cl_ddr_stat_wdata <= 'hX;
      end
      "hbm": begin
         // Reach the testbench through `TB_TOP, as the rest of this file does,
         // so that an overridden top-level name keeps working.
         `TB_TOP.poke_ocl(.addr(`HBM_STAT_BUS_CL), .data(data));
      end
      default: begin
         $display("FATAL ERROR - Invalid DDR/HBM");
         $finish;
      end
   endcase // case (intf)
endtask
//=================================================
// peek_stat
//
// Description: reads from a stats interface selected by the intf string.
//   "ddr" - drives the sh_cl_ddr_stat_* signals on clk_main_a0, waits for
//           cl_sh_ddr_stat_ack and samples cl_sh_ddr_stat_rdata
//   "hbm" - forwards to the testbench peek_ocl task at `HBM_STAT_BUS_CL
//           (addr is not used on this path)
// Outputs: data - the value read
//=================================================
task peek_stat(input logic [7:0] addr, string intf, output logic[31:0] data);
   case (intf)
      "ddr": begin
         // Present the read for one clock.
         @ (posedge clk_main_a0);
         sh_cl_ddr_stat_wr <= 0;
         sh_cl_ddr_stat_addr <= addr;
         sh_cl_ddr_stat_rd <= 1;
         @ (posedge clk_main_a0);
         sh_cl_ddr_stat_rd <= 0;
         // Wait for the acknowledge, then sample the read data one time unit
         // after the clock edge.
         while (cl_sh_ddr_stat_ack !== 1'b1)
            @ (posedge clk_main_a0);
         #1;
         data = cl_sh_ddr_stat_rdata;
         @ (posedge clk_main_a0);
      end
      "hbm": begin
         // Reach the testbench through `TB_TOP, as the rest of this file does,
         // so that an overridden top-level name keeps working.
         `TB_TOP.peek_ocl(.addr(`HBM_STAT_BUS_CL), .data(data));
      end
      default: begin
         $display("FATAL ERROR - Invalid DDR/HBM");
         $finish;
      end
   endcase // case (intf)
endtask
task automatic wait_clock(int count);
   // Block the caller for count rising edges of clk_core.
   repeat (count)
      @(posedge clk_core);
endtask : wait_clock
initial begin
   // Drive both HBM APB preset signals to 0 at time zero ...
   hbm_apb_preset_n_0 = 1'b0;
   hbm_apb_preset_n_1 = 1'b0;
   // ... and set them to 1 after 100ns.
   #100ns;
   hbm_apb_preset_n_0 = 1'b1;
   hbm_apb_preset_n_1 = 1'b1;
end
endmodule // sh_bfm