hdk/common/verif/models/sh_bfm/sh_bfm.sv (1,877 lines of code) (raw):

// ============================================================================ // Amazon FPGA Hardware Development Kit // // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Amazon Software License (the "License"). You may not use // this file except in compliance with the License. A copy of the License is // located at // // http://aws.amazon.com/asl/ // // or in the "license" file accompanying this file. This file is distributed on // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or // implied. See the License for the specific language governing permissions and // limitations under the License. // ============================================================================ `ifndef TB_TOP `define TB_TOP tb `endif `define HBM_STAT_BUS_CL 64'h0300 module sh_bfm #( parameter NUM_HMC = 4, parameter NUM_QSFP = 4, parameter NUM_PCIE = 1, parameter NUM_GTY = 4, parameter NUM_I2C = 2, parameter NUM_POWER = 4 )( //--------------------------------------------------------------------------- // cl_ports_sh_bfm.vh is generated from cl_ports.vh in $(HDK_SHELL_DESIGN_DIR)/interfaces. // This is to ensure that there is no integration issues. 
//---------------------------------------------------------------------------
`include "cl_ports_sh_bfm.vh"
);

`include "axi_bfm_defines.svh"

   import tb_type_defines_pkg::*;

   // ---------------------------------------------------------------------
   // Transaction queues for the sh->cl DMA PCIS interface (shell is master)
   // ---------------------------------------------------------------------
   AXI_Command sh_cl_wr_cmds[$];   // write commands waiting to be driven on the PCIS AW channel
   AXI_Data    sh_cl_wr_data[$];   // write beats waiting to be driven on the PCIS W channel
   AXI_Command sh_cl_rd_cmds[$];   // read commands waiting to be driven on the PCIS AR channel
   AXI_Data    cl_sh_rd_data[$];   // read beats captured from the PCIS R channel
   AXI_Command sh_cl_b_resps[$];   // PCIM write commands that still owe a B response to the CL

   // ---------------------------------------------------------------------
   // Transaction queues for the cl->sh PCIM interface (CL is master)
   // ---------------------------------------------------------------------
   AXI_Command cl_sh_wr_cmds[$];   // write commands accepted on the PCIM AW channel
   AXI_Data    cl_sh_wr_data[$];   // write beats accepted on the PCIM W channel
   AXI_Command cl_sh_rd_cmds[$];   // read commands accepted on the PCIM AR channel
   AXI_Command sh_cl_rd_data[$];   // read commands whose data still has to be returned on PCIM R
   AXI_Command cl_sh_b_resps[$];   // B responses captured from the PCIS B channel

   // Internally generated clocks and resets
   logic         clk_core;
   logic         clk_extra_a1;
   logic         kernel_rst_n;

   logic         rst_n;

   // Reset synchronizer stages (clk_core domain)
   logic         pre_sync0_rst_n;
   logic         pre_sync1_rst_n;
   logic         pre_sync2_rst_n;
   logic         pre_sync3_rst_n;
   logic         sync_rst_n;
   logic         intf_sync_rst_n;

   logic         ddr_user_clk;
   logic         ddr_user_rst;
   logic         ddr_user_rst_n;   // inverted copy of ddr_user_rst

   logic         ddr_is_ready;
   logic         ddr_is_ready_presync;
   logic         ddr_is_ready_sync;

   bit           debug;                 // enables the DEBUG $display messages
   logic         chk_clk_freq = 1'b0;   // enables the clock frequency checkers

   logic         ECC_EN;                // selected by the ECC_DIRECT_EN / RND_ECC_EN defines
   int           ecc_err_cnt=0;

   // Descriptor for one DMA operation
   typedef struct {
      logic [63:0] buffer;
      logic [27:0] len;
      logic [63:0] cl_addr;
   } DMA_OP;

   // Per-channel DMA bookkeeping (four channels, index 0..3)
   DMA_OP h2c_dma_list[0:3][$];
   int    h2c_dma_wr_cmd_cnt[0:3];

   DMA_OP c2h_dma_list[0:3][$];
   DMA_OP c2h_data_dma_list[0:3][$];

   logic [3:0] h2c_dma_started;
   logic [3:0] c2h_dma_started;
   logic [3:0] c2h_dma_done;
   logic [3:0] h2c_dma_done;

   logic [7:0] read_data_buffer[];

   // Clock toggle delays: each clock generator inverts its clock every <name>_DLY,
   // so the resulting clock period is twice the delay.
   real MAIN_A0_DLY  = 4ns;
   real CORE_DLY     = 4ns;
   real EXTRA_A1_DLY = 8ns;
   real HBM_DLY      = 5ns;

   // Scratch variables used by the clock frequency checkers
   real main_rising_edge;
   real core_rising_edge;
   real extra_a1_rising_edge;
   real hbm_rising_edge;
   real main_clk_period;
   real core_clk_period;
   real extra_a1_clk_period;
   real hbm_clk_period;

   // Status outputs of the four AXI protocol checker instances
   logic [96:0] pcis_pc_status;
   logic        pcis_pc_asserted;
   logic [96:0] pcim_pc_status;
   logic        pcim_pc_asserted;
   logic [96:0] ocl_pc_status;
   logic        ocl_pc_asserted;
   logic [96:0] sda_pc_status;
   logic        sda_pc_asserted;

   // Error counters reported by the chk_* functions
   int          prot_err_count;   // protocol checker assertion events
   int          clk_err_count;    // clock frequency check failures
   int          prot_x_count;     // 'X' seen on a protocol checker "asserted" output
   int          counter;

   // Free-running counters; sampled into sh_cl_glcount0/1 on clk_main_a0
   logic [63:0] glcount0, glcount1;

//-------------------------------------------------------------------------------------------------------------
// Xilinx AXI Protocol Checker Instance (for CL_SH_DMA_PCIS*).
// Protocol checker checks for protocol violations on the interface where protocol checker
// is instantiated. This will help the CL designers in catching protocol violations before
// testing with real system. Refer to hdk/common/verif/models/xilinx_axi_pc/axi_protocol_checker_v1_1_vl_rfs.v
// for more details about each PC_STATUS bit.
//------------------------------------------------------------------------------------------------------------
   axi_protocol_checker_v1_1_12_top #(
      .C_AXI_PROTOCOL            (0),
      .C_AXI_ID_WIDTH            (16),
      .C_AXI_DATA_WIDTH          (512),
      .C_AXI_ADDR_WIDTH          (64),
      .C_AXI_AWUSER_WIDTH        (1),
      .C_AXI_ARUSER_WIDTH        (1),
      .C_AXI_WUSER_WIDTH         (1),
      .C_AXI_RUSER_WIDTH         (1),
      .C_AXI_BUSER_WIDTH         (1),
      .C_PC_MAXRBURSTS           (32),
      .C_PC_MAXWBURSTS           (32),
      .C_PC_EXMON_WIDTH          (0),
      .C_PC_AW_MAXWAITS          (`MAXWAITS),
      .C_PC_AR_MAXWAITS          (`MAXWAITS),
      .C_PC_W_MAXWAITS           (`MAXWAITS),
      .C_PC_R_MAXWAITS           (`MAXWAITS),
      .C_PC_B_MAXWAITS           (`MAXWAITS),
      .C_PC_MESSAGE_LEVEL        (2),
      .C_PC_SUPPORTS_NARROW_BURST(1),
      .C_PC_MAX_BURST_LENGTH     (256),
      .C_PC_HAS_SYSTEM_RESET     (1),
      .C_PC_STATUS_WIDTH         (97)
   ) axi_pc_mstr_inst_pcis (
      .pc_status       (pcis_pc_status),
      .pc_asserted     (pcis_pc_asserted),
      .system_resetn   (rst_main_n),
      .aclk            (clk_main_a0),
      .aresetn         (rst_main_n),

      // Write address channel (burst/lock/cache/prot/qos/region/user are tied to constants)
      .pc_axi_awid     (sh_cl_dma_pcis_awid),
      .pc_axi_awaddr   (sh_cl_dma_pcis_awaddr),
      .pc_axi_awlen    (sh_cl_dma_pcis_awlen),
      .pc_axi_awsize   (sh_cl_dma_pcis_awsize),
      .pc_axi_awburst  (2'b01),
      .pc_axi_awlock   (1'b0),
      .pc_axi_awcache  (4'b0000),
      .pc_axi_awprot   (3'b000),
      .pc_axi_awqos    (4'b0000),
      .pc_axi_awregion (4'b0000),
      .pc_axi_awuser   (1'h0),
      .pc_axi_awvalid  (sh_cl_dma_pcis_awvalid),
      .pc_axi_awready  (cl_sh_dma_pcis_awready),

      // Write data channel
      .pc_axi_wid      (16'h0000), // AXI3 only
      .pc_axi_wlast    (sh_cl_dma_pcis_wlast),
      .pc_axi_wdata    (sh_cl_dma_pcis_wdata),
      .pc_axi_wstrb    (sh_cl_dma_pcis_wstrb),
      .pc_axi_wuser    (1'h0),
      .pc_axi_wvalid   (sh_cl_dma_pcis_wvalid),
      .pc_axi_wready   (cl_sh_dma_pcis_wready),

      // Write response channel
      .pc_axi_bid      (cl_sh_dma_pcis_bid),
      .pc_axi_bresp    (cl_sh_dma_pcis_bresp),
      .pc_axi_buser    (1'h0),
      .pc_axi_bvalid   (cl_sh_dma_pcis_bvalid),
      .pc_axi_bready   (sh_cl_dma_pcis_bready),

      // Read address channel
      .pc_axi_arid     (sh_cl_dma_pcis_arid),
      .pc_axi_araddr   (sh_cl_dma_pcis_araddr),
      .pc_axi_arlen    (sh_cl_dma_pcis_arlen),
      .pc_axi_arsize   (sh_cl_dma_pcis_arsize),
      .pc_axi_arburst  (2'b01),
      .pc_axi_arlock   (1'b0),
      .pc_axi_arcache  (4'b0000),
      .pc_axi_arprot   (3'b000),
      .pc_axi_arqos    (4'b0000),
      .pc_axi_arregion (4'b0000),
      .pc_axi_aruser   (1'h0),
      .pc_axi_arvalid  (sh_cl_dma_pcis_arvalid),
      .pc_axi_arready  (cl_sh_dma_pcis_arready),

      // Read data channel
      .pc_axi_rid      (cl_sh_dma_pcis_rid),
      .pc_axi_rlast    (cl_sh_dma_pcis_rlast),
      .pc_axi_rdata    (cl_sh_dma_pcis_rdata),
      .pc_axi_rresp    (cl_sh_dma_pcis_rresp),
      .pc_axi_ruser    (1'h0),
      .pc_axi_rvalid   (cl_sh_dma_pcis_rvalid),
      .pc_axi_rready   (sh_cl_dma_pcis_rready)
   );

//----------------------------------------------------------------
// Xilinx AXI Protocol Checker Instance (for CL_SH_PCIM*)
//----------------------------------------------------------------
   axi_protocol_checker_v1_1_12_top #(
      .C_AXI_PROTOCOL            (0),
      .C_AXI_ID_WIDTH            (16),
      .C_AXI_DATA_WIDTH          (512),
      .C_AXI_ADDR_WIDTH          (64),
      .C_AXI_AWUSER_WIDTH        (1),
      .C_AXI_ARUSER_WIDTH        (1),
      .C_AXI_WUSER_WIDTH         (1),
      .C_AXI_RUSER_WIDTH         (1),
      .C_AXI_BUSER_WIDTH         (1),
      .C_PC_MAXRBURSTS           (32),
      .C_PC_MAXWBURSTS           (32),
      .C_PC_EXMON_WIDTH          (0),
      .C_PC_AW_MAXWAITS          (`MAXWAITS),
      .C_PC_AR_MAXWAITS          (`MAXWAITS),
      .C_PC_W_MAXWAITS           (`MAXWAITS),
      .C_PC_R_MAXWAITS           (`MAXWAITS),
      .C_PC_B_MAXWAITS           (`MAXWAITS),
      .C_PC_MESSAGE_LEVEL        (0),
      .C_PC_SUPPORTS_NARROW_BURST(1),
      .C_PC_MAX_BURST_LENGTH     (256),
      .C_PC_HAS_SYSTEM_RESET     (1),
      .C_PC_STATUS_WIDTH         (97)
   ) axi_pc_mstr_inst_pcim (
      .pc_status       (pcim_pc_status),
      .pc_asserted     (pcim_pc_asserted),
      .system_resetn   (rst_main_n),
      .aclk            (clk_main_a0),
      .aresetn         (rst_main_n),

      // Write address channel
      .pc_axi_awid     (cl_sh_pcim_awid),
      .pc_axi_awaddr   (cl_sh_pcim_awaddr),
      .pc_axi_awlen    (cl_sh_pcim_awlen),
      .pc_axi_awsize   (cl_sh_pcim_awsize),
      .pc_axi_awburst  (2'b01),
      .pc_axi_awlock   (1'b0),
      .pc_axi_awcache  (4'b0000),
      .pc_axi_awprot   (3'b000),
      .pc_axi_awqos    (4'b0000),
      .pc_axi_awregion (4'b0000),
      .pc_axi_awuser   (1'H0),
      .pc_axi_awvalid  (cl_sh_pcim_awvalid),
      .pc_axi_awready  (sh_cl_pcim_awready),

      // Write data channel
      .pc_axi_wid      (16'h0000), // AXI3 only
      .pc_axi_wlast    (cl_sh_pcim_wlast),
      .pc_axi_wdata    (cl_sh_pcim_wdata),
      .pc_axi_wstrb    (cl_sh_pcim_wstrb),
      .pc_axi_wuser    (1'H0),
      .pc_axi_wvalid   (cl_sh_pcim_wvalid),
      .pc_axi_wready   (sh_cl_pcim_wready),

      // Write response channel
      .pc_axi_bid      (sh_cl_pcim_bid),
      .pc_axi_bresp    (sh_cl_pcim_bresp),
      .pc_axi_buser    (1'H0),
      .pc_axi_bvalid   (sh_cl_pcim_bvalid),
      .pc_axi_bready   (cl_sh_pcim_bready),

      // Read address channel
      .pc_axi_arid     (cl_sh_pcim_arid),
      .pc_axi_araddr   (cl_sh_pcim_araddr),
      .pc_axi_arlen    (cl_sh_pcim_arlen),
      .pc_axi_arsize   (cl_sh_pcim_arsize),
      .pc_axi_arburst  (2'b01),
      .pc_axi_arlock   (1'b0),
      .pc_axi_arcache  (4'b0000),
      .pc_axi_arprot   (3'b000),
      .pc_axi_arqos    (4'b0000),
      .pc_axi_arregion (4'b0000),
      .pc_axi_aruser   (1'H0),
      .pc_axi_arvalid  (cl_sh_pcim_arvalid),
      .pc_axi_arready  (sh_cl_pcim_arready),

      // Read data channel
      .pc_axi_rid      (sh_cl_pcim_rid),
      .pc_axi_rlast    (sh_cl_pcim_rlast),
      .pc_axi_rdata    (sh_cl_pcim_rdata),
      .pc_axi_rresp    (sh_cl_pcim_rresp),
      .pc_axi_ruser    (1'H0),
      .pc_axi_rvalid   (sh_cl_pcim_rvalid),
      .pc_axi_rready   (cl_sh_pcim_rready)
   );

//-------------------------------------------------------------------------
// [axi_pc] Xilinx AXI Protocol Checker Instance (for OCL AXL interface)
//-------------------------------------------------------------------------
   axi_protocol_checker_v1_1_12_top #(
      .C_AXI_PROTOCOL            (2), // 2 = AXI4-Lite
      .C_AXI_DATA_WIDTH          (32),
      .C_AXI_ADDR_WIDTH          (32),
      .C_AXI_AWUSER_WIDTH        (1), // Actually, these are all 0
      .C_AXI_ARUSER_WIDTH        (1),
      .C_AXI_WUSER_WIDTH         (1),
      .C_AXI_RUSER_WIDTH         (1),
      .C_AXI_BUSER_WIDTH         (1),
      .C_PC_MAXRBURSTS           (8), // Technically, up to 8, but must be in-order - no use of IDs
      .C_PC_MAXWBURSTS           (8),
      .C_PC_EXMON_WIDTH          (0),
      .C_PC_AW_MAXWAITS          (`MAXWAITS),
      .C_PC_AR_MAXWAITS          (`MAXWAITS),
      .C_PC_W_MAXWAITS           (`MAXWAITS), // These three are don't care because "ready" signals on master behave properly (or are tied)
      .C_PC_R_MAXWAITS           (`MAXWAITS),
      .C_PC_B_MAXWAITS           (`MAXWAITS),
      .C_PC_MESSAGE_LEVEL        (0),
      .C_PC_SUPPORTS_NARROW_BURST(0),
      .C_PC_MAX_BURST_LENGTH     (1),
      .C_PC_HAS_SYSTEM_RESET     (1),
      .C_PC_STATUS_WIDTH         (97)
   ) axl_pc_ocl_slv_inst (
      .pc_status       (ocl_pc_status),
      .pc_asserted     (ocl_pc_asserted),
      .system_resetn   (rst_main_n),
      .aclk            (clk_main_a0),
      .aresetn         (rst_main_n),

      // Write address channel (ID/len/size and sideband inputs are tied to constants)
      .pc_axi_awid     (1'h0),
      .pc_axi_awaddr   (ocl_cl_awaddr),
      .pc_axi_awlen    (8'd0),
      .pc_axi_awsize   (3'd0),
      .pc_axi_awburst  (2'b01),
      .pc_axi_awlock   (1'b0),
      .pc_axi_awcache  (4'b0000),
      .pc_axi_awprot   (3'b000),
      .pc_axi_awqos    (4'b0000),
      .pc_axi_awregion (4'b0000),
      .pc_axi_awuser   (1'H0),
      .pc_axi_awvalid  (ocl_cl_awvalid),
      .pc_axi_awready  (cl_ocl_awready),

      // Write data channel
      .pc_axi_wid      (1'b0), // AXI3 only
      .pc_axi_wlast    (1'd1),
      .pc_axi_wdata    (ocl_cl_wdata),
      .pc_axi_wstrb    (ocl_cl_wstrb),
      .pc_axi_wuser    (1'H0),
      .pc_axi_wvalid   (ocl_cl_wvalid),
      .pc_axi_wready   (cl_ocl_wready),

      // Write response channel
      .pc_axi_bid      (1'h0),
      .pc_axi_bresp    (cl_ocl_bresp),
      .pc_axi_buser    (1'H0),
      .pc_axi_bvalid   (cl_ocl_bvalid),
      .pc_axi_bready   (ocl_cl_bready),

      // Read address channel
      .pc_axi_arid     (1'h0),
      .pc_axi_araddr   (ocl_cl_araddr),
      .pc_axi_arlen    (8'd0),
      .pc_axi_arsize   (3'd0),
      .pc_axi_arburst  (2'b01),
      .pc_axi_arlock   (1'b0),
      .pc_axi_arcache  (4'b0000),
      .pc_axi_arprot   (3'b000),
      .pc_axi_arqos    (4'b0000),
      .pc_axi_arregion (4'b0000),
      .pc_axi_aruser   (1'H0),
      .pc_axi_arvalid  (ocl_cl_arvalid),
      .pc_axi_arready  (cl_ocl_arready),

      // Read data channel
      .pc_axi_rid      (1'h0),
      .pc_axi_rlast    (1'd1),
      .pc_axi_rdata    (cl_ocl_rdata),
      .pc_axi_rresp    (cl_ocl_rresp),
      .pc_axi_ruser    (1'H0),
      .pc_axi_rvalid   (cl_ocl_rvalid),
      .pc_axi_rready   (ocl_cl_rready)
   );

//-------------------------------------------------------------------------
// [axi_pc] Xilinx AXI Protocol Checker Instance (for SDA AXL interface)
//-------------------------------------------------------------------------
   axi_protocol_checker_v1_1_12_top #(
      .C_AXI_PROTOCOL            (2), // 2 = AXI4-Lite
      .C_AXI_DATA_WIDTH          (32),
      .C_AXI_ADDR_WIDTH          (32),
      .C_AXI_AWUSER_WIDTH        (1), // Actually, these are all 0
      .C_AXI_ARUSER_WIDTH        (1),
      .C_AXI_WUSER_WIDTH         (1),
      .C_AXI_RUSER_WIDTH         (1),
      .C_AXI_BUSER_WIDTH         (1),
      .C_PC_MAXRBURSTS           (8), // Technically, up to 8, but must be in-order - no use of IDs
      .C_PC_MAXWBURSTS           (8),
      .C_PC_EXMON_WIDTH          (0),
      .C_PC_AW_MAXWAITS          (`MAXWAITS),
      .C_PC_AR_MAXWAITS          (`MAXWAITS),
      .C_PC_W_MAXWAITS           (`MAXWAITS), // These three are don't care because "ready" signals on master behave properly (or are tied)
      .C_PC_R_MAXWAITS           (`MAXWAITS),
      .C_PC_B_MAXWAITS           (`MAXWAITS),
      .C_PC_MESSAGE_LEVEL        (0),
      .C_PC_SUPPORTS_NARROW_BURST(0),
      .C_PC_MAX_BURST_LENGTH     (1),
      .C_PC_HAS_SYSTEM_RESET     (1),
      .C_PC_STATUS_WIDTH         (97)
   ) axl_pc_sda_slv_inst (
      .pc_status       (sda_pc_status),
      .pc_asserted     (sda_pc_asserted),
      .system_resetn   (rst_main_n),
      .aclk            (clk_main_a0),
      .aresetn         (rst_main_n),

      // Write address channel (ID/len/size and sideband inputs are tied to constants)
      .pc_axi_awid     (1'b0),
      .pc_axi_awaddr   (sda_cl_awaddr),
      .pc_axi_awlen    (8'd0),
      .pc_axi_awsize   (3'd0),
      .pc_axi_awburst  (2'b01),
      .pc_axi_awlock   (1'b0),
      .pc_axi_awcache  (4'b0000),
      .pc_axi_awprot   (3'b000),
      .pc_axi_awqos    (4'b0000),
      .pc_axi_awregion (4'b0000),
      .pc_axi_awuser   (1'H0),
      .pc_axi_awvalid  (sda_cl_awvalid),
      .pc_axi_awready  (cl_sda_awready),

      // Write data channel
      .pc_axi_wid      (1'b0), // AXI3 only
      .pc_axi_wlast    (1'd1),
      .pc_axi_wdata    (sda_cl_wdata),
      .pc_axi_wstrb    (sda_cl_wstrb),
      .pc_axi_wuser    (1'H0),
      .pc_axi_wvalid   (sda_cl_wvalid),
      .pc_axi_wready   (cl_sda_wready),

      // Write response channel
      .pc_axi_bid      (1'h0),
      .pc_axi_bresp    (cl_sda_bresp),
      .pc_axi_buser    (1'H0),
      .pc_axi_bvalid   (cl_sda_bvalid),
      .pc_axi_bready   (sda_cl_bready),

      // Read address channel
      .pc_axi_arid     (1'h0),
      .pc_axi_araddr   (sda_cl_araddr),
      .pc_axi_arlen    (8'd0),
      .pc_axi_arsize   (3'd0),
      .pc_axi_arburst  (2'b01),
      .pc_axi_arlock   (1'b0),
      .pc_axi_arcache  (4'b0000),
      .pc_axi_arprot   (3'b000),
      .pc_axi_arqos    (4'b0000),
      .pc_axi_arregion (4'b0000),
      .pc_axi_aruser   (1'H0),
      .pc_axi_arvalid  (sda_cl_arvalid),
      .pc_axi_arready  (cl_sda_arready),

      // Read data channel
      .pc_axi_rid      (1'h0),
      .pc_axi_rlast    (1'd1),
      .pc_axi_rdata    (cl_sda_rdata),
      .pc_axi_rresp    (cl_sda_rresp),
      .pc_axi_ruser    (1'H0),
      .pc_axi_rvalid   (cl_sda_rvalid),
      .pc_axi_rready   (sda_cl_rready)
   );

   // Debug messages are off by default; the plusarg hook below is still commented out.
   initial begin
      debug = 1'b0;
/* TODO: Use the code below once plusarg support is enabled
      if ($test$plusargs("DEBUG")) begin
         debug = 1'b1;
      end
      else begin
         debug = 1'b0;
      end
*/
   end

   // ECC_EN is 1 when either ECC_DIRECT_EN or RND_ECC_EN is defined, otherwise 0.
`ifdef ECC_DIRECT_EN
   assign ECC_EN = 1'b1;
`else
 `ifdef RND_ECC_EN
   assign ECC_EN = 1'b1;
 `else
   assign ECC_EN = 1'b0;
 `endif
`endif

   // Free-running clock generators. Each clock toggles every <name>_DLY, and the
   // delay variable is re-read on every toggle, so power_up can retune it.
   initial begin
      clk_core = 1'b0;
      forever #CORE_DLY clk_core = ~clk_core;
   end

   initial begin
      clk_main_a0 = 1'b0;
      forever #MAIN_A0_DLY clk_main_a0 = ~clk_main_a0;
   end

   initial begin
      clk_extra_a1 = 1'b0;
      forever #EXTRA_A1_DLY clk_extra_a1 = ~clk_extra_a1;
   end

   initial begin
      clk_hbm_ref = 1'b0;
      forever #HBM_DLY clk_hbm_ref = ~clk_hbm_ref;
   end

   // Raw reset requests written by the power_up / power_down tasks
   logic rst_n_i;
   logic rst_main_n_i = 0;
   logic rst_xtra_n_i;

   // Register the raw reset requests into their clock domains
   always @(posedge clk_core)
     rst_n <= rst_n_i;

   always @(posedge clk_main_a0)
     rst_main_n <= rst_main_n_i;

   initial begin
      kernel_rst_n = 1'b0; // kernel reset is not used for non-SDAccel simulations.
   end

   // Reset synchronizer: asynchronous assert on rst_n, release shifted through
   // five flops on clk_core before sync_rst_n goes high.
   always_ff @(negedge rst_n or posedge clk_core)
     if (!rst_n)
       begin
          pre_sync0_rst_n <= 0;
          pre_sync1_rst_n <= 0;
          pre_sync2_rst_n <= 0;
          pre_sync3_rst_n <= 0;
          sync_rst_n      <= 0;
       end
     else
       begin
          pre_sync0_rst_n <= 1'b1;
          pre_sync1_rst_n <= pre_sync0_rst_n;
          pre_sync2_rst_n <= pre_sync1_rst_n;
          pre_sync3_rst_n <= pre_sync2_rst_n;
          sync_rst_n      <= pre_sync3_rst_n;
       end

   // Static shell status/control outputs
   assign sh_cl_pwr_state = 2'b00;

   initial begin
      sh_cl_ctl0 <= 32'h0;
      sh_cl_ctl1 <= 32'h0;
   end

   initial begin
      sh_cl_flr_assert <= 1'b0;
   end

   initial begin
      sh_cl_status_vdip <= 32'h0;
   end

   // Interface reset: held low while sync_rst_n is low, otherwise the inverse of the FLR request
   always_ff @(posedge clk_core or negedge sync_rst_n)
     if (~sync_rst_n)
       intf_sync_rst_n <= 0;
     else
       intf_sync_rst_n <= ~(sh_cl_flr_assert);

   // Global counter 0 increments on the rising edge of clk_core
   always_ff @(negedge rst_n or posedge clk_core)
     if (!rst_n)
       begin
          glcount0 <= 0;
       end
     else
       begin
          glcount0 <= glcount0+1;
       end

   // Global counter 1 increments on the falling edge of clk_core
   always_ff @(negedge rst_n or negedge clk_core)
     if (!rst_n)
       begin
          glcount1 <= 0;
       end
     else
       begin
          glcount1 <= glcount1+1;
       end

   // Sample both counters into the clk_main_a0 domain for the CL
   always_ff @(posedge clk_main_a0)
     begin
        sh_cl_glcount0 <= glcount0;
        sh_cl_glcount1 <= glcount1;
     end

   // PCIe configuration values presented to the CL
   initial begin
      for (int i=0; i<NUM_PCIE; i++) begin
         cfg_max_payload[i]  <= 2'b01;  // 256 bytes
         cfg_max_read_req[i] <= 3'b001; // 256 bytes
      end
   end

   assign ddr_user_rst_n = ~ddr_user_rst;

   // TODO: Connect up DDR stats
   // interfaces if needed
   // The DDR stats request interface is parked at idle values.
   initial begin
      sh_cl_ddr_stat_addr  = 8'h00;
      sh_cl_ddr_stat_wr    = 1'b0;
      sh_cl_ddr_stat_rd    = 1'b0;
      sh_cl_ddr_stat_wdata = 32'h0;
      sh_cl_ddr_stat_user  = 3'b0;
   end

   //=================================================
   //
   // sh->cl PCIeS Interface
   //
   //=================================================

   // initial various counts for DMA operations
   // Also gives the PCIS master outputs and the PCIM B/R slave outputs defined start values.
   initial begin
      for(int i=0; i<4; i++)
        h2c_dma_wr_cmd_cnt[i] = 0;

      sh_cl_dma_pcis_awaddr  = 64'b0;
      sh_cl_dma_pcis_awid    = 16'b0;
      sh_cl_dma_pcis_awlen   = 8'b0;
      sh_cl_dma_pcis_awsize  = 3'b0;
      sh_cl_dma_pcis_awburst = 2'h1; // this should be 2'h1 (INCR) as that seems to be only burst mode we use
      sh_cl_dma_pcis_awcache = 4'b0;
      sh_cl_dma_pcis_awlock  = 1'b0;
      sh_cl_dma_pcis_awprot  = 3'b0;
      sh_cl_dma_pcis_awqos   = 4'b0;
      sh_cl_dma_pcis_awuser  = 55'b0;

      sh_cl_dma_pcis_wdata   = 512'b0;
      sh_cl_dma_pcis_wstrb   = 64'b0;
      sh_cl_dma_pcis_wlast   = 1'b0;
      sh_cl_dma_pcis_wid     = 16'b0;
      sh_cl_dma_pcis_wuser   = 64'b0;

      sh_cl_dma_pcis_araddr  = 64'b0;
      sh_cl_dma_pcis_arlen   = 8'b0;
      sh_cl_dma_pcis_arsize  = 3'b0;
      sh_cl_dma_pcis_arburst = 2'h1; // this should be 2'h1 (INCR) as that seems to be only burst mode we use
      sh_cl_dma_pcis_arcache = 4'b0;
      sh_cl_dma_pcis_arlock  = 1'b0;
      sh_cl_dma_pcis_arprot  = 3'b0;
      sh_cl_dma_pcis_arqos   = 4'b0;
      sh_cl_dma_pcis_aruser  = 55'b0;

      sh_cl_pcim_bid         = 16'b0;
      sh_cl_pcim_bresp       = 2'b0;

      sh_cl_pcim_rid         = 16'b0;
      sh_cl_pcim_ruser       = 64'b0;
      sh_cl_pcim_rresp       = 2'b0;
      sh_cl_pcim_rdata       = 512'b0;
   end

   //
   // sh->cl Address Write Channel
   //
   // Drives the command at the head of sh_cl_wr_cmds. awvalid is raised when it
   // is low, held while awready is low, and dropped for one cycle after a
   // handshake; the head entry is popped on that handshake.
   always @(posedge clk_core) begin
      if (sh_cl_wr_cmds.size() != 0) begin

         sh_cl_dma_pcis_awaddr  <= sh_cl_wr_cmds[0].addr;
         sh_cl_dma_pcis_awid    <= sh_cl_wr_cmds[0].id;
         sh_cl_dma_pcis_awlen   <= sh_cl_wr_cmds[0].len;
         // The queued size is ignored; awsize is always driven as 6.
         sh_cl_dma_pcis_awsize  <= /*sh_cl_wr_cmds[0].size*/3'h6;

         sh_cl_dma_pcis_awvalid <= !sh_cl_dma_pcis_awvalid ? 1'b1 :
                                   !cl_sh_dma_pcis_awready ? 1'b1 : 1'b0;

         if (cl_sh_dma_pcis_awready && sh_cl_dma_pcis_awvalid) begin
            if (debug) begin
               $display("[%t] : DEBUG popping sh to cl write cmd fifo - %d", $realtime, sh_cl_wr_cmds.size());
            end
            sh_cl_wr_cmds.pop_front();
         end

      end
      else
        sh_cl_dma_pcis_awvalid <= 1'b0;
   end

   //
   // write Data Channel
   //

   //
   // sh->cl data Write Channel
   //
   // Drives the beat at the head of sh_cl_wr_data using the same valid scheme
   // as the AW channel. On a handshake the per-id outstanding write counter is
   // decremented when wlast is set, and h2c_dma_done for that id is refreshed.
   always @(posedge clk_core) begin
      if (sh_cl_wr_data.size() != 0) begin

         sh_cl_dma_pcis_wdata  <= sh_cl_wr_data[0].data;
         sh_cl_dma_pcis_wstrb  <= sh_cl_wr_data[0].strb;
         sh_cl_dma_pcis_wlast  <= sh_cl_wr_data[0].last;

         sh_cl_dma_pcis_wvalid <= !sh_cl_dma_pcis_wvalid ? 1'b1 :
                                  !cl_sh_dma_pcis_wready ? 1'b1 : 1'b0;

         if (cl_sh_dma_pcis_wready && sh_cl_dma_pcis_wvalid) begin
            if (debug) begin
               $display("[%t] : DEBUG popping wr data fifo - %d", $realtime, sh_cl_wr_data.size());
            end
            // Only the decrement is conditional on wlast; the done flag is updated on every accepted beat.
            if (sh_cl_dma_pcis_wlast)
              h2c_dma_wr_cmd_cnt[sh_cl_wr_data[0].id]--;
            h2c_dma_done[sh_cl_wr_data[0].id] = (h2c_dma_wr_cmd_cnt[sh_cl_wr_data[0].id] == 0);
            sh_cl_wr_data.pop_front();
         end

      end
      else
        sh_cl_dma_pcis_wvalid <= 1'b0;
   end

   //
   // cl->sh B Response Channel
   //
   // bready is always asserted; every accepted response is queued in cl_sh_b_resps.
   always @(posedge clk_core) begin
      sh_cl_dma_pcis_bready <= 1'b1;
   end

   always @(posedge clk_core) begin
      AXI_Command resp;

      if (cl_sh_dma_pcis_bvalid & sh_cl_dma_pcis_bready) begin
         resp.resp = cl_sh_dma_pcis_bresp;
         resp.id   = cl_sh_dma_pcis_bid;

         cl_sh_b_resps.push_back(resp);
      end
   end

   //
   // sh->cl Address Read Channel
   //
   // Same drive scheme as the AW channel, fed from sh_cl_rd_cmds.
   always @(posedge clk_core) begin
      if (sh_cl_rd_cmds.size() != 0) begin
         sh_cl_dma_pcis_araddr  <= sh_cl_rd_cmds[0].addr;
         sh_cl_dma_pcis_arid    <= sh_cl_rd_cmds[0].id;
         sh_cl_dma_pcis_arlen   <= sh_cl_rd_cmds[0].len;
         // The queued size is ignored; arsize is always driven as 6.
         sh_cl_dma_pcis_arsize  <= /*sh_cl_rd_cmds[0].size*/3'h6;

         sh_cl_dma_pcis_arvalid <= !sh_cl_dma_pcis_arvalid ? 1'b1 :
                                   !cl_sh_dma_pcis_arready ? 1'b1 : 1'b0;

         if (cl_sh_dma_pcis_arready && sh_cl_dma_pcis_arvalid) begin
            if (debug) begin
               $display("[%t] : DEBUG popping sh to cl read cmd fifo - %d", $realtime, sh_cl_rd_cmds.size());
            end
            sh_cl_rd_cmds.pop_front();
         end

      end
      else begin
         sh_cl_dma_pcis_arid    <= 16'b0;
         sh_cl_dma_pcis_arvalid <= 1'b0;
      end
   end

   //
   // cl->sh Read Data Channel
   //
   // rready is asserted while fewer than 16 read beats are queued in cl_sh_rd_data.
   always @(posedge clk_core) begin
      sh_cl_dma_pcis_rready <= (cl_sh_rd_data.size() < 16) ? 1'b1 : 1'b0;
   end

   // Capture every accepted read beat into cl_sh_rd_data.
   always @(posedge clk_core) begin
      AXI_Data data;

      if (cl_sh_dma_pcis_rvalid & sh_cl_dma_pcis_rready) begin
         data.data = cl_sh_dma_pcis_rdata;
         data.id   = cl_sh_dma_pcis_rid;
         data.last = cl_sh_dma_pcis_rlast;

         if (debug) begin
            for (int i=0; i<16; i++) begin
               $display("[%t] - DEBUG read data [%2d]: 0x%08h", $realtime, i, cl_sh_dma_pcis_rdata[(i*32)+:32]);
            end
         end

         cl_sh_rd_data.push_back(data);
      end
   end

   //=================================================
   //
   // cl->sh PCIeM Interface
   //
   //=================================================

   logic [63:0] host_memory_addr = 0;   // written by map_host_memory
   AXI_Command  host_mem_wr_que[$];     // accepted PCIM write commands awaiting their data
   logic        first_wr_beat = 1;      // 1 when the next beat starts a new write burst
   int          wr_last_cnt = 0;        // write bursts whose last beat was accepted but whose B response is not yet sent
   logic [63:0] wr_addr, wr_addr_t;

   // Host memory write process: consumes one queued PCIM write beat per clock
   // and merges its strobed bytes into host memory one 32-bit word at a time.
   // Host memory is either the SV associative array or the C model, selected
   // by `TB_TOP.use_c_host_memory.
   always @(posedge clk_core) begin
      if (host_mem_wr_que.size() > 0) begin
         // Latch the burst start address on the first beat of a command
         if (first_wr_beat == 1) begin
            wr_addr = host_mem_wr_que[0].addr;
            first_wr_beat = 1'b0;
         end

         if (cl_sh_wr_data.size() > 0) begin
            if (debug) begin
               $display("[%t] - DEBUG fb: %1d 0x%0128x 0x%016x", $realtime, first_wr_beat, cl_sh_wr_data[0].data, cl_sh_wr_data[0].strb);
            end

            // Walk the 32-bit words of the beat, starting at the word selected by wr_addr[5:2]
            for(int i=wr_addr[5:2]; i<16; i++) begin
               logic [31:0] word;

               // Read the current word so that unstrobed bytes are preserved
               if (!`TB_TOP.use_c_host_memory)
                 if (`TB_TOP.sv_host_memory.exists({wr_addr[63:2], 2'b00}))
                   word = `TB_TOP.sv_host_memory[{wr_addr[63:2], 2'b00}];
                 else
                   word = 32'hffff_ffff; // return a default value
               else begin
                  wr_addr_t = {wr_addr[63:2], 2'b00};
                  // Bytes are shifted in from the top, so the byte at the lowest address ends up in word[7:0]
                  for(int k=0; k<4; k++) begin
                     byte t;
                     t = `TB_TOP.host_memory_getc(wr_addr_t + k);
                     word = {t, word[31:8]};
                  end
               end

               // Overlay the bytes whose write strobe is set
               for(int j=0; j<4; j++) begin
                  logic [7:0] c;
                  int         index;
                  index = j + (i * 4);

                  if (cl_sh_wr_data[0].strb[index]) begin
                     c = cl_sh_wr_data[0].data >> (index * 8);
                     //FIX partial DW order word = {c, word[31:8]};
                     word[8*j+:8] = c;
                  end
               end // for (int j=0; j<4; j++)

               // Write the merged word back
               if (!`TB_TOP.use_c_host_memory) begin
                  `TB_TOP.sv_host_memory[{wr_addr[63:2], 2'b00}] = word;
               end
               else begin
                  wr_addr_t = {wr_addr[63:2], 2'b00};
                  for(int k=0; k<4; k++) begin
                     byte t;
                     t = word[7:0];
                     `TB_TOP.host_memory_putc(wr_addr_t + k, t);
                     word = word >> 8;
                  end
               end

               wr_addr += 4;
            end

            // The last beat retires the command; the next beat starts a new burst
            if (cl_sh_wr_data[0].last == 1) begin
               first_wr_beat = 1'b1;
               host_mem_wr_que.pop_front();
               if (debug) begin
                  $display("[%t] - DEBUG reseting...", $realtime);
               end
            end

            cl_sh_wr_data.pop_front();
         end // if (cl_sh_wr_data.size() > 0)
      end // if (host_mem_wr_que.size() > 0)
   end

   //
   // cl->sh Write Address Channel
   //
   // Accepts PCIM write commands and queues them for the response channel and
   // the host memory write process. awready is forced high while fewer than
   // four commands are pending; otherwise it is throttled by awready_cnt,
   // which is reloaded with a random value in 0..80 each time awready is set.
   always @(posedge clk_core) begin
      AXI_Command cmd;
`ifdef QUESTA_SIM
      automatic int awready_cnt = 0;
`else
      int awready_cnt = 0;
`endif

      if (cl_sh_pcim_awvalid && sh_cl_pcim_awready) begin
         cmd.addr = cl_sh_pcim_awaddr;
         cmd.id   = cl_sh_pcim_awid;
         cmd.len  = cl_sh_pcim_awlen;
         cmd.size = cl_sh_pcim_awsize;
         cmd.last = 0;

         // Only size code 6 is supported; anything else ends the simulation
         if(cl_sh_pcim_awsize != 6) begin
            $display("FATAL ERROR: AwSize other than 6 are not supported");
            $finish;
         end

         cl_sh_wr_cmds.push_back(cmd);
         sh_cl_b_resps.push_back(cmd);
         host_mem_wr_que.push_back(cmd);
      end

      if ((cl_sh_wr_cmds.size() < 4) || (awready_cnt == 0)) begin
         sh_cl_pcim_awready <= 1'b1;
         awready_cnt = $urandom_range(0, 80);
      end
      else begin
         sh_cl_pcim_awready <= 1'b0;
         awready_cnt--;
      end
   end

   //
   // cl->sh write Data Channel
   //
   // Captures accepted PCIM write beats into cl_sh_wr_data and counts completed
   // bursts in wr_last_cnt. wready is dropped while more than 64 beats are
   // queued or while the random back-pressure counter wready_cnt is non-zero.
   always @(posedge clk_core) begin
      AXI_Data wr_data;
`ifdef QUESTA_SIM
      automatic int wready_cnt = 0;
      automatic int wready_nonzero_wait = 0;
`else
      int wready_cnt = 0;
      int wready_nonzero_wait = 0;
`endif

      if (sh_cl_pcim_wready && cl_sh_pcim_wvalid) begin
         wr_data.data = cl_sh_pcim_wdata;
         wr_data.strb = cl_sh_pcim_wstrb;
         wr_data.last = cl_sh_pcim_wlast;

         cl_sh_wr_data.push_back(wr_data);

         if (wr_data.last == 1)
           wr_last_cnt += 1;
      end // if (sh_cl_pcim_wready && cl_sh_pcim_wvalid)

      if ((cl_sh_wr_data.size() > 64) || (wready_cnt > 0)) begin
         sh_cl_pcim_wready <= 1'b0;
         wready_cnt--;
      end
      else begin
         sh_cl_pcim_wready <= 1'b1;
         // A random stall length in 0..10 is drawn, but it is kept only when the
         // second draw (0..8) is zero; otherwise no stall is scheduled.
         wready_cnt = $urandom_range(10, 0);
         wready_nonzero_wait = $urandom_range(8, 0);
         wready_cnt = (wready_nonzero_wait == 0) ? wready_cnt : 0;
      end
   end

   //
   // cl->sh B Response Channel
   //
   // Returns an OKAY (2'b00) response for the oldest pending write command once
   // at least one complete burst has been received (wr_last_cnt != 0).
   always @(posedge clk_core) begin
      if (sh_cl_b_resps.size() != 0) begin
         if (debug) begin
            $display("[%t] : DEBUG resp.size  %2d ", $realtime, sh_cl_b_resps.size());
         end
         if (wr_last_cnt != 0) begin
            sh_cl_pcim_bid    <= sh_cl_b_resps[0].id;
            sh_cl_pcim_bresp  <= 2'b00;

            sh_cl_pcim_bvalid <= !sh_cl_pcim_bvalid ? 1'b1 :
                                 !cl_sh_pcim_bready ? 1'b1 : 1'b0;

            if (cl_sh_pcim_bready && sh_cl_pcim_bvalid) begin
               wr_last_cnt -= 1;
               sh_cl_b_resps.pop_front();
               cl_sh_wr_cmds.pop_front();
            end
         end
      end
      else
        sh_cl_pcim_bvalid <= 1'b0;
   end

   //
   // sh->cl Address Read Channel
   //
   // Accepts PCIM read commands; arready is throttled the same way as awready.
   always @(posedge clk_core) begin
      AXI_Command cmd;
`ifdef QUESTA_SIM
      automatic int arready_cnt = 0;
`else
      int arready_cnt = 0;
`endif

      if (cl_sh_pcim_arvalid && sh_cl_pcim_arready) begin
         cmd.addr = cl_sh_pcim_araddr;
         cmd.id   = cl_sh_pcim_arid;
         cmd.len  = cl_sh_pcim_arlen;
         cmd.size = cl_sh_pcim_arsize;
         cmd.last = 0;

         // Only size code 6 is supported; anything else ends the simulation
         if(cl_sh_pcim_arsize != 6) begin
            $display("FATAL ERROR: ArSize other than 6 are not supported");
            $finish;
         end

         cl_sh_rd_cmds.push_back(cmd);
         sh_cl_rd_data.push_back(cmd);
      end

      if ((cl_sh_rd_cmds.size() < 4) || (arready_cnt == 0)) begin
         sh_cl_pcim_arready <= 1'b1;
         arready_cnt = $urandom_range(0, 80);
      end
      else begin
         sh_cl_pcim_arready <= 1'b0;
         arready_cnt--;
      end
   end

   //
   // sh->cl Read Data Channel
   //
   logic first_rd_beat;               // 1 when the next beat starts a new read burst
   logic [63:0] rd_addr, rd_addr_t;

   // Returns read data for the command at the head of sh_cl_rd_data. The
   // remaining beat count is kept in that entry's len field and decremented on
   // every accepted beat; the entry is popped when a beat is accepted at len == 0.
   always @(posedge clk_core) begin
      AXI_Command rd_cmd;
      logic [511:0] beat;

      if (sh_cl_rd_data.size() != 0) begin
         sh_cl_pcim_rid    <= sh_cl_rd_data[0].id;
         sh_cl_pcim_rresp  <= 2'b00;

         // rvalid drops only after a beat with rlast set has been accepted
         sh_cl_pcim_rvalid <= !sh_cl_pcim_rvalid ? 1'b1 :
                              !cl_sh_pcim_rready ? 1'b1 :
                              !sh_cl_pcim_rlast  ? 1'b1 : 1'b0;

         sh_cl_pcim_rlast  <= (sh_cl_rd_data[0].len == 0) ? 1'b1 :
                              (sh_cl_rd_data[0].len == 1) && sh_cl_pcim_rvalid && cl_sh_pcim_rready ? 1'b1 : 1'b0;

         // Latch the burst start address on the first beat of a command
         if (first_rd_beat == 1'b1) begin
            rd_addr = sh_cl_rd_data[0].addr;
            first_rd_beat = 1'b0;
         end

         beat = {512{1'b1}};

         // A new beat is assembled (and rd_addr advanced) only while rready is high
         if (cl_sh_pcim_rready) begin
            for(int i=rd_addr[5:2]; i<16; i++) begin
               logic [31:0] c;

               if (debug) begin
                  $display("[%t] : DEBUG reading addr 0x%016x", $realtime, rd_addr);
               end

               // Unwritten SV host memory locations read back as all ones
               if (!`TB_TOP.use_c_host_memory)
                 if (`TB_TOP.sv_host_memory.exists({rd_addr[63:2], 2'b00}))
                   c = `TB_TOP.sv_host_memory[{rd_addr[63:2], 2'b00}];
                 else
                   c = 32'hffffffff;
               else begin
                  rd_addr_t = {rd_addr[63:2], 2'b00};
                  for(int k=0; k<4; k++) begin
                     byte t;
                     t = `TB_TOP.host_memory_getc(rd_addr_t + k);
                     c = {t, c[31:8]};
                  end
               end

               // Each word is shifted in from the top of the beat
               beat = {c, beat[511:32]};
               rd_addr +=4;
            end

            if (debug) begin
               $display("[%t] : DEBUG beat 0x%0128x", $realtime, beat);
            end

            sh_cl_pcim_rdata <= beat;
         end //if(cl_sh_pcim_rready)
      end
      else begin
         sh_cl_pcim_rvalid <= 1'b0;
         sh_cl_pcim_rlast  <= 1'b0;
         first_rd_beat = 1'b1;
      end

      // Beat accepted: retire the command or count down the remaining beats
      if (cl_sh_pcim_rready && sh_cl_pcim_rvalid && (sh_cl_rd_data.size() != 0)) begin
         if (sh_cl_rd_data[0].len == 0) begin
            sh_cl_rd_data.pop_front();
            cl_sh_rd_cmds.pop_front();
            first_rd_beat = 1'b1;
         end
         else
           sh_cl_rd_data[0].len--;
      end
   end

   //=================================================
   //
   // Interrupt handling
   //
   //=================================================

   logic [15:0] int_ack;    // acks requested through set_ack_bit, sent to the CL for one clock
   logic [15:0] int_pend;   // interrupts requested by the CL and not yet acknowledged

   initial begin
      int_ack  = 16'h0000;
      int_pend = 16'h0000;
   end

   // Latch interrupt requests into int_pend and forward the accumulated acks.
   always @(posedge clk_core) begin
      for (int idx=0; idx<16; idx++) begin
         if (cl_sh_apppf_irq_req[idx] == 1'b1) begin
            int_pend |= 1'b1 << idx;
         end
      end

      if (|int_ack) begin
         for (int idx=0; idx<16; idx++) begin
            if (int_ack[idx] == 1'b1) begin
               $display("[%t] : Sending ack for interrupt %2d", $realtime, idx);
            end
         end
      end

      sh_cl_apppf_irq_ack <= int_ack;
      int_ack = 16'h0000;
   end

   // Call the test bench interrupt handler on every clock for each pending
   // interrupt; a pending bit is cleared only by set_ack_bit.
   always @(posedge clk_core) begin
      for (int idx=0; idx<16; idx++) begin
         if (int_pend[idx] == 1'b1) begin
            `TB_TOP.int_handler(idx);
         end
      end
   end

   // AXI-Lite BFM driving the SDA interface of the CL
   axil_bfm sda_axil_bfm(
                         .axil_clk     (clk_core),
.axil_rst_n (sync_rst_n),
                         .axil_awvalid (sda_cl_awvalid),
                         .axil_awaddr  (sda_cl_awaddr),
                         .axil_awready (cl_sda_awready),
                         .axil_wvalid  (sda_cl_wvalid),
                         .axil_wdata   (sda_cl_wdata),
                         .axil_wstrb   (sda_cl_wstrb),
                         .axil_wready  (cl_sda_wready),
                         .axil_bvalid  (cl_sda_bvalid),
                         .axil_bresp   (cl_sda_bresp),
                         .axil_bready  (sda_cl_bready),
                         .axil_arvalid (sda_cl_arvalid),
                         .axil_araddr  (sda_cl_araddr),
                         .axil_arready (cl_sda_arready),
                         .axil_rvalid  (cl_sda_rvalid),
                         .axil_rdata   (cl_sda_rdata),
                         .axil_rresp   (cl_sda_rresp),
                         .axil_rready  (sda_cl_rready));

   // AXI-Lite BFM driving the OCL interface of the CL
   axil_bfm ocl_axil_bfm(
                         .axil_clk     (clk_core),
                         .axil_rst_n   (sync_rst_n),
                         .axil_awvalid (ocl_cl_awvalid),
                         .axil_awaddr  (ocl_cl_awaddr),
                         .axil_awready (cl_ocl_awready),
                         .axil_wvalid  (ocl_cl_wvalid),
                         .axil_wdata   (ocl_cl_wdata),
                         .axil_wstrb   (ocl_cl_wstrb),
                         .axil_wready  (cl_ocl_wready),
                         .axil_bvalid  (cl_ocl_bvalid),
                         .axil_bresp   (cl_ocl_bresp),
                         .axil_bready  (ocl_cl_bready),
                         .axil_arvalid (ocl_cl_arvalid),
                         .axil_araddr  (ocl_cl_araddr),
                         .axil_arready (cl_ocl_arready),
                         .axil_rvalid  (cl_ocl_rvalid),
                         .axil_rdata   (cl_ocl_rdata),
                         .axil_rresp   (cl_ocl_rresp),
                         .axil_rready  (ocl_cl_rready));

   //=================================================
   //
   // Clock frequency checkers
   //
   // While chk_clk_freq is set, each checker measures the time between two
   // consecutive rising edges of its clock and compares it with twice the
   // configured toggle delay. Periods are measured with $realtime so that
   // fractional periods (e.g. the 2.22ns recipe) are not rounded to whole
   // time units, and the comparison uses a small relative tolerance because
   // the measured value is a difference of two real numbers.
   //
   //=================================================

   // Maximum accepted relative deviation between measured and expected period
   localparam real CLK_PERIOD_REL_TOL = 1.0e-6;

   // Returns 1 when 'measured' deviates from 'expected' by more than the tolerance
   function automatic bit clk_period_mismatch(input real measured, input real expected);
      real delta;
      delta = measured - expected;
      if (delta < 0.0)
        delta = -delta;
      return (delta > (expected * CLK_PERIOD_REL_TOL));
   endfunction // clk_period_mismatch

   // Check core clock frequency when chk_clk_freq is set
   always @(posedge clk_core) begin
      if (chk_clk_freq) begin
         core_rising_edge = $realtime;
         @(posedge clk_core)
           core_clk_period = $realtime - core_rising_edge;
         if (clk_period_mismatch(core_clk_period, CORE_DLY * 2)) begin
            clk_err_count++;
            $display("Error - core clk frequency check failed. Expected %0f Actual %0f", CORE_DLY * 2, core_clk_period);
         end
      end
   end

   // Check main clock frequency when chk_clk_freq is set
   always @(posedge clk_main_a0) begin
      if (chk_clk_freq) begin
         main_rising_edge = $realtime;
         @(posedge clk_main_a0)
           main_clk_period = $realtime - main_rising_edge;
         if (clk_period_mismatch(main_clk_period, MAIN_A0_DLY * 2)) begin
            clk_err_count++;
            $display("Error - main a0 clk frequency check failed. Expected %0f Actual %0f", MAIN_A0_DLY * 2, main_clk_period);
         end
      end
   end

   // Check extra a1 clock frequency when chk_clk_freq is set
   always @(posedge clk_extra_a1) begin
      if (chk_clk_freq) begin
         extra_a1_rising_edge = $realtime;
         @(posedge clk_extra_a1)
           extra_a1_clk_period = $realtime - extra_a1_rising_edge;
         if (clk_period_mismatch(extra_a1_clk_period, EXTRA_A1_DLY * 2)) begin
            clk_err_count++;
            $display("Error - extra a1 clk frequency check failed. Expected %0f Actual %0f", EXTRA_A1_DLY * 2, extra_a1_clk_period);
         end
      end
   end

   // Check clk_hbm_ref clock frequency when chk_clk_freq is set
   always @(posedge clk_hbm_ref) begin
      if (chk_clk_freq) begin
         hbm_rising_edge = $realtime;
         @(posedge clk_hbm_ref)
           hbm_clk_period = $realtime - hbm_rising_edge;
         if (clk_period_mismatch(hbm_clk_period, HBM_DLY * 2)) begin
            clk_err_count++;
            $display("Error - hbm clk frequency check failed. Expected %0f Actual %0f", HBM_DLY * 2, hbm_clk_period);
         end
      end
   end

   //=================================================
   //
   // power_up
   //
   //   Description: selects the clock recipe, then asserts and deasserts various resets
   //   Inputs:  clk_recipe_a - selects the toggle delays of clk_main_a0, clk_core
   //                           and clk_extra_a1 (default A0)
   //            clk_recipe_b - accepted for interface compatibility; not used here
   //            clk_recipe_c - accepted for interface compatibility; not used here
   //   Outputs: None
   //
   //   The three reset sources are held low for 5000ns, released, and the
   //   task returns 1000ns later. HBM_DLY is not changed by any recipe.
   //   An unknown recipe ends the simulation.
   //
   //=================================================
   task power_up(input ClockRecipe::A_RECIPE clk_recipe_a = ClockRecipe::A0,
                       ClockRecipe::B_RECIPE clk_recipe_b = ClockRecipe::B0,
                       ClockRecipe::C_RECIPE clk_recipe_c = ClockRecipe::C0);
      case (clk_recipe_a)
         ClockRecipe::A0: begin
            MAIN_A0_DLY  = 4ns;
            CORE_DLY     = 4ns;
            EXTRA_A1_DLY = 8ns;
         end
         ClockRecipe::A1: begin
            MAIN_A0_DLY  = 2ns;
            CORE_DLY     = 2ns;
            EXTRA_A1_DLY = 4ns;
         end
         ClockRecipe::A2: begin
            MAIN_A0_DLY  = 32ns;
            CORE_DLY     = 32ns;
            EXTRA_A1_DLY = 32ns;
         end
         ClockRecipe::A3: begin
            MAIN_A0_DLY  = 8ns;
            CORE_DLY     = 8ns;
            EXTRA_A1_DLY = 16ns;
         end
         ClockRecipe::A4: begin
            MAIN_A0_DLY  = 2.22ns;
            CORE_DLY     = 2.22ns;
            EXTRA_A1_DLY = 4.44ns;
         end
         ClockRecipe::A5: begin
            MAIN_A0_DLY  = 2.5ns;
            CORE_DLY     = 2.5ns;
            EXTRA_A1_DLY = 5ns;
         end
         default: begin
            $display("Error - Invalid Clock Profile Selected.");
            $finish;
         end
      endcase

      // Hold every reset source low, then release them together
      rst_n_i      = 1'b0;
      rst_main_n_i = 1'b0;
      rst_xtra_n_i = 1'b0;
      #5000ns;
      rst_n_i      = 1'b1;
      rst_main_n_i = 1'b1;
      rst_xtra_n_i = 1'b1;
      #1000ns;
endtask // power_up

   // Protocol checker monitor. Re-evaluated whenever one of the four
   // "asserted" outputs changes: counts an error when any of them is 1,
   // otherwise counts an X event when any of them is X.
   always @* begin
      if ((pcis_pc_asserted === 'b1) || (pcim_pc_asserted === 'b1) ||
          (ocl_pc_asserted === 'b1)  || (sda_pc_asserted === 'b1))
        prot_err_count++;
      else if ((pcis_pc_asserted === 1'bx) || (pcim_pc_asserted === 1'bx) ||
               (ocl_pc_asserted === 1'bx)  || (sda_pc_asserted === 1'bx))
        prot_x_count++;
   end

   //=================================================
   //
   // set_chk_clk_freq
   //
   //   Description: Enables (default) or disables the clock frequency checkers
   //   Outputs: None
   //
   //=================================================
   function void set_chk_clk_freq(logic chk_freq = 1'b1);
      $display("[%t] : Start checking clock frequency...", $realtime);
      chk_clk_freq = chk_freq;
   endfunction // set_chk_clk_freq

   //=================================================
   //
   // chk_prot_err_stat
   //
   //   Description: Reports whether the protocol checkers flagged a violation
   //                or drove 'X' on an "asserted" output
   //   Outputs: 1 when either counter is greater than zero, 0 otherwise
   //
   //=================================================
   function logic chk_prot_err_stat();
      $display("[%t] : Checking protocol checker error status...", $realtime);

      // Violations take precedence over X propagation in the report
      if (prot_err_count > 0) begin
         $display("[%t] : *** Protocol Checker Violations Detected. Refer to log file for details about each specific error ***", $realtime);
         return 1'b1;
      end

      if (prot_x_count > 0) begin
         $display("[%t] : *** 'X' propagation detected in protocol checker status bits. Please dump waves and look at pc_status bits for more information***", $realtime);
         return 1'b1;
      end

      return 1'b0;
   endfunction // chk_prot_err_stat

   //=================================================
   //
   // chk_clk_err_cnt
   //
   //   Description: Reports whether any clock frequency check has failed
   //   Outputs: 1 when clk_err_count is greater than zero, 0 otherwise
   //
   //=================================================
   function logic chk_clk_err_cnt();
      $display("[%t] : Checking clock error status...", $realtime);

      if (clk_err_count > 0) begin
         $display("[%t] : *** Clock Frequency Errors Detected. Refer to log file for details about each specific error ***", $realtime);
         return 1'b1;
      end

      return 1'b0;
   endfunction // chk_clk_err_cnt

   //=================================================
   //
   // get_ecc_err_cnt
   //
   //   Description: Returns the current ECC error count
   //   Outputs: ecc_err_cnt
   //
   //=================================================
   function int get_ecc_err_cnt();
      get_ecc_err_cnt = ecc_err_cnt;
   endfunction // get_ecc_err_cnt

   //=================================================
   //
   // nsec_delay
   //
   //   Description: waits for dly nanoseconds (default 10000)
   //   Outputs: None
   //
   //=================================================
   task nsec_delay(int dly = 10000);
      #(dly * 1ns);
   endtask // nsec_delay

   //=================================================
   //
   // set_virtual_dip_switch
   //
   //   Description: drives the lower 16 bits of dip_switch onto the virtual dip switches
   //   Outputs: None
   //
   //=================================================
   function void set_virtual_dip_switch(int dip_switch);
      sh_cl_status_vdip = dip_switch[15:0];
   endfunction // set_virtual_dip_switch

   //=================================================
   //
   // get_virtual_dip_switch
   //
   //   Description: reads back the virtual dip switch setting
   //   Outputs: dip_status
   //
   //=================================================
   function logic[15:0] get_virtual_dip_switch();
      get_virtual_dip_switch = sh_cl_status_vdip;
   endfunction // get_virtual_dip_switch

   //=================================================
   //
   // get_virtual_led
   //
   //   Description: reads the virtual led value driven by the CL
   //   Outputs: led status
   //
   //=================================================
   function logic[15:0] get_virtual_led();
      get_virtual_led = cl_sh_status_vled;
   endfunction // get_virtual_led

   //=================================================
   //
   // get_global_counter_0
   //
   //   Description: reads global counter 0 value;
   //   Outputs: 64 bit counter
   //
   //=================================================
   function logic[63:0] get_global_counter_0();
      get_global_counter_0 = sh_cl_glcount0;
   endfunction // get_global_counter_0

   //=================================================
   //
   // get_global_counter_1
   //
   //   Description: reads global counter 1 value;
   //   Outputs: 64 bit counter
   //
//=================================================
   function logic[63:0] get_global_counter_1();
      get_global_counter_1 = sh_cl_glcount1;
   endfunction // get_global_counter_1

   //=================================================
   //
   // kernel_reset
   //
   // Description: drives kernel_rst_n with d (default 1)
   // Outputs: None
   //
   //=================================================
   function void kernel_reset(input logic d = 1);
      kernel_rst_n = d;
   endfunction // kernel_reset

   //=================================================
   //
   // power_down
   //
   // Description: waits 50ns, drives rst_n_i and rst_main_n_i low,
   //              then waits another 50ns before returning
   // Outputs: None
   //
   //=================================================
   task power_down;
      #50ns;

      rst_n_i      = 1'b0;
      rst_main_n_i = 1'b0;

      #50ns;
   endtask // power_down

   //=================================================
   //
   // issue_flr
   //
   // Description: raises sh_cl_flr_assert, blocks until cl_sh_flr_done
   //              equals 1, then drops sh_cl_flr_assert again.
   //              There is no timeout on the wait.
   // Outputs: None
   //
   //=================================================
   task issue_flr();
      sh_cl_flr_assert = 1'b1;
      wait (cl_sh_flr_done == 1);
      sh_cl_flr_assert = 1'b0;
   endtask // issue_flr

   //=================================================
   //
   // map_host_memory
   //
   // Description: records addr in host_memory_addr and sets
   //              use_c_host_memory in the testbench top, connecting
   //              C host memory to simulation memory.
   // Outputs: None
   //
   //=================================================
   task map_host_memory(input logic [63:0] addr);
      if (debug)
         $display("[%t] : DEBUG mapping host memory to 0x%16x", $realtime, addr);

      host_memory_addr           = addr;
      `TB_TOP.use_c_host_memory  = 1'b1;
   endtask // map_host_memory

   //=================================================
   //
   // set_ack_bit
   //
   // Description: acknowledges interrupt int_num: sets int_ack[int_num]
   //              and clears int_pend[int_num]
   // Outputs: None
   //
   //=================================================
   function void set_ack_bit(input int int_num);
      int_ack[int_num]  = 1'b1;
      int_pend[int_num] = 1'b0;
   endfunction // set_ack_bit

   //=================================================
   //
   // poke
   //
   // Description: used to write a single beat of data at addr into one of the CL AXI ports specified by intf.
// Intf
   //   0 = PCIS
   //   1 = SDA
   //   2 = OCL
   //
   // id - AXI bus ID (PCIS only; not forwarded to the SDA/OCL BFMs)
   //
   // Size
   //   0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes (32 bits), 3 = 8 bytes (64 bits)
   //   Byte strobes generated per enum value:
   //     UINT8 = 1, UINT16 = 2, UINT32 = 4, UINT64 = 8,
   //     UINT128 = 16, UINT256 = 32, UINT512 = 64 bytes
   //   (size is only used on the PCIS path)
   //
   // Outputs: None
   //
   //=================================================
   task poke(input logic [63:0] addr,
             logic [511:0] data,
             logic [5:0] id = 6'h0,
             DataSize::DATA_SIZE size = DataSize::UINT32,
             AxiPort::AXI_PORT intf = AxiPort::PORT_DMA_PCIS);

      logic [63:0] strb;

      // One strobe bit per byte, right-justified; shifted into its byte
      // lane below for the PCIS port.
      case (size)
        DataSize::UINT8  : strb = 64'h0000_0000_0000_0001;
        DataSize::UINT16 : strb = 64'h0000_0000_0000_0003;
        DataSize::UINT32 : strb = 64'h0000_0000_0000_000F;
        DataSize::UINT64 : strb = 64'h0000_0000_0000_00FF;
        DataSize::UINT128: strb = 64'h0000_0000_0000_FFFF;
        DataSize::UINT256: strb = 64'h0000_0000_FFFF_FFFF;
        DataSize::UINT512: strb = 64'hFFFF_FFFF_FFFF_FFFF;
        default: begin
           $display("FATAL ERROR - Invalid size specified");
           $finish;
        end
      endcase // case (size)

      case (intf)
        AxiPort::PORT_DMA_PCIS: begin
           AXI_Command axi_cmd;
           AXI_Data    axi_data;
           logic [1:0] resp;

           // Single-beat write command.
           axi_cmd.addr = addr;
           axi_cmd.len  = 0;
           axi_cmd.size = size;
           axi_cmd.id   = id;

           sh_cl_wr_cmds.push_back(axi_cmd);

           // Place data and strobes in the byte lanes selected by addr[5:0].
           axi_data.data = data << (addr[5:0] * 8);
           axi_data.strb = strb << addr[5:0];
           axi_data.id   = id;
           axi_data.last = 1'b1;

           #20ns sh_cl_wr_data.push_back(axi_data);

           // Poll for the write response and consume it.
           while (cl_sh_b_resps.size() == 0)
             #20ns;

           resp = cl_sh_b_resps[0].resp;   // captured only; not checked here
           cl_sh_b_resps.pop_front();
        end
        AxiPort::PORT_SDA: begin
           sda_axil_bfm.poke(addr, data);
        end
        AxiPort::PORT_OCL: begin
           ocl_axil_bfm.poke(addr, data);
        end
        default: begin
           $display("FATAL ERROR - Invalid CL port specified");
           $finish;
        end
      endcase // case (intf)
   endtask // poke

   //=================================================
   //
   // poke_pcis
   //
   // Description: single-beat PCIS write with caller-supplied byte strobes.
   //              data and strb are pushed as given (no byte-lane shifting).
   //   addr - write address
   //   data - full 512-bit beat
   //   strb - 64-bit byte strobe for the beat
   //   id   - AXI bus ID
   //
   // NOTE(review): axi_cmd.size is not assigned here (poke() does assign
   //               it). Confirm how the write-command driver treats size
   //               for commands issued by this task.
   //
   // Outputs: None
   //
   //=================================================
   task poke_pcis(input logic [63:0] addr,
                  logic [511:0] data,
                  logic [63:0] strb,
                  logic [5:0] id = 6'h0);

      AXI_Command axi_cmd;
      AXI_Data    axi_data;
      logic [1:0] resp;

      axi_cmd.addr = addr;
      axi_cmd.len  = 0;
      axi_cmd.id   = id;

      sh_cl_wr_cmds.push_back(axi_cmd);

      axi_data.data = data;
      axi_data.strb = strb;
      axi_data.id   = id;
      axi_data.last = 1'b1;

      #20ns sh_cl_wr_data.push_back(axi_data);

      // Poll for the write response and consume it.
      while (cl_sh_b_resps.size() == 0)
        #20ns;

      resp = cl_sh_b_resps[0].resp;   // captured only; not checked here
      cl_sh_b_resps.pop_front();
   endtask // poke_pcis

   //===========================================================================
   //
   // poke_pcis_wc
   //
   // Description: Write combine version of poke (will only work on PCIS Intf)
   //   id            - AXI bus ID
   //   addr          - Address for transfer
   //   data[$][31:0] - Queue of DWs (must not be empty; consumed by this task)
   //   size          - AXI size (only 2 or 6 accepted)
   //
   // The DWs are packed into 64-byte beats starting at DW lane addr[5:2]
   // of the first beat; every DW written gets a 4'hf strobe.
   //
   // Fatal conditions ($display + $finish):
   //   - size == 2 with more than one DW or addr[5:0] != 0
   //   - size other than 2 or 6
   //   - total bytes plus the addr[5:0] offset exceeds 4096
   //   - data queue is empty
   //
   // Outputs: None
   //
   //==========================================================================
   task poke_pcis_wc(input logic [63:0] addr,
                     logic [31:0] data [$],
                     logic [5:0] id = 6'h0,
                     logic [2:0] size = 3'd6
                     );

      AXI_Command  axi_cmd;
      AXI_Data     axi_data;
      logic [1:0]  resp;
      logic [31:0] dw_idx;
      logic [31:0] slice_dw_idx;
      logic [31:0] total_bytes;
      logic [31:0] max_bytes;

      total_bytes = data.size() * 4;

      // NOTE(review): the message says "DW aligned" but the check requires
      // addr[5:0] == 0 (64-byte alignment). Behavior left as-is.
      if (size == 3'd2 && ((total_bytes != 4) || (addr[5:0] != 6'd0))) begin
         $display("FATAL ERROR: poke_pcis_wc:: Size = 2. DW count should be equal to 1 and addr should be DW aligned");
         $finish;
      end

      if (size != 3'd6 && size != 3'd2) begin
         $display("FATAL ERROR: poke_pcis_wc:: Only Size = 2 or 6 supported");
         $finish;
      end

      max_bytes = 4096 - addr[5:0];
      if (total_bytes > max_bytes) begin
         $display("FATAL ERROR: poke_pcis_wc:: AXI transaction is more than 4096 bytes");
         $finish;
      end

      // With no DWs and an aligned address the burst length below would
      // evaluate to 0 - 1 and wrap, producing a bogus multi-beat burst.
      if (total_bytes == 0) begin
         $display("FATAL ERROR: poke_pcis_wc:: data queue is empty, nothing to write");
         $finish;
      end

      // len = number of 64-byte beats spanned (offset included) minus one.
      axi_cmd.addr = addr;
      axi_cmd.len  = (total_bytes + addr[5:0]) % 64 ? ((total_bytes + addr[5:0])>>6)
                                                    : ((total_bytes + addr[5:0])>>6) - 1;
      axi_cmd.size = size;
      axi_cmd.id   = id;

      sh_cl_wr_cmds.push_back(axi_cmd);

      dw_idx = 0;
      for (int idx = 0; idx <= axi_cmd.len; idx++) begin
         axi_data.id   = id;
         axi_data.data = 512'd0;
         axi_data.strb = '0;

         // Only the first beat starts at a non-zero DW lane.
         slice_dw_idx = idx == 0 ? addr[5:2] : 0;

         while ((slice_dw_idx < 16) && (dw_idx < total_bytes/4)) begin
            assert(data.size() > 0) else begin
               $display("FATAL ERROR: poke_pcis_wc:: Something went wrong. data queue already empty");
               $finish;
            end;
            axi_data.data[slice_dw_idx*32 +: 32] = data.pop_front();
            axi_data.strb[slice_dw_idx*4 +: 4]   = 4'hf;
            dw_idx++;
            slice_dw_idx++;
         end

         axi_data.last = (axi_cmd.len == idx);
         sh_cl_wr_data.push_back(axi_data);
      end // for (idx = 0; idx <= len; idx++)

      // Poll for the write response and consume it.
      while (cl_sh_b_resps.size() == 0)
        #20ns;

      resp = cl_sh_b_resps[0].resp;   // captured only; not checked here
      cl_sh_b_resps.pop_front();
   endtask // poke_pcis_wc

   //=================================================
   //
   // peek
   //
   // Description: used to read a single beat of data at addr from one of the four CL AXI ports specified by intf.
// Intf
   //   0 = PCIS
   //   1 = SDA
   //   2 = OCL
   //
   // id - AXI bus ID (PCIS only; not forwarded to the SDA/OCL BFMs)
   //
   // Size
   //   0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes (32 bits), 3 = 8 bytes (64 bits)
   //   On PCIS, 2**size bytes are copied out of the returned beat,
   //   starting at byte lane addr[5:0], into the low bytes of data.
   //
   // Outputs: Read Data Value
   //
   //=================================================
   task peek(input logic [63:0] addr,
             output logic [511:0] data,
             input logic [5:0] id = 6'h0,
             DataSize::DATA_SIZE size = DataSize::UINT32,
             AxiPort::AXI_PORT intf = AxiPort::PORT_DMA_PCIS);

      AXI_Command rd_cmd;
      int         lane_byte;   // first byte lane within the 64-byte beat
      int         lane_bit;    // same position expressed in bits

      data = 0;

      case (intf)
        AxiPort::PORT_OCL : begin
           ocl_axil_bfm.peek(addr, data);
        end
        AxiPort::PORT_SDA : begin
           sda_axil_bfm.peek(addr, data);
        end
        AxiPort::PORT_DMA_PCIS : begin
           // Queue a single-beat read command.
           rd_cmd.addr = addr;
           rd_cmd.len  = 0;
           rd_cmd.size = size;
           rd_cmd.id   = id;
           sh_cl_rd_cmds.push_back(rd_cmd);

           lane_byte = addr[5:0];
           lane_bit  = lane_byte*8;

           // Poll until the read data shows up.
           while (cl_sh_rd_data.size() == 0)
             #20ns;

           // Right-justify the addressed bytes into the result.
           for (int b = 0; b < 2**size; b++) begin
              data[(b*8)+:8] = cl_sh_rd_data[0].data[(lane_bit+(b*8))+:8];
           end

           cl_sh_rd_data.pop_front();
        end
        default: begin
           $display("FATAL ERROR - Invalid CL port specified");
           $finish;
        end
      endcase // case (intf)
   endtask // peek

   //=================================================
   //
   // peek_pcis
   //
   // Description: single-beat PCIS read that returns the whole 512-bit
   //              beat unmodified (no byte-lane extraction).
   //   addr - read address
   //   id   - AXI bus ID
   //
   // Outputs: Read Data Value (full beat)
   //
   //=================================================
   task peek_pcis(input logic [63:0] addr,
                  output logic [511:0] data,
                  input logic [5:0] id = 6'h0);

      AXI_Command rd_cmd;

      rd_cmd.addr = addr;
      rd_cmd.len  = 0;
      rd_cmd.id   = id;
      sh_cl_rd_cmds.push_back(rd_cmd);

      // Poll until the read data shows up, then hand back the entire beat.
      while (cl_sh_rd_data.size() == 0)
        #20ns;

      data = cl_sh_rd_data[0].data;
      cl_sh_rd_data.pop_front();
   endtask // peek_pcis

   //=================================================
   //
   // dma_buffer_to_cl
   //
   // Description: used to move a data buffer to the CL via the PCIS AXI interface using one of four channels.
   // The size of the transfer is given by the len argument.
//
   // chan     = 0-3 channel number
   // src_addr = host buffer address the data is read from
   // cl_addr  = starting CL AXI addr
   // len      = transfer length in bytes
   //
   // The operation is only queued here; it is issued once the channel
   // has been started with start_dma_to_cl().
   //
   // Outputs: None
   //
   //=================================================
   function void dma_buffer_to_cl(input logic [1:0] chan, logic [63:0] src_addr, logic [63:0] cl_addr, logic [27:0] len);
      DMA_OP dop;

      dop.buffer  = src_addr;
      dop.cl_addr = cl_addr;
      dop.len     = len;

      h2c_dma_list[chan].push_back(dop);
   endfunction // dma_buffer_to_cl

   // Queues a CL->host DMA of len bytes from cl_addr into the host buffer
   // at dst_addr on channel chan. Issued once start_dma_to_buffer() is called.
   function automatic void dma_cl_to_buffer(input logic [1:0] chan, logic [63:0] dst_addr, input [63:0] cl_addr, logic [27:0] len);
      DMA_OP dop;

      dop.buffer  = dst_addr;
      dop.cl_addr = cl_addr;
      dop.len     = len;

      c2h_dma_list[chan].push_back(dop);
   endfunction // dma_cl_to_buffer

   // Marks host->CL channel chan as started and clears its done flag.
   function void start_dma_to_cl(input int chan);
      h2c_dma_started[chan] = 1'b1;
      h2c_dma_done[chan]    = 1'b0;
   endfunction // start_dma_to_cl

   // Marks CL->host channel chan as started and clears its done flag.
   function void start_dma_to_buffer(input int chan);
      c2h_dma_started[chan] = 1'b1;
      c2h_dma_done[chan]    = 1'b0;
   endfunction // start_dma_to_buffer

   // Returns h2c_dma_done[chan]; 1 = done.
   function bit is_dma_to_cl_done(input int chan); // 1 = done
      //$display("In function is_dma_to_cl_done h2c_dma_done is %x \n", h2c_dma_done[chan]);
      return h2c_dma_done[chan];
   endfunction // is_dma_to_cl_done

   // Returns c2h_dma_done[chan]; 1 = done.
   function bit is_dma_to_buffer_done(input int chan); // 1 = done
      //$display("In function is_dma_to_buffer_done c2h_dma_done is %x \n", c2h_dma_done[chan]);
      return c2h_dma_done[chan];
   endfunction // is_dma_to_buffer_done

   // Returns ddr_is_ready; 1 = ready.
   function bit is_ddr_ready(); // 1 = done
      return ddr_is_ready;
   endfunction // is_ddr_ready

   //=================================================
   //
   // sh->cl xdma Interface
   //
   // For every started host->CL channel with a queued operation, pops one
   // operation per clock and expands it into AXI write commands (64-byte
   // beats, bursts clipped at 4K boundaries) and the matching data beats,
   // fetching payload bytes from host memory via hm_get_byte.
   // The CL address must be 64-byte aligned and the length non-zero.
   //
   //=================================================
   always @(negedge rst_n or posedge clk_core) begin
      if (!rst_n) begin
         // NOTE(review): the same two vectors are also reset by the
         // cl->sh command generator further down.
         h2c_dma_started <= 4'b0;
         c2h_dma_started <= 4'b0;
      end
      else begin
         AXI_Command  axi_cmd;
         AXI_Data     axi_data;
         DMA_OP       dop;
         int          num_of_data_beats;
         int          byte_cnt;
         int          num_bytes;
         logic [63:0] aligned_addr;
         bit          last_beat;
         logic [5:0]  start_addr;
         bit          aligned;
         bit          last_data_beat;

         num_of_data_beats = 0;
         last_data_beat    = 0;
         byte_cnt          = 0;
         num_bytes         = 0;
         aligned_addr      = 0;
         last_beat         = 0;
         start_addr        = 0;
         aligned           = 0;

         for (int chan = 0; chan < 4; chan++) begin
            if ((h2c_dma_started[chan] != 1'b0) && (h2c_dma_list[chan].size() > 0)) begin
               dop = h2c_dma_list[chan].pop_front();

               if (dop.cl_addr[5:0] !== 6'h00) begin
                  $fatal(1, "Address in a SH->CL transfer should be aligned to 64 byte boundary for address %x \n", dop.cl_addr);
               end
               // A zero length would make (len - 1)/64 wrap to a huge
               // unsigned beat count below.
               if (dop.len == 0) begin
                  $fatal(1, "Length of a SH->CL transfer should be non-zero for address %x \n", dop.cl_addr);
               end

               aligned_addr      = {dop.cl_addr[63:6], 6'h00};
               num_of_data_beats = ((dop.len + dop.cl_addr[5:0] - 1)/64) + 1;
               byte_cnt          = 0;
               // last_beat = 1 when the final beat is only partially filled.
               last_beat         = ((dop.len + dop.cl_addr[5:0])%64 > 0);
               start_addr        = dop.cl_addr[5:0];
               aligned           = (aligned_addr == dop.cl_addr);

               // burst_cnt counts data beats issued so far; it is advanced
               // inside the data-beat loop, one per beat.
               for(int burst_cnt=0; burst_cnt < num_of_data_beats; ) begin
                  if(burst_cnt == 0) begin // if first data beat
                     axi_cmd.addr = dop.cl_addr;
                     // All full beats go in the first burst; a partial
                     // last beat is issued as its own command.
                     axi_cmd.len  = (num_of_data_beats==1) ? 0 :
                                    aligned ? (num_of_data_beats - 1 - last_beat) : 0;

                     // handle the condition if addr is crossing 4k page boundary
                     if(dop.cl_addr[11:0] + ((axi_cmd.len + 1) * 64) > 4095) begin
                        axi_cmd.len = ((4096 - dop.cl_addr[11:0])/64) - 1;
                     end
                  end
                  else if((num_of_data_beats - 1) - burst_cnt == 0) begin // last data beat
                     axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                     axi_cmd.len  = 0;
                  end
                  else begin // intermediate data beats
                     axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                     axi_cmd.len  = num_of_data_beats - last_beat - burst_cnt - 1;

                     // handle the condition if addr is crossing 4k page boundary
                     if( (axi_cmd.addr[11:0] + ((axi_cmd.len + 1) * 64)) > 4095) begin
                        $display("Address is going to cross 4K boundary \n");
                        axi_cmd.len = ((4096 - axi_cmd.addr[11:0])/64) - 1;
                     end
                  end

                  axi_cmd.id   = chan;
                  axi_cmd.size = 6;

                  sh_cl_wr_cmds.push_back(axi_cmd);
                  h2c_dma_wr_cmd_cnt[chan]++;

                  // loop to do multiple data beats
                  for(int j = 0; j <= axi_cmd.len; j++) begin
                     axi_data.data = 0;
                     axi_data.strb = 64'b0;
                     axi_data.id   = chan;

                     last_data_beat = (((num_of_data_beats - 1) - burst_cnt) == 0) ? 1 : 0;
                     num_bytes      = last_beat ? (dop.len + dop.cl_addr[5:0])%64 : 64;
                     axi_data.last  = (j == axi_cmd.len) ? 1 : 0;

                     if(num_of_data_beats == 1) begin
                        // Whole transfer fits in one beat.
                        num_bytes = (dop.len == 64) ? 64 : (dop.len)%64;
                        for(int i=start_addr[5:0]; i < (num_bytes+start_addr[5:0]); i++) begin
                           axi_data.data = axi_data.data | `TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)) << 8*i;
                           axi_data.strb = axi_data.strb | 1 << i;
                           byte_cnt++;
                        end
                     end
                     else if(last_data_beat) begin
                        // Final beat of a multi-beat transfer: num_bytes valid bytes.
                        for(int i=0; i < num_bytes; i++) begin
                           axi_data.data = axi_data.data | `TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)) << 8*i;
                           axi_data.strb = axi_data.strb | 1 << i;
                           byte_cnt++;
                        end
                     end
                     else begin
                        // Full beat: shift each new byte in from the top so the
                        // first byte fetched ends up in the lowest lane.
                        for(int i=start_addr[5:0]; i < 64; i++) begin
                           axi_data.data = {`TB_TOP.hm_get_byte(.addr(dop.buffer + byte_cnt)), axi_data.data[511:8]};
                           axi_data.strb = {1'b1, axi_data.strb[63:1]};
                           byte_cnt++;
                        end
                     end

                     sh_cl_wr_data.push_back(axi_data);
                     start_addr = 0;
                     burst_cnt++;
                  end // for(int j = 0; j <= axi_cmd.len; j++) begin
               end // for(int burst_cnt=0; burst_cnt < num_of_data_beats; )
            end // if ((h2c_dma_started[chan] != 1'b0) && (h2c_dma_list[chan].size() > 0))
         end // for (int chan = 0; chan < 4; chan++)
      end // else
   end // always

   //=================================================
   //
   // cl->sh xdma data Interface
   //
   // Consumes the read data beat at the head of cl_sh_rd_data when its id
   // matches a started CL->host channel, copying its bytes into host
   // memory via hm_put_byte. At most dop.len bytes are written per DMA
   // operation, so a partial last beat does not run past the host buffer.
   //
   //=================================================
   always @(negedge rst_n or posedge clk_core) begin
      if (!rst_n) begin
         c2h_dma_done <= 4'b0;
      end
      else begin
         DMA_OP dop;
         static int byte_cnt[4];   // bytes written so far, per channel

         for (int chan = 0; chan < 4; chan++) begin
            if((cl_sh_rd_data.size() > 0) && (c2h_dma_started[chan] != 1'b0)) begin
               if(chan == cl_sh_rd_data[0].id) begin
                  // One entry was queued per expected beat by the command generator.
                  dop = c2h_data_dma_list[chan].pop_front();

                  for (int i = dop.cl_addr[5:0]; (i < 64) && (byte_cnt[chan] < dop.len); i++) begin
                     `TB_TOP.hm_put_byte(.addr(dop.buffer + byte_cnt[chan]), .d(cl_sh_rd_data[0].data[(i*8)+:8]));
                     if (debug) begin
                        // NOTE(review): dop.buffer[i] is bit i of the host
                        // address, not a data byte. Message left unchanged.
                        $display("[%t] - DEBUG read data dop.buffer[%2d]: %0x read_que data: %0x", $realtime, i, dop.buffer[i], cl_sh_rd_data[0].data[(i*8)+:8]);
                     end
                     byte_cnt[chan]++;
                  end

                  // Done once no further beats are expected on this channel.
                  c2h_dma_done[chan] = (c2h_data_dma_list[chan].size() == 0);
                  if ((c2h_dma_done[chan]) && (cl_sh_rd_data[0].last == 1))
                    c2h_dma_started[chan] = 0;

                  if ((cl_sh_rd_data[0].last == 1) && (byte_cnt[chan] >= dop.len)) // end of current DMA op, reset byte count
                    byte_cnt[chan] = 0;

                  // 2'b10 on the live PCIS read response bus = SLVERR.
                  if (cl_sh_dma_pcis_rresp == 2'b10) begin
                     if (ECC_EN == 1) begin
                        ecc_err_cnt++;
                        $display("ECC error detected in the read data from CL. A SLVERR response is returned\n");
                     end
                     else begin
                        // NOTE(review): the counter is cleared, not just left
                        // alone, when ECC_EN is off. Confirm this is intended.
                        ecc_err_cnt = 0;
                        $display("CL returned SLVERR on READ Response \n");
                     end
                  end

                  cl_sh_rd_data.pop_front();
               end // if (chan == cl_sh_rd_data[0].id)
            end
         end
      end
   end

   //=================================================
   //
   // cl->sh xdma Interface
   //
   // For every started CL->host channel with a queued operation, pops one
   // operation per clock and expands it into AXI read commands (64-byte
   // beats, bursts clipped at 4K boundaries). One c2h_data_dma_list entry
   // is queued per expected data beat for the data interface above.
   // The CL address must be 64-byte aligned and the length non-zero.
   //
   //=================================================
   always @(negedge rst_n or posedge clk_core) begin
      if (!rst_n) begin
         // NOTE(review): duplicates the reset in the sh->cl generator above.
         h2c_dma_started <= 4'b0;
         c2h_dma_started <= 4'b0;
      end
      else begin
         AXI_Command  axi_cmd;
         AXI_Data     axi_data;
         DMA_OP       dop;
         DMA_OP       data_dop;
         int          num_of_data_beats;
         bit          aligned;
         logic [63:0] aligned_addr;
         bit          last_beat;

         num_of_data_beats = 0;
         aligned           = 0;
         aligned_addr      = 0;
         last_beat         = 0;

         for (int chan = 0; chan < 4; chan++) begin
            if ((c2h_dma_started[chan] != 1'b0) && (c2h_dma_list[chan].size() > 0)) begin
               dop = c2h_dma_list[chan].pop_front();

               if (dop.cl_addr[5:0] !== 6'h00) begin
                  $fatal(1, "Address in a CL->SH transfer should be aligned to 64 byte boundary");
               end
               // A zero length would make (len - 1)/64 wrap to a huge
               // unsigned beat count below.
               if (dop.len == 0) begin
                  $fatal(1, "Length of a CL->SH transfer should be non-zero for address %x \n", dop.cl_addr);
               end

               num_of_data_beats = ((dop.len + dop.cl_addr[5:0] - 1)/64) + 1;
               aligned_addr      = {dop.cl_addr[63:6], 6'h00};
               aligned           = (aligned_addr == dop.cl_addr);
               // last_beat = 1 when the final beat is only partially filled.
               last_beat         = ((dop.len + dop.cl_addr[5:0])%64 > 0);

               // burst_cnt counts beats requested so far; it is advanced in
               // the per-beat loop at the bottom.
               for(int burst_cnt=0; burst_cnt < num_of_data_beats; ) begin
                  if(burst_cnt == 0) begin // if first data beat
                     axi_cmd.addr = dop.cl_addr;
                     axi_cmd.len  = (num_of_data_beats==1) ? 0 :
                                    aligned ? (num_of_data_beats - 1 - last_beat) : 0;

                     // handle the condition if addr is crossing 4k page boundary
                     if(aligned && (dop.cl_addr[11:0] + ((axi_cmd.len + 1) * 64) > 4095)) begin
                        axi_cmd.len = ((4096 - dop.cl_addr[11:0])/64) - 1;
                     end
                     axi_cmd.id = chan;
                  end
                  else if((num_of_data_beats - 1) - burst_cnt == 0) begin // last data beat
                     axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                     axi_cmd.len  = 0;
                     axi_cmd.id   = chan;
                  end
                  else begin // intermediate data beats
                     axi_cmd.addr = (aligned_addr + (burst_cnt * 64));
                     axi_cmd.len  = num_of_data_beats - last_beat - burst_cnt - 1;

                     // handle the condition if addr is crossing 4k page boundary
                     if( (axi_cmd.addr[11:0] + ((axi_cmd.len + 1) * 64)) > 4095) begin
                        axi_cmd.len = ((4096 - axi_cmd.addr[11:0])/64) - 1;
                     end
                     axi_cmd.id = chan;
                  end

                  axi_cmd.size = 6;
                  sh_cl_rd_cmds.push_back(axi_cmd);

                  // One bookkeeping entry per beat of this burst.
                  for(int i = 0; i <= axi_cmd.len; i++) begin
                     data_dop.buffer  = dop.buffer;
                     data_dop.cl_addr = (axi_cmd.addr + (i*64));
                     data_dop.len     = dop.len;
                     c2h_data_dma_list[chan].push_back(data_dop);
                     burst_cnt++;
                  end // for(int i = 0; i <= axi_cmd.len; i++)
               end // for(int burst_cnt=0; burst_cnt < num_of_data_beats; )
            end // if ((c2h_dma_started[chan] != 1'b0) && (c2h_dma_list[chan].size() > 0))
         end // for (int chan = 0; chan < 4; chan++)
      end // else begin
   end // always

   //=================================================
   //
   // poke_stat
   //
   // Description: writes data to a stats register.
   //   intf = "ddr": drives the sh_cl_ddr_stat_* bus on clk_main_a0 and
   //                 waits for cl_sh_ddr_stat_ack.
   //   intf = "hbm": issues an OCL write through the testbench top to
   //                 `HBM_STAT_BUS_CL; addr is not used on this path.
   //   Any other intf value is fatal.
   // Outputs: None
   //
   //=================================================
   task poke_stat(input logic [7:0] addr, string intf, logic[31:0] data);
      case (intf)
        "ddr": begin
           @ (posedge clk_main_a0);
           sh_cl_ddr_stat_wr    <= 1;
           sh_cl_ddr_stat_addr  <= addr;
           sh_cl_ddr_stat_wdata <= data;
           sh_cl_ddr_stat_rd    <= 0;

           // Write strobe is a single-cycle pulse.
           @ (posedge clk_main_a0);
           sh_cl_ddr_stat_wr <= 0;
           @ (posedge clk_main_a0);

           while (cl_sh_ddr_stat_ack !== 1'b1)
             @ (posedge clk_main_a0);

           sh_cl_ddr_stat_wdata <= 'hX;
        end
        "hbm": begin
           `TB_TOP.poke_ocl(.addr(`HBM_STAT_BUS_CL), .data(data));
        end
        default: begin
           $display("FATAL ERROR - Invalid DDR/HBM");
           $finish;
        end
      endcase // case (intf)
   endtask // poke_stat

   //=================================================
   //
   // peek_stat
   //
   // Description: reads a stats register.
   //   intf = "ddr": drives the sh_cl_ddr_stat_* bus on clk_main_a0, waits
   //                 for cl_sh_ddr_stat_ack and samples cl_sh_ddr_stat_rdata
   //                 one time unit later.
   //   intf = "hbm": issues an OCL read through the testbench top from
   //                 `HBM_STAT_BUS_CL; addr is not used on this path.
   //   Any other intf value is fatal.
   // Outputs: data - 32-bit read value
   //
   //=================================================
   task peek_stat(input logic [7:0] addr, string intf, output logic[31:0] data);
      case (intf)
        "ddr": begin
           @ (posedge clk_main_a0);
           sh_cl_ddr_stat_wr   <= 0;
           sh_cl_ddr_stat_addr <= addr;
           sh_cl_ddr_stat_rd   <= 1;

           // Read strobe is a single-cycle pulse.
           @ (posedge clk_main_a0);
           sh_cl_ddr_stat_rd <= 0;

           while (cl_sh_ddr_stat_ack !== 1'b1)
             @ (posedge clk_main_a0);

           #1;
           data = cl_sh_ddr_stat_rdata;

           @ (posedge clk_main_a0);
        end
        "hbm": begin
           `TB_TOP.peek_ocl(.addr(`HBM_STAT_BUS_CL), .data(data));
        end
        default: begin
           $display("FATAL ERROR - Invalid DDR/HBM");
           $finish;
        end
      endcase // case (intf)
   endtask // peek_stat

   // Blocks the caller for count rising edges of clk_core.
   task automatic wait_clock(int count);
      repeat (count) begin
         @ (posedge clk_core);
      end
   endtask : wait_clock

   // Holds both HBM APB resets (active low) asserted for the first 100ns.
   initial begin
      hbm_apb_preset_n_1 = 1'b0;
      hbm_apb_preset_n_0 = 1'b0;
      #100ns;
      hbm_apb_preset_n_1 = 1'b1;
      hbm_apb_preset_n_0 = 1'b1;
   end

endmodule // sh_bfm