Verilog inout port assignment results in X - verilog

I am fairly new to Verilog and I am stuck on a particular problem of doing an assignment operation to an inout wire port. The issue I faced is after the assignment to the inout port -_memd_data in the processor_core module, I get XXXXXXXX as the result in the _memd_data variable when it should be 0x5b78193a.
Line with issue: assign _memd_data = (mem_dWE)? mem_o_data : 32'bz;
Below are the lines of code that execute before the line above (I can't figure out what is wrong; it looks completely fine to me).
STR: //Result store
begin
if(memAddr==7) begin
csmemd = 1;
mem_o_data <= R[R1]; //STR r1, [r2]
mem_dWE = 1;
memAddr <= memAddr + Addr;
Brief explanation of this small project: Basically it simulates the workings of a single cycle processor. The processor_core module should be able to fetch an instruction from the imem module based on a given address and decode the instruction fetched, then load the data from the data memory (the dmem module), then conduct an XOR operation on the data with an encryption key (0x5a5b5c5d) fetched from the imem instruction address, compute the result, and store the resulting data back to the dmem module to replace the old data. In effect it simulates an encryption of data.
Note: I only need to implement the processor_core module, feels like I am so close, but yet so far from getting it done..
I am using Vivado software to code this.
TestBench
`timescale 1ns / 1ps
// Simulation top level for the single-cycle processor exercise.
// Generates the clock (CLK_TB) and reset (RSTN) internally and wires
// processor_core to its instruction memory (imem) and data memory (dmem).
// The board-level ports below are declared but never referenced in the body.
module Nexys4_MAT_Top(
    //CLK Input
    input CLK100MHZ ,
    input CPU_RESETN,
    //Push Button Inputs
    input BTNC ,
    // Slide Switch Inputs
    input [15:0] SW ,
    // LED Outputs
    output [15:0] LED ,
    // Seven Segment Display Outputs
    output CA ,
    output CB ,
    output CC ,
    output CD ,
    output CE ,
    output CF ,
    output CG ,
    output [ 7:0] AN ,
    output DP
);
    //CLK: 100MHz
    parameter periodCLK_2 = 5;   // half period of CLK_TB, in timescale units
    parameter perioddump = 10;   // initial low time before the clock starts toggling
    parameter delay = 1;         // small offset applied after a clock edge
    parameter delay_in = 2;
    //Clock & reset signals
    wire clk_main ;
    wire rstn ;
    //Processor signals
    wire [ 4:0] memAddr ;
    wire [31:0] memData_I ;
    wire [ 5:0] memAddr_d ;
    wire [31:0] _memd_data ;
    // NOTE(review): declared 32 bits wide but connected to the 6-bit data-memory
    // address ports of processor_core and dmem below.
    wire [31:0] _memd_data_cpu ;
    wire dmem_wr ;
    wire csmemd ;
    // NOTE(review): mem_dWE is used in the instantiations below without a
    // declaration, so it relies on an implicit 1-bit net.
    reg CLK_TB;
    reg RSTN;

    // CLK_TB //
    initial
    begin
        CLK_TB = 1'b0;
        #(perioddump);
        CLK_TB = 1'b1;
        forever
        begin
            CLK_TB = !CLK_TB;
            #(periodCLK_2);
        end
    end

    // Stimulus: apply the reset sequence, then let the design run for a few clocks.
    initial begin
        global_reset();
        repeat(1) @(posedge CLK_TB); #delay;
        repeat(1) @(posedge CLK_TB); #delay;
        repeat(10) @(posedge CLK_TB);#delay;
    end

    // Drives RSTN low for two clock edges (plus `delay`) and then releases it.
    // RSTN is not assigned before the first four clock edges have passed.
    task global_reset;
    begin
        repeat(2) @(posedge CLK_TB); #delay;
        repeat(2) @(posedge CLK_TB); #delay;
        RSTN = 1'b0;
        repeat(2) @(posedge CLK_TB); #delay;
        RSTN = 1'b1;
    end
    endtask

    // Circuit implementation
    clkrst u_clkrst(
        .CLK100MHZ (CLK_TB ),
        .rst_btn (RSTN ),
        .clk_out (clk_main ),
        .rstn (rstn )
    );
    processor_core u_processor(
        .clk (clk_main ),
        .rstn (rstn ),
        .memAddr (memAddr ),
        .memData_I (memData_I ),
        .memAddr_d (_memd_data_cpu ),
        ._memd_data (_memd_data ),
        .mem_dWE (mem_dWE ),
        .csmemd (csmemd )
    );
    // Instruction memory: always selected, never written.
    imem u_imem(
        .clk (clk_main ),
        .rstn (rstn ),
        .addr (memAddr ),
        .cs (1'b1 ),
        .we (1'b0 ),
        .data (memData_I )
    );
    // Data memory: shares the bidirectional _memd_data bus with the processor.
    dmem u_dmem(
        .clk (clk_main ),
        .rstn (rstn ),
        .addr (_memd_data_cpu ),
        .cs (csmemd ),
        .we (mem_dWE ),
        .data (_memd_data )
    );
endmodule
Design sources
`timescale 1ns / 1ps
// Processor core: on each clock it decodes the instruction word presented on
// memData_I, updates the register file R and the instruction address memAddr,
// and drives the data-memory interface (memAddr_d, _memd_data, mem_dWE, csmemd).
//
// NOTE(review): only the garbled event controls ("always # (...)") have been
// repaired to "always @ (...)". The remaining problems flagged below are left
// as posted because the missing pieces cannot be reconstructed from this listing.
module processor_core(
    input clk ,//Input clock
    input rstn ,//Reset signal, low active
    output [ 4:0] memAddr ,//instruction memory address
    input [31:0] memData_I ,//instruction memory data
    output [ 5:0] memAddr_d ,//data memory address
    inout [31:0] _memd_data ,
    output mem_dWE ,
    output csmemd
);
    // For I/O signals
    reg [ 4:0] memAddr ;
    reg [ 5:0] memAddr_d ;
    reg mem_dWE ;
    reg csmemd ;
    // For internal signals
    reg [31:0] _memd_data_i ;//The input data from dmem
    reg [31:0] mem_o_data ;//The output data for dmem
    // Opcode encodings.
    // NOTE(review): several names share a value (AND/ADD = 2, SUB/CMP = 4,
    // ORR/LDR = 5), so they cannot be told apart by value alone.
    parameter AND = 4'h2;
    parameter SUB = 4'h4;
    parameter ORR = 4'h5;
    parameter XOR = 4'h1;
    parameter ADD = 4'h2;
    parameter MOV = 4'h3;
    parameter LDR = 4'h5;
    parameter STR = 4'h6;
    parameter CMP = 4'h4;
    // Bit positions of the condition flags inside nzcv_.
    parameter N = 3;
    parameter Z = 2;
    parameter C = 1;
    parameter V = 0;
    reg [3:0] nzcv_;
    parameter Addr = 1;            // instruction address increment
    reg[31:0] condition_;
    reg[0:0] condition_tionCarry;
    reg[3:0] Aluchk;
    reg[3:0]Opcde_chk;
    reg[31:0]Br_check;
    reg[7:0]IVal;
    reg[23:0]branc_offs;
    reg[3:0] R1;
    reg[3:0] R2;
    reg[3:0] R3;
    reg[31:0] R[15:0];             // register file, 16 x 32 bits
    reg [31:0] b;
    integer index;
    integer clk_count;

    // Drive the shared data bus whenever mem_dWE is high, otherwise release it.
    // dmem drives the same bus whenever its cs input is high, so the two
    // drivers overlap if mem_dWE and csmemd are high together.
    assign _memd_data = (mem_dWE)? mem_o_data : 32'bz; //Issue with assignment

    // Register the value seen on the data bus every clock.
    always @ (posedge clk or negedge rstn) begin
        if (!rstn) begin
            _memd_data_i <= 32'h0;
        end
        else begin
            _memd_data_i <= _memd_data;
        end
    end

    // Decode and execute.
    // NOTE(review): memAddr, memAddr_d, mem_dWE and csmemd are not assigned in
    // the reset branch.
    always @ (posedge clk or negedge rstn)
    begin
        if(!rstn) begin
            nzcv_[N] = 0;
            nzcv_[Z] = 0;
            nzcv_[C] = 0;
            nzcv_[V] = 0;
            Br_check<=32'h0;
            IVal<=0;
            mem_o_data = 32'h0;
            R1<=0;
            R2<=0;
            R3<=0;
        end
        else begin
            // Field extraction uses blocking assignments so the case
            // statements below see the freshly decoded values.
            Aluchk = memData_I[31:31];
            R1 = memData_I[15:12];
            R2 = memData_I[11:8];
            R3 = memData_I[7:4];
            branc_offs = memData_I[27:4];
            Opcde_chk = memData_I[30:28];
            Br_check = memData_I[31:0];
            IVal = memData_I[27:16];   // 12-bit slice into an 8-bit reg: upper bits are dropped
            // NOTE(review): BEQ and BNE are not declared anywhere in this
            // module, and this case statement has no matching endcase in the
            // listing as posted.
            case(Br_check)
                BEQ:
                begin
                    if(nzcv_[Z] == 1'b1) //BEQ Done
                        $stop;
                end
                BNE:
                begin
                    if(nzcv_[Z] == 1'b0)
                        memAddr <= memAddr - branc_offs;
                end
            case(Aluchk)
                1'b1:
                begin
                    case(Opcde_chk)
                        XOR: //ADD ompute Result
                        begin
                            if(memAddr==6) begin
                                // csmemd = 0;
                                R[R1] = R[R2] ^ R[R3];
                                // mem_dWE = 1;
                                // csmemd = 1;
                                mem_o_data <= R[R1];
                                memAddr <= memAddr + Addr;
                            end
                        end
                        ADD:
                        begin
                            // mem_dWE = 0;
                            if(memAddr==8) begin
                                mem_dWE = 0;
                                // R[R2]<=R[R2]+IVal;
                                memAddr_d<=memAddr_d+IVal;
                                // memAddr_d = R[R2];
                                memAddr <= memAddr + Addr;
                            end
                        end
                    endcase
                end
                1'b0:
                begin
                    case(Opcde_chk)
                        MOV:
                        begin
                            clk_count = clk_count + 1;
                            if(memAddr==0) begin
                                R[R3][31:24] <= IVal;
                            end
                            if(memAddr==4 && clk_count == 5) begin
                                R[R2] <= IVal;
                            end
                            memAddr <= memAddr + Addr;
                        end
                        LDR:
                        begin
                            if(memAddr==5) begin
                                csmemd = 1;
                                R[R1] = 0;
                                R[R1] = _memd_data_i; //LDR r1, [r2]
                                memAddr <= memAddr + Addr;
                            end
                        end
                        STR: //Result store
                        begin
                            if(memAddr==7) begin
                                // Both tristate enables end up high here:
                                // csmemd makes dmem drive the bus while
                                // mem_dWE makes this module drive it too.
                                csmemd = 1;
                                mem_o_data <= R[R1];
                                mem_dWE = 1;
                                memAddr <= memAddr + Addr;
                                // mem_o_data = R[R1];
                            end
                        end
                    endcase
                end
            endcase
        end
    end
endmodule
`timescale 1ns / 1ps
// Data memory: 64 words of 32 bits on a bidirectional data bus.
// Reset (rstn low) loads the hard-coded plain text into words 0..17.
// A word is written on the rising clock edge when both cs and we are high.
module dmem(
    input clk ,//Input clock
    input rstn ,//Reset signal, low active
    input [ 5:0] addr ,//memory address
    input cs ,
    input we ,
    inout [31:0] data
);
    // For I/O signals
    // For internal signals
    reg [31:0] mem[0:63] ;
    // Circuit implementation
    // The bus is driven whenever cs is high, regardless of we; it is released
    // (high impedance) only while cs is low.
    assign data = (cs) ? mem[addr] : 32'bz;
    always @ (posedge clk or negedge rstn) begin
        if (!rstn) begin
            //The hard coded data, or plain text
            mem[ 0] <= 32'h43314220;
            mem[ 1] <= 32'h42020032;
            mem[ 2] <= 32'h00650039;
            mem[ 3] <= 32'h01150032;
            mem[ 4] <= 32'h01150097;
            mem[ 5] <= 32'h01020101;
            mem[ 6] <= 32'h00320116;
            mem[ 7] <= 32'h01010120;
            mem[ 8] <= 32'h01160032;
            mem[ 9] <= 32'h01190105;
            mem[10] <= 32'h01160104;
            mem[11] <= 32'h00320099;
            mem[12] <= 32'h01110100;
            mem[13] <= 32'h01010032;
            mem[14] <= 32'h00480120;
            mem[15] <= 32'h00490050;
            mem[16] <= 32'h00510052;
            mem[17] <= 32'h00330000;
        end
        else begin
            if (we&&cs)
            begin
                mem[addr] <= data;
            end
        end
    end
endmodule
`timescale 1ns / 1ps
// Instruction memory: 12 words of 32 bits on a bidirectional data bus.
// Reset (rstn low) loads the hard-coded program into words 0..11.
// A word is written on the rising clock edge when both cs and we are high.
module imem(
    input clk ,//Input clock
    input rstn ,//Reset signal, low active
    input [ 4:0] addr ,//memory address
    input cs ,
    input we ,
    inout [31:0] data
);
    // For I/O signals
    // For internal signals
    // Only 12 entries exist although addr is 5 bits wide; addresses 12..31
    // fall outside the array.
    reg [31:0] mem[0:11] ;
    // Circuit implementation
    // The bus is driven whenever cs is high, otherwise released.
    assign data = (cs) ? mem[addr] : 32'bz;
    always @ (posedge clk or negedge rstn) begin
        if (!rstn) begin
            //The hard coded instructions
            mem[ 0] <= 32'h10330030;
            mem[ 1] <= 32'h203f0030;
            mem[ 2] <= 32'h30120030;
            mem[ 3] <= 32'h40220030;
            mem[ 4] <= 32'h30000230;
            mem[ 5] <= 32'h50001200;
            mem[ 6] <= 32'h90005130;
            mem[ 7] <= 32'h10006000;
            mem[ 8] <= 32'ha3011430;
            mem[ 9] <= 32'h40120400;
            mem[10] <= 32'h200000b1;
            mem[11] <= 32'h20000000;
        end
        else begin
            if (cs && we)
            begin
                // Nonblocking, like every other assignment to mem in this
                // clocked block.
                mem[addr] <= data;
            end
        end
    end
endmodule
// Clock/reset pass-through: forwards the input clock and reset unchanged.
module clkrst(
    input CLK100MHZ ,//On-board input clock
    // Forwarded to rstn without inversion, so it has the same polarity as
    // rstn (the testbench drives it low to reset).
    input rst_btn ,//On-board reset from button
    output clk_out ,//The working clk for the rest of circuit
    output rstn //The working reset for the rest of circuit, LOW active
);
    // Circuit implementation
    assign rstn = rst_btn;
    assign clk_out = CLK100MHZ;
endmodule

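In your testbench, the dmem_data wire is connected to 2 module instance ports. (The snippets in this answer use the names dmem_data, dmem_we, dmem_cs, dmem_data_o and imem_addr; in the listing above they appear as _memd_data, mem_dWE, csmemd, mem_o_data and memAddr.)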
processor_core u_processor(
.dmem_data (dmem_data ),
//...
);
dmem u_dmem(
.data (dmem_data )
//...
);
That is fine, but only if one of them is active. Your problem is that both drivers are active at the same time. These are the 2 drivers:
assign dmem_data = (dmem_we)? dmem_data_o : 32'bz; //Issue with assignment
assign data = (cs) ? mem[addr] : 32'bz;
Since dmem_we and cs are both 1 at the same time (starting at time 145ns, for example), both are trying to drive the same signal with different values. This results in contention which is why you get X (unknown).
Here is one place where the tristate enables are both set to 1:
if(imem_addr==7) begin
dmem_cs = 1;
dmem_data_o <= R[R1]; //STR r1, [r2]
dmem_we = 1;
You need to change this logic.

Related

UART Transmit and receive data does not start (Vivado)

I can't figure out why is it that when I set the clock frequency from 50MHz to 100MHz, by changing the clk period to 5 in the testbench, my output transmit and receive data stays at 0. Can anyone enlighten me on this? I need my clock frequency to be 100MHz. Your help will be much appreciated.
Testbench
`timescale 1ns / 1ps
// Loopback testbench for the uart module: tx is wired to rx, each byte that
// is sent is compared with the byte received, and the test stops on the first
// mismatch or after 8'hff has been verified.
module uart_tx_test();
    parameter periodCLK_2 = 5;
    parameter perioddump = 10;
    parameter delay = 1;
    parameter delay_in = 2;
    reg CLK_TB = 0 ;
    reg RSTN ;
    reg [7:0] data = 0;
    reg clk = 0;
    reg enable = 0;
    wire tx_busy;
    wire rdy;
    wire [7:0] rxdata;
    wire loopback;
    reg rdy_clr = 0;

    uart test_uart(.din(data),
                   .wr_en(enable),
                   .clk_50m(clk),
                   .tx(loopback),
                   .tx_busy(tx_busy),
                   .rx(loopback),
                   .rdy(rdy),
                   .rdy_clr(rdy_clr),
                   .dout(rxdata));

    // Kick off the first transfer with a short enable pulse.
    // NOTE: the pulse is 2 time units wide while clk rises every 10 time
    // units, so the pulse can fall between rising edges and be missed.
    initial begin
        // $dumpfile("uart.vcd");
        $dumpvars(0, uart_tx_test);
        enable <= 1'b1;
        #2 enable <= 1'b0;
    end

    always begin
        #5 clk = ~clk; //I set period to 5; period was 1 previously.
    end

    // Each time a byte is received: clear rdy, check the byte, send the next one.
    always @(posedge rdy) begin
        #2 rdy_clr <= 1;
        #2 rdy_clr <= 0;
        if (rxdata != data) begin
            $display("FAIL: rx data %x does not match tx %x", rxdata, data);
            $finish;
        end else begin
            if (rxdata == 8'hff) begin
                $display("SUCCESS: all bytes verified");
                $finish;
            end
            data <= data + 1'b1;
            enable <= 1'b1;
            #2 enable <= 1'b0;
        end
    end
endmodule
Design Sources
// UART wrapper: ties a transmitter and a receiver to one shared baud-rate
// generator that supplies their clock-enable strobes.
module uart(
    input wire [7:0] din,
    input wire wr_en,
    input wire clk_50m,
    output wire tx,
    output wire tx_busy,
    input wire rx,
    input wire rdy_clr,
    output wire rdy,
    output wire [7:0] dout
);
    // Clock-enable strobes produced by the baud-rate generator.
    wire rxclk_en;
    wire txclk_en;

    // Receive path
    receiver uart_rx(
        .clk_50m(clk_50m),
        .clken(rxclk_en),
        .rx(rx),
        .rdy_clr(rdy_clr),
        .rdy(rdy),
        .data(dout)
    );

    // Transmit path
    transmitter uart_tx(
        .clk_50m(clk_50m),
        .clken(txclk_en),
        .wr_en(wr_en),
        .din(din),
        .tx(tx),
        .tx_busy(tx_busy)
    );

    // Shared strobe generator
    baud_rate_gen uart_baud(
        .clk_50m(clk_50m),
        .txclk_en(txclk_en),
        .rxclk_en(rxclk_en)
    );
endmodule
/*
 * Hacky baud rate generator that divides the input clock into a 9600 baud
 * rx/tx pair of clock enables, where the rx clken oversamples by 16x.
 *
 * The divider constants below are computed from 100000000 (100 MHz), although
 * the clock port keeps its clk_50m name.
 */
module baud_rate_gen(input wire clk_50m,
                     output wire rxclk_en,
                     output wire txclk_en);
    parameter RX_ACC_MAX = 100000000 / (9600 * 16);
    parameter TX_ACC_MAX = 100000000 / 9600;
    parameter RX_ACC_WIDTH = $clog2(RX_ACC_MAX);
    parameter TX_ACC_WIDTH = $clog2(TX_ACC_MAX);
    reg [RX_ACC_WIDTH - 1:0] rx_acc = 0;
    reg [TX_ACC_WIDTH - 1:0] tx_acc = 0;

    // Each enable is high while its accumulator is zero.
    assign rxclk_en = (rx_acc == 5'd0);
    assign txclk_en = (tx_acc == 9'd0);

    // Receive accumulator: counts up to RX_ACC_MAX, then wraps to zero.
    always @(posedge clk_50m) begin
        if (rx_acc == RX_ACC_MAX[RX_ACC_WIDTH - 1:0])
            rx_acc <= 0;
        else
            rx_acc <= rx_acc + 5'b1;
    end

    // Transmit accumulator: counts up to TX_ACC_MAX, then wraps to zero.
    always @(posedge clk_50m) begin
        if (tx_acc == TX_ACC_MAX[TX_ACC_WIDTH - 1:0])
            tx_acc <= 0;
        else
            tx_acc <= tx_acc + 9'b1;
    end
endmodule
// UART transmitter: latches din when wr_en is sampled high in the idle state,
// then shifts out a start bit (0), eight data bits starting with bit 0, and a
// stop bit (1), advancing one step per clken strobe. tx_busy is high whenever
// the state machine is not idle.
module transmitter(
    input wire [7:0] din,
    input wire wr_en,
    input wire clk_50m,
    input wire clken,
    output reg tx,
    output wire tx_busy
);
    // The line idles high.
    initial begin
        tx = 1'b1;
    end

    parameter STATE_IDLE = 2'b00;
    parameter STATE_START = 2'b01;
    parameter STATE_DATA = 2'b10;
    parameter STATE_STOP = 2'b11;

    reg [7:0] data = 8'h00;     // byte being transmitted
    reg [2:0] bitpos = 3'h0;    // index of the next data bit to send
    reg [1:0] state = STATE_IDLE;

    always @(posedge clk_50m) begin
        case (state)
            STATE_IDLE: begin
                // wr_en is sampled on every clock edge, not only on clken.
                if (wr_en) begin
                    state <= STATE_START;
                    data <= din;
                    bitpos <= 3'h0;
                end
            end
            STATE_START: begin
                if (clken) begin
                    tx <= 1'b0;
                    state <= STATE_DATA;
                end
            end
            STATE_DATA: begin
                if (clken) begin
                    if (bitpos == 3'h7)
                        state <= STATE_STOP;
                    else
                        bitpos <= bitpos + 3'h1;
                    tx <= data[bitpos];
                end
            end
            STATE_STOP: begin
                if (clken) begin
                    tx <= 1'b1;
                    state <= STATE_IDLE;
                end
            end
            default: begin
                tx <= 1'b1;
                state <= STATE_IDLE;
            end
        endcase
    end

    assign tx_busy = (state != STATE_IDLE);
endmodule
// UART receiver: advances on clken strobes (16 per bit), detects the start
// bit, samples each data bit when the sample counter reaches 8, and presents
// the received byte on data with rdy set. rdy is cleared when rdy_clr is
// sampled high.
module receiver(
    input wire rx,
    input wire rdy_clr,
    input wire clk_50m,
    input wire clken,
    output reg rdy,
    output reg [7:0] data
);
    initial begin
        rdy = 0;
        data = 8'b0;
    end

    parameter RX_STATE_START = 2'b00;
    parameter RX_STATE_DATA = 2'b01;
    parameter RX_STATE_STOP = 2'b10;

    reg [1:0] state = RX_STATE_START;
    reg [3:0] sample = 0;       // oversampling counter within one bit
    reg [3:0] bitpos = 0;       // number of data bits collected so far
    reg [7:0] scratch = 8'b0;   // byte being assembled

    always @(posedge clk_50m) begin
        // A later assignment to rdy in the same clock (stop state) takes
        // precedence over this clear.
        if (rdy_clr)
            rdy <= 0;
        if (clken) begin
            case (state)
                RX_STATE_START: begin
                    /*
                     * Start counting from the first low sample, once we've
                     * sampled a full bit, start collecting data bits.
                     */
                    if (!rx || sample != 0)
                        sample <= sample + 4'b1;
                    if (sample == 15) begin
                        state <= RX_STATE_DATA;
                        bitpos <= 0;
                        sample <= 0;
                        scratch <= 0;
                    end
                end
                RX_STATE_DATA: begin
                    sample <= sample + 4'b1;
                    if (sample == 4'h8) begin
                        scratch[bitpos[2:0]] <= rx;
                        bitpos <= bitpos + 4'b1;
                    end
                    if (bitpos == 8 && sample == 15)
                        state <= RX_STATE_STOP;
                end
                RX_STATE_STOP: begin
                    /*
                     * The baud clock may not be running at exactly the
                     * same rate as the transmitter. If we think that
                     * we're at least half way into the stop bit, allow
                     * transition into handling the next start bit.
                     */
                    if (sample == 15 || (sample >= 8 && !rx)) begin
                        state <= RX_STATE_START;
                        data <= scratch;
                        rdy <= 1'b1;
                        sample <= 0;
                    end else begin
                        sample <= sample + 4'b1;
                    end
                end
                default: begin
                    state <= RX_STATE_START;
                end
            endcase
        end
    end
endmodule
You need to scale all your other delays accordingly. Change all your #2 to #10, then you will see the SUCCESS: all bytes verified message.
With your original clock delay of #1, your other input signal pulses (enable and rdy_clr) were wide enough for your uart design module to sample properly. For example, on the 1st posedge of clk, your design properly sampled the enable input as 1, which started the TX state machine.
You increased the clock period by a factor of 5 when you changed the delay from #1 to #5. However, your enable pulse stayed the same width as before, which means that the design sampled enable as 0, not 1. So your TX state machine stayed in the IDLE state. By changing the enable delay from #2 to #10, you are able to properly sample enable as 1.
You can easily prove this to yourself by dumping a VCD file, and viewing the waveforms inside the design.
You could replace the numeric delays with a parameter to make it easier to change to different frequencies.
Note: You stated the clk delay was originally #1. This gives the clk signal a period of 2ns, which is 500MHz, not 50MHz.

Why doesn't the up/down counter count down?

I was doing the Logic Design assignment, and I found some problems I can't solve.
I need to design a 6-bit counter, and this counter needs to count with two functions, for up and down respectively.
I have done the up part and down part, but when I run the simulation, the counting down part doesn't work correctly.
The function for counting down: the next a = a - 2^n, where n = 0, 1, 2, 3... eg. a1 = 63, a2 = 63 - 1 = 62, a3 = 62 - 2 = 60, a4 = 56...
But in the simulation of my program, the output becomes 63, 62, 61 (63 - 2), 59 (63 - 4)...
By the way, this assignment has a reset feature.
However, my program won't keep counting after being reset.
Theoretically, it should go back to zero and continue counting.
The following is my code:
`timescale 1ns/100ps
// 6-bit up/down counter as posted in the question.
// NOTE: only the garbled event controls have been repaired ("@" instead of
// "#"). out and cnt are still assigned from the combinational block as well
// as the clocked block, and the clocked block still has no reset branch;
// those are the problems the question is about, so they are left as posted.
module lab2_1(
    input clk,
    input rst,
    output reg [5:0] out
);
    reg [5:0] cnt;
    wire [5:0] cnt_next;
    reg updown;             // 1 selects the first branch of the clocked block, 0 the second
    wire [5:0] out_next;

    initial begin
        out = 0;
        cnt = 1;
        updown = 1;
    end

    assign cnt_next = (out == 6'b111111) ? 0 : cnt + 1;
    assign out_next = out - (2**cnt);

    // Direction control and reset, written as combinational logic.
    always @(*) begin
        if(out == 6'b111111)begin
            updown = 0;
        end
        if(out == 6'b000000)begin
            updown = 1;
        end
        if(rst == 1) begin
            out = 0;
            updown = 1;
            cnt = 0;
        end
    end

    // Counter update on the clock (rst is in the sensitivity list but is not
    // tested inside the block).
    always @(posedge clk, posedge rst) begin
        if(updown == 1)begin
            if(out > cnt)begin
                out <= out - cnt;
            end
            else
                out <= out + cnt;
        end
        else begin
            out <= out_next;
        end
        cnt <= cnt_next;
    end
endmodule
The testbench just monitors the output and drives the inputs.
`timescale 1ns/100ps
// Testbench for lab2_1: free-running clock, a reset that toggles every
// 10000 time units, and a monitor that prints clk, rst and out.
module lab2_1_t;
    reg clk;
    reg rst;
    wire [5:0] out;

    lab2_1 v(clk, rst, out);

    // Start with clock and reset low and enable monitoring.
    initial begin
        clk = 0;
        rst = 0;
        $monitor($time,":clk = %b, rst = %b, out = %d", clk, rst, out);
    end

    // Clock: toggles every 10 time units.
    always begin
        #10;
        clk = ~clk;
    end

    // Reset: toggles every 10000 time units.
    always begin
        #10000;
        rst = ~rst;
    end
endmodule
You should not make assignments to the same signal (such as cnt) from multiple blocks. You can assign to cnt from a single always block. I don't think you need both out and cnt.
Here is a simplified version which automatically switches between up and down:
// 6-bit counter that automatically alternates between counting up and
// counting down. An asynchronous active-high rst clears cnt and selects the
// up direction.
module lab2_1(
    input clk,
    input rst,
    output reg [5:0] cnt
);
    reg updown;     // 1 = count up, 0 = count down

    // Direction control. The direction flips one count before the end value
    // (62 going up, 1 going down) because updown takes effect a cycle later.
    always @(posedge clk, posedge rst) begin
        if (rst) begin
            updown <= 1;
        end else if (cnt == 6'b111110) begin
            updown <= 0;
        end else if (cnt == 6'b000001) begin
            updown <= 1;
        end
    end

    // Counter register.
    always @(posedge clk, posedge rst) begin
        if (rst) begin
            cnt <= 0;
        end else if (updown) begin
            cnt <= cnt + 1;
        end else begin
            cnt <= cnt - 1;
        end
    end
endmodule
The code shows a more typical use of the reset signal in the sequential always block. See also for more examples.
Here is a modified testbench where the reset is asserted at time 0, then released after a couple clock cycles. At the end, it asserts reset again so you can see that the counter goes to 0.
// Testbench for lab2_1: asserts reset at time 0, releases it after 40 time
// units, asserts it again 10000 time units later, and finishes 1000 time
// units after that.
module lab2_1_t;
    reg clk;
    reg rst;
    wire [5:0] out;

    lab2_1 v(clk, rst, out);

    initial begin
        $monitor($time,":clk = %b, rst = %b, out = %d", clk, rst, out);
        clk = 0;
        rst = 1;
        // Release reset after a couple of clock cycles.
        #40;
        rst = 0;
        // Re-assert reset near the end to show the counter returning to 0.
        #10000;
        rst = 1;
        #1000;
        $finish;
    end

    // Clock: toggles every 10 time units.
    always begin
        #10;
        clk = ~clk;
    end
endmodule

How can I modify this code? Error is coming out

I'm designing some codes of data bus-system by using ideal SRAM and CPU. I want to write memory mem[0] -> IR, and read memory IR -> mem[1], and finally write memory mem[1] -> DR.
But I'm having some problems.
Here is the code. I'm very confused about how to declare ports as input, output, reg, or wire, and I'm having a hard time using 2 DUTs with 1 testbench. How can I modify this code to avoid the errors?
// Ideal synchronous SRAM model.
//   addr : word address
//   din  : write data, stored at mem[addr] on a rising clock edge when we is LOW
//   dout : read data, updated from mem[addr] one time unit after each rising clock edge
//   we   : write control (a write happens when we is 0)
module sram(addr,clk,din,dout,we);
    parameter addr_width = 12, word_depth = 4096, word_width = 16;
    input clk,we;
    input [addr_width-1:0] addr;
    input [word_width-1:0] din;
    output [word_width-1:0] dout;
    reg [word_width-1:0] mem [0:word_depth-1];
    reg [word_width-1:0] dout;

    // Write port
    always @(posedge clk) begin
        if(!we)
            mem[addr] <= din[word_width-1:0];
    end

    // Read port
    always @(posedge clk) begin
        #1 dout <= mem[addr];
    end
endmodule
// Minimal CPU register block with an embedded sram instance.
//   reset  : asynchronous, active low; clears ar, ir, pc, dr and ac
//   select/load/we : choose the transfer performed on a rising clock edge:
//       select=01, load=01, we=1 : ir  <= sram read data
//       select=01, load=10, we=0 : din <= ir   (din is the sram write data)
//       select=10, load=01, we=1 : dr  <= sram read data
//   addr, we are passed straight through to the sram.
module cpu(clk,load,reset,select,ir,dr,ac,ar,pc,addr,we);
    input clk,reset;
    input [1:0]select;
    input [1:0]load;
    output reg[15:0] ir,dr,ac;
    output reg[11:0] ar,pc;
    input we;
    input [11:0] addr;
    reg[15:0] din;
    wire[15:0] dout;

    sram sram(addr,clk,din,dout,we);

    always @ (posedge clk or negedge reset) begin
        if(!reset) begin
            ar <= 12'b0; ir <= 16'b0; pc <= 12'b0; dr <= 16'b0; ac <= 16'b0;
        end
        // The transfers are chained with else-if so that none of them can
        // override the register clear while reset is asserted.
        else if(select==2'b01 && load==2'b01 && we==1)
            ir[15:0] <= dout[15:0];
        else if(select==2'b01 && load==2'b10 && we==0)
            din[15:0] <= ir[15:0];
        else if(select==2'b10 && load==2'b01 && we==1)
            dr[15:0] <= dout[15:0];
    end
endmodule
// Testbench for cpu: pulses reset, preloads the sram from "sram.dat", applies
// three select/load/we/addr combinations 10 time units apart, then dumps the
// registers to "reg.dat" and the first three memory words to "memory.dat".
module tb_cpu();
    parameter addr_width = 12, word_depth = 4096, word_width = 16;
    reg clk,reset,we;
    reg [1:0]select;
    reg [1:0]load;
    reg [addr_width-1:0] addr;
    wire [word_width-1:0] ir,dr,ac;
    wire [addr_width-1:0] ar,pc;
    integer file_pointer;
    integer file_pointer2;

    cpu cpu(clk,load,reset,select,ir,dr,ac,ar,pc,addr,we);

    always #5 clk = ~clk;

    initial begin
        clk = 0; addr = 12'b0; we = 0; reset = 1;
        #2 reset = 0; #2 reset = 1;
        $readmemb("sram.dat", tb_cpu.cpu.sram.mem);
        file_pointer = $fopen("reg.dat");
        file_pointer2 = $fopen("memory.dat");
        #10 select = 2'b01; load = 2'b01; we = 1; addr = 12'b000000000000; //cycle 1
        #10 select = 2'b01; load = 2'b10; we = 0; addr = 12'b000000000001; //cycle 2
        #10 select = 2'b10; load = 2'b01; we = 1; addr = 12'b000000000001; //cycle 3
        // NOTE(review): the values are dumped in the same time step in which
        // the cycle-3 controls are applied, i.e. before the next clock edge.
        $fdisplay(file_pointer, "AR = %b", tb_cpu.cpu.ar);
        $fdisplay(file_pointer, "IR = %b", tb_cpu.cpu.ir);
        $fdisplay(file_pointer, "PC = %b", tb_cpu.cpu.pc);
        $fdisplay(file_pointer, "DR = %b", tb_cpu.cpu.dr);
        $fdisplay(file_pointer, "AC = %b", tb_cpu.cpu.ac);
        // The indices carry an explicit binary base: a bare 000000000010 is
        // the decimal number 10, not address 2.
        $fdisplay(file_pointer2, "mem[0000 0000 0000] = %b",tb_cpu.cpu.sram.mem[12'b000000000000]);
        $fdisplay(file_pointer2, "mem[0000 0000 0001] = %b",tb_cpu.cpu.sram.mem[12'b000000000001]);
        $fdisplay(file_pointer2, "mem[0000 0000 0010] = %b",tb_cpu.cpu.sram.mem[12'b000000000010]);
        $fclose(file_pointer);
        $fclose(file_pointer2);
        #10 $finish;
    end
endmodule
I get compile errors for your code in the tb_cpu module regarding the tb_cpu.sram hierarchical specifier. You should change all:
tb_cpu.sram
to:
tb_cpu.cpu.sram
For example, change:
$readmemb("sram.dat", tb_cpu.sram.mem);
to:
$readmemb("sram.dat", tb_cpu.cpu.sram.mem);
After I fix those compile errors, I also see compile warnings related to addr and we. I think you need to add addr and we input ports to the cpu module, with proper connections.
Module cpu:
module cpu(clk,load,reset,select,ir,dr,ac,ar,pc,addr,we);
parameter addr_width = 12, word_depth = 4096, word_width = 16;
input we;
input [addr_width-1:0] addr;
Module tb_cpu:
cpu cpu(clk,load,reset,select,ir,dr,ac,ar,pc,addr,we);

Fifo block implementation

I wrote a FIFO in SystemVerilog.
I try to push some data into this FIFO (I wrote a testbench), but when I push data, fifo_wr_ptr, fifo_free_space and fifo_used_space don't update (the data is only written to mem[0]).
I would be glad for help (for example, why doesn't my pointer increment by 1?).
Thanks a lot!
Here is my simulation that shows my problem:
I attached my code:
// Synchronous FIFO with separate used/free space counters.
//   rst_l  : asynchronous reset, active low
//   sw_rst : synchronous reset, active low
//   A push is accepted when fifo_push_en is high, the FIFO is not full and
//   fifo_pop_en is low; a pop is accepted when fifo_pop_en is high, the FIFO
//   is not empty and fifo_push_en is low. Simultaneous push and pop is ignored.
module fifo
#(parameter WIDTH = 32, parameter DEPTH = 64)
(clk, rst_l, sw_rst, fifo_din, fifo_push_en, fifo_pop_en, fifo_dout,
 fifo_o_full, fifo_o_empty, fifo_used_space, fifo_free_space);

    // Returns the number of bits needed to represent `value`
    // (e.g. 64 -> 7), so a counter of that width can hold DEPTH itself.
    function integer log2; //can use the $clog2() function
        input [31:0] value;
        reg [31:0] value_tmp;
        begin
            value_tmp = value;
            for (log2 = 0; value_tmp > 0; log2 = log2 + 1)
                value_tmp = (value_tmp >> 1);
        end
    endfunction

    localparam DEPTH_LOG2 = log2(DEPTH);

    //interface
    input clk;
    input rst_l;
    input sw_rst;
    input [WIDTH-1:0] fifo_din;
    input fifo_push_en;
    input fifo_pop_en;
    output logic [WIDTH-1:0] fifo_dout;
    output logic fifo_o_full;
    output logic fifo_o_empty;
    output logic [DEPTH_LOG2-1:0] fifo_used_space;
    output logic [DEPTH_LOG2-1:0] fifo_free_space;
    logic debug_flag;

    //internal logic
    // The storage holds DEPTH entries (it was previously sized by DEPTH_LOG2,
    // which left only a handful of words for the pointers to address).
    logic [WIDTH-1:0] mem [DEPTH-1:0];
    logic [DEPTH_LOG2-1:0] fifo_rd_ptr, fifo_wr_ptr;

    assign fifo_o_empty = (fifo_used_space == 0);
    assign fifo_o_full = (fifo_free_space == 0);

    always @ (posedge clk or negedge rst_l) begin
        if (~rst_l) begin
            fifo_free_space <= DEPTH;
            fifo_used_space <= 0;
            fifo_rd_ptr <= 0;
            fifo_wr_ptr <= 0;
            debug_flag <= 0;
        end else if (~sw_rst) begin
            fifo_free_space <= DEPTH;
            fifo_used_space <= 0;
            fifo_rd_ptr <= 0;
            fifo_wr_ptr <= 0;
            debug_flag <= 0;
        end else if (fifo_push_en == 1 && fifo_o_full == 0 && fifo_pop_en == 0) begin
            //the fifo isn't full and can perform the write transaction (and no read transaction)
            fifo_used_space <= fifo_used_space + 1;
            fifo_free_space <= fifo_free_space - 1;
            mem[fifo_wr_ptr] <= fifo_din;
            debug_flag <= 1;
            // Nonblocking increment, consistent with every other assignment
            // in this clocked block (the original used a blocking "++").
            if (fifo_wr_ptr == (DEPTH - 1))
                fifo_wr_ptr <= 0;
            else
                fifo_wr_ptr <= fifo_wr_ptr + 1;
        end else if (fifo_pop_en == 1 && fifo_o_empty == 0 && fifo_push_en == 0) begin
            // the fifo isn't empty and can perform the read transaction (and no write transaction)
            fifo_used_space <= fifo_used_space - 1;
            fifo_free_space <= fifo_free_space + 1;
            fifo_dout <= mem[fifo_rd_ptr];
            if (fifo_rd_ptr == (DEPTH - 1))
                fifo_rd_ptr <= 0;
            else
                fifo_rd_ptr <= fifo_rd_ptr + 1;
        end
        // Otherwise every register simply holds its value.
    end
endmodule
and here is the tb code:
`define WIDTH 32
`define DEPTH 64
// Testbench for the fifo module: releases both resets, pushes four values,
// pops one and pushes it back.
module fifo_tb();
    // Returns the number of bits needed to represent `value` (e.g. 64 -> 7).
    function integer log2; //can use the $clog2() function
        input [31:0] value;
        reg [31:0] value_tmp;
        begin
            value_tmp = value;
            for(log2=0; value_tmp>0; log2=log2+1)
                value_tmp=(value_tmp>>1);
        end
    endfunction

    localparam DEPTH_LOG2 = log2(`DEPTH);

    logic clk,rst_l,sw_rst,fifo_push_en,fifo_pop_en,fifo_o_full,fifo_o_empty;
    logic[`WIDTH-1:0] fifo_din,fifo_dout,tempdata;
    logic[DEPTH_LOG2-1:0] fifo_used_space,fifo_free_space;

    fifo #(`WIDTH,`DEPTH) ff(.clk(clk), .rst_l(rst_l), .sw_rst(sw_rst), .fifo_din(fifo_din),
        .fifo_push_en(fifo_push_en), .fifo_pop_en(fifo_pop_en),
        .fifo_dout(fifo_dout), .fifo_o_full(fifo_o_full), .fifo_o_empty(fifo_o_empty),
        .fifo_used_space(fifo_used_space), .fifo_free_space(fifo_free_space) );

    initial
    begin
        clk =0;
        rst_l = 0;
        sw_rst= 0;
        fifo_push_en=0;
        fifo_pop_en=0;
        fifo_din=0;
        tempdata=0;
        #15 rst_l=1;
        #1 sw_rst=1;
        push(10);
        push(20);
        push(30);
        push(40);
        pop(tempdata);
        push(tempdata);
    end

    always
        #5 clk=~clk;

    // Push one word: raise fifo_push_en after a rising clock edge and lower
    // it after the next one.
    // NOTE(review): the enables change with blocking assignments at the same
    // clock edge the DUT samples them, so the value the DUT sees may depend on
    // simulator event ordering.
    task push;
        input[`WIDTH-1:0] data;
        if(fifo_o_full)
            $display("--- Cannot push: Buffer full ----");
        else begin
            $display("Pushed: ",data);
            @(posedge clk);
            fifo_din = data;
            fifo_push_en=1;
            @(posedge clk);
            fifo_push_en=0;
        end
    endtask

    // Pop one word: raise fifo_pop_en after a rising clock edge, lower it
    // after the next one and capture fifo_dout.
    task pop;
        output [`WIDTH-1:0] data;
        if(fifo_o_empty)
            $display("Cannot pop: buffer empty ---");
        else begin
            @(posedge clk);
            fifo_pop_en=1;
            @(posedge clk);
            fifo_pop_en=0;
            data=fifo_dout;
            $display("----- Poped : ",data);
        end
    endtask
endmodule
Setting aside the oddity related to pointer incrementation, the code itself is confusing and difficult to deal with. I am pasting a reference FIFO module that should do the job; it should also help you grasp the basics of coding style.
//----------------------------------------------------
// Module Name: fifo_sync.v
//----------------------------------------------------
// Description: generic sync FIFO module
//
// The pointers are FIFO_PTR_WIDTH bits wide: the low FIFO_PTR_WIDTH-1 bits
// address the storage array and the top bit is a wrap flag used to tell
// "full" from "empty". The FIFO therefore holds 2**(FIFO_PTR_WIDTH-1) words.
// FIFO_PTR_WIDTH must be at least 2.
//----------------------------------------------------
module fifo_sync #
(
    parameter FIFO_DATA_WIDTH = 'd32,
    parameter FIFO_PTR_WIDTH = 'd6
)
(
    //------------------------------------------------
    // Inputs
    //------------------------------------------------
    input clk,
    input rst_n,                            // asynchronous reset, active low
    input wr_en,                            // write request (ignored while full)
    input [FIFO_DATA_WIDTH-1:0] wr_data,
    input rd_en,                            // read request (ignored while empty)
    //------------------------------------------------
    // Outputs
    //------------------------------------------------
    output reg [FIFO_DATA_WIDTH-1:0] rd_data,
    output stat_full,
    output stat_empty,
    output [ FIFO_PTR_WIDTH-1:0] stat_occupancy
);
    //------------------------------------------------
    // Local Parameters
    //------------------------------------------------
    localparam FIFO_DEPTH = 2**(FIFO_PTR_WIDTH-1);
    //------------------------------------------------
    // Internal Register(s)/Wire(s)/Integer(s)
    //------------------------------------------------
    reg [ FIFO_PTR_WIDTH-1:0] wr_ptr;
    reg [ FIFO_PTR_WIDTH-1:0] rd_ptr;
    reg [FIFO_DATA_WIDTH-1:0] fifo_array [FIFO_DEPTH-1:0];
    integer int_i;
    //------------------------------------------------
    // Write Pointer Logic
    //------------------------------------------------
    always @(posedge clk or negedge rst_n)
    begin: p_wr_ptr
        if (!rst_n)
            wr_ptr <= {FIFO_PTR_WIDTH{1'b0}};
        else if (wr_en & !stat_full)
            wr_ptr <= wr_ptr + 1'b1;
    end
    //------------------------------------------------
    // Read Pointer Logic
    //------------------------------------------------
    always @(posedge clk or negedge rst_n)
    begin: p_rd_ptr
        if (!rst_n)
            rd_ptr <= {FIFO_PTR_WIDTH{1'b0}};
        else if (rd_en & !stat_empty)
            rd_ptr <= rd_ptr + 1'b1;
    end
    //------------------------------------------------
    // Status Interface
    //------------------------------------------------
    // FIFO full status flag: same address bits, different wrap bits
    assign stat_full = (wr_ptr[FIFO_PTR_WIDTH-1] ^ rd_ptr[FIFO_PTR_WIDTH-1]) & (wr_ptr[FIFO_PTR_WIDTH-2:0] == rd_ptr[FIFO_PTR_WIDTH-2:0]);
    // FIFO empty status flag
    assign stat_empty = (wr_ptr == rd_ptr);
    // FIFO occupancy status
    assign stat_occupancy = wr_ptr - rd_ptr;
    //-----------------------------------------------
    // FIFO Write
    //-----------------------------------------------
    always @(posedge clk or negedge rst_n)
    begin: p_fifo_write
        if (!rst_n)
            // Clear every entry, including the last one.
            for (int_i = 0; int_i < FIFO_DEPTH; int_i = int_i + 1)
                fifo_array[int_i] <= {FIFO_DATA_WIDTH{1'b0}};
        else if (wr_en & !stat_full)
            // Index with the address bits only; the top pointer bit is the
            // wrap flag and would address past the end of the array.
            fifo_array[wr_ptr[FIFO_PTR_WIDTH-2:0]] <= wr_data;
    end
    //-----------------------------------------------
    // FIFO Read
    //-----------------------------------------------
    always @(posedge clk or negedge rst_n)
    begin: p_fifo_read
        if (!rst_n)
            rd_data <= {FIFO_DATA_WIDTH{1'b0}};
        else if (rd_en & !stat_empty)
            rd_data <= fifo_array[rd_ptr[FIFO_PTR_WIDTH-2:0]];
    end
endmodule

Generating unsigned number for booth multiplier

For an academic exercise, I have implemented a 32-bit Karatsuba multiplier which takes 17 cycles to run by doing parallel multiplication of 16 bits each and shifting them accordingly.
I am getting an issue where the partial products need to be unsigned, but booth multiplier is generating signed partial product for me, regardless of the input type I give, because of which I get incorrect partial products. How can I solve this?
For example, my two signed inputs are 0xA000_000A and 0x000A_A000. So the first partial product of A000 * 000A should be 0x64000, but I get 0xFFFC4000 (FFFF_A000 * 0000_000A). I have shared my code here for the booth multiplier and its testbench.
// 16 x 16 Booth multiplier as posted in the question.
//   rst   : asynchronous reset, active low
//   valid : starts a multiplication of Mul_X by Mul_Y from the IDLE state
//   product / result_ready : set in the OPERATE state when count reaches 16
//
// NOTE: only the garbled event controls have been repaired ("always @(...)").
// Both operands are declared signed, which is what the question is about.
// NOTE(review): count, multiplier and multiplicand are assigned in both the
// clocked block and the combinational block below; left as posted.
module booth_multiplier
(
    input logic clk,
    input logic rst,
    input logic valid,
    input logic signed [15:0] Mul_X,
    input logic signed [15:0] Mul_Y,
    output logic signed [31:0] product,
    output logic result_ready
);
    logic unsigned Q_1;
    bit [4:0] count;
    logic signed [15:0] multiplier;
    logic signed [15:0] multiplicand;
    logic [15:0] A, temp_A;
    logic signed [32:0] partial_product;       // {A, multiplier, Q_1}
    logic signed [32:0] partial_multiplier;
    typedef enum {IDLE=0, OPERATE} fsm;
    fsm state, next_state;
    // Values of partial_product[1:0] that select add or subtract.
    parameter ADD = 2'b01, SUB = 2'b10;
    //assign product = multiplier[16:1];

    // State register and iteration counter.
    always @(posedge clk or negedge rst)
    begin
        if(~rst)
        begin
            count <= 0;
            state <= IDLE;
            multiplier <= 0;
            multiplicand <= 0;
        end
        else begin
            count <= count+1;
            state <= next_state;
        end
    end

    // Next-state and datapath logic.
    always @(*)
    begin
        case(state)
            IDLE : begin
                Q_1 = 0;
                A = 0;
                count = 0;
                product = 0;
                temp_A = 0;
                result_ready = 0;
                if(valid) begin
                    multiplicand = Mul_X;
                    multiplier = Mul_Y;
                    partial_product = {A, multiplier, Q_1};
                    partial_multiplier = 0;
                    next_state = OPERATE;
                end
            end
            OPERATE: begin
                case(partial_product[1:0])
                    ADD: begin
                        temp_A = A + multiplicand;
                        multiplier = partial_product[16:1];
                        partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1;
                        Q_1 = partial_product[0];
                        A = partial_product[32:17];
                    end
                    SUB: begin
                        temp_A = A - multiplicand;
                        multiplier = partial_product[16:1];
                        partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1;
                        Q_1 = partial_product[0];
                        A = partial_product[32:17];
                    end
                    default: begin
                        temp_A = A;
                        multiplier = partial_product[16:1];
                        partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1;
                        Q_1 = multiplier[0];
                        A = partial_product[32:17];
                    end
                endcase
                if(count == 16) begin
                    next_state = IDLE;
                    product = partial_product >> 1;
                    result_ready = 1;
                end
                else next_state = OPERATE;
            end
        endcase
    end
endmodule
This I am using to do 4 parallel multiplications in
// 32 x 32 multiplier built from four 16 x 16 booth_multiplier instances that
// run in parallel on the high/low halves of the operands. The four partial
// products are shifted into place and summed into the registered 64-bit
// product; ready is the registered AND of the four result_ready flags.
//   rst : asynchronous reset, active low
// NOTE(review): every booth_multiplier instance treats its 16-bit operands as
// signed, including the low halves Xr and Yr.
module fast_multiplier
(
    input logic clk,
    input logic rst,
    input valid,
    input logic signed [31:0] multiplicand,
    input logic signed [31:0] multiplier,
    output logic signed [63:0] product,
    output logic ready);

    logic [15:0] X1;    // high half of multiplicand
    logic [15:0] Y1;    // high half of multiplier
    logic [15:0] Xr;    // low half of multiplicand
    logic [15:0] Yr;    // low half of multiplier
    // Partial products, driven only by the booth_multiplier instances.
    logic [31:0] X1_Yr;
    logic [31:0] Xr_Yr;
    logic [31:0] X1_Y1;
    logic [31:0] Xr_Y1;
    logic ready1, ready2, ready3, ready4;

    assign X1 = multiplicand[31:16];
    assign Y1 = multiplier[31:16];
    assign Xr = multiplicand[15:0];
    assign Yr = multiplier[15:0];

    booth_multiplier X1Y1
    (
        .clk(clk),
        .rst(rst),
        .valid(valid),
        .Mul_X(X1),
        .Mul_Y(Y1),
        .product(X1_Y1),
        .result_ready(ready1));
    booth_multiplier X1Yr
    (
        .clk(clk),
        .rst(rst),
        .valid(valid),
        .Mul_X(X1),
        .Mul_Y(Yr),
        .product(X1_Yr),
        .result_ready(ready2));
    booth_multiplier XrY1
    (
        .clk(clk),
        .rst(rst),
        .valid(valid),
        .Mul_X(Xr),
        .Mul_Y(Y1),
        .product(Xr_Y1),
        .result_ready(ready3));
    booth_multiplier XrYr
    (
        .clk(clk),
        .rst(rst),
        .valid(valid),
        .Mul_X(Xr),
        .Mul_Y(Yr),
        .product(Xr_Yr),
        .result_ready(ready4));

    always @(posedge clk or negedge rst)
    begin
        if(~rst)
        begin
            // Only the registers owned by this block are reset here. The
            // partial products are outputs of the instances above; assigning
            // them here as well would give those variables a second driver.
            product <= 0;
            ready <= 0;
        end
        else begin
            product <= ({32'b0,X1_Y1} << 32) + (({32'b0,X1_Yr} + {32'b0,Xr_Y1}) << 16) + {32'b0,Xr_Yr};
            ready <= ready1 & ready2 & ready3 & ready4;
        end
    end
endmodule
Also, sharing the testbench,
// Testbench for fast_multiplier: releases reset, pulses valid for one clock
// with a fixed operand pair, waits for ready, then runs 20 more clocks.
module top_booth_multiplier ();
    logic clk;
    logic rst;
    logic valid;
    logic signed [31:0] multiplicand;
    logic signed [31:0] multiplier;
    logic signed [63:0] product;
    logic ready;

    // Ports are connected by name through the wildcard.
    fast_multiplier booth (.*);

    // Clock: toggles every 10 time units.
    initial
    begin
        clk = 0;
        forever #10 clk = ~clk;
    end

    initial
    begin
        rst = 0;
        #7 rst = 1;
        @(posedge clk) valid <= 1;
        multiplier = 32'hA000000A;
        multiplicand = 32'h000AA000;
        @(posedge clk) valid <= 0;
        while(ready == 0)
        begin
            @(posedge clk);
        end
        repeat (20) @(posedge clk);
        $finish;
    end
endmodule
You need to consider "signed" inputs in booth multiplier ONLY for X1Y1 instance. All other instances MUST use "unsigned" inputs. This change should help!

Resources