Register file not reading any data - verilog

I am trying to design a state machine that counts through and replaces values between 47 and 58. In my waveform though, I keep getting XXX for my R_data in the register file, and thus the rest of my top level design is thrown off. I can't seem to find a reason as to why R_data is outputting XXX for any address with R_en = 1. I'm on Vivado 2020.2; thank you for any help, and please let me know if I need to clarify anything.
Register:
`timescale 1ns / 1ps
module RegFile16x8(R_Addr, W_Addr, R_en, W_en, R_Data, W_Data, Clk, Rst);
input [3:0] R_Addr, W_Addr;
input Clk, Rst, R_en, W_en;
output reg [7:0] R_Data;
input [7:0] W_Data;
reg [7:0] RegFile [0:15];
always #(posedge Clk) begin
if (Rst == 1) begin
RegFile[0] <= 8'd48;
RegFile[1] <= 8'd53;
RegFile[2] <= 8'd68;
RegFile[3] <= 8'd57;
RegFile[4] <= 8'd55;
RegFile[5] <= 8'd59;
RegFile[6] <= 8'd40;
RegFile[7] <= 8'd49;
RegFile[8] <= 8'd31;
RegFile[9] <= 8'd38;
RegFile[10] <= 8'd54;
RegFile[11] <= 8'd50;
RegFile[12] <= 8'd63;
RegFile[13] <= 8'd58;
RegFile[14] <= 8'd70;
RegFile[15] <= 8'd51;
end
else if (W_en==1) begin
RegFile[W_Addr] <= W_Data;
end
end
always #(*) begin
if (R_en==1)
R_Data <= RegFile[R_Addr];
else
R_Data <= 8'bZZZZZZZZ;
end
endmodule
Top Level:
`timescale 1ns / 1ps
module PartA(Clk, Rst, go, done, count);
input Clk, Rst, go;
output reg [6:0] count;
output reg done;
reg [2:0] State, StateNext;
parameter s1 = 0, s2 = 1, s3 = 2, s4 = 3, s5 = 4, s6 = 5, s7 = 6, s8 = 7;
reg [4:0] i;
reg [7:0] temp;
reg R_en, W_en;
reg [7 :0] a_i;
wire [7:0] a_o;
//RegFile16x8(R_Addr, W_Addr, R_en, W_en, R_Data, W_Data, Clk, Rst);
RegFile16x8 r1(i[3:0], i[3:0], R_en, W_en, a_o, a_i, Clk, Rst);
always #(State, Rst, count) begin
R_en = 0;
W_en = 0;
case(State)
s1: begin
if(go == 1)
StateNext = s2;
else
StateNext = s1;
end
s2: begin
done = 0;
count = 0;
i = 0;
StateNext = s3;
end
s3: begin
if(i < 16)
StateNext = s4;
else
StateNext = s5;
end
s4: begin
R_en = 1;
temp = a_o;
if ((temp > 47) && (temp < 58))
StateNext = s7;
else
StateNext = s6;
end
s5: begin
done = 1;
StateNext = s1;
end
s6: begin
StateNext = s8;
end
s7: begin
W_en = 1;
count = count + 1;
a_i = temp - 48;
StateNext = s8;
end
s8: begin
i = i + 1;
StateNext = s3;
end
default: begin
StateNext = s1;
end
endcase
end
always #(posedge Clk) begin
if (Rst == 1)
State = s1;
else
State = StateNext;
end
endmodule
Test Bench:
`timescale 1ns / 1ps
module partA_tb();
reg Clk, Rst, go;
wire [6:0] count;
wire done;
PartA a1(Clk, Rst, go, done, count);
always begin
Clk = 0;
#200
Clk = 1;
#200;
end
initial begin
Rst = 1;
#200
Rst = 0;
go = 1;
end
endmodule

You need to keep Rst in the testbench high for a longer amount of time. The 1st posedge of Clk happens at time 200ns, and that is when you release the reset. You need to keep the reset asserted until after the 1st posedge of the clock in order to properly reset your RegFile, since it is a synchronous reset.
This testbench change allows RegFile to be reset to known values:
initial begin
Rst = 1;
#400
Rst = 0;
go = 1;
end
The change above removes the XXX from R_data.
I chose an arbitrary delay of 400, but it can be anything greater than 200. The key is that you need at least one posedge of Clk to sample Rst when it is high.

Related

UART Transmit and receive data does not start (Vivado)

I can't figure out why is it that when I set the clock frequency from 50MHz to 100MHz, by changing the clk period to 5 in the testbench, my output transmit and receive data stays at 0. Can anyone enlighten me on this? I need my clock frequency to be 100MHz. Your help will be much appreciated.
Testbench
`timescale 1ns / 1ps
module uart_tx_test();
parameter periodCLK_2 = 5;
parameter perioddump = 10;
parameter delay = 1;
parameter delay_in = 2;
reg CLK_TB = 0 ;
reg RSTN ;
reg [7:0] data = 0;
reg clk = 0;
reg enable = 0;
wire tx_busy;
wire rdy;
wire [7:0] rxdata;
wire loopback;
reg rdy_clr = 0;
uart test_uart(.din(data),
.wr_en(enable),
.clk_50m(clk),
.tx(loopback),
.tx_busy(tx_busy),
.rx(loopback),
.rdy(rdy),
.rdy_clr(rdy_clr),
.dout(rxdata));
initial begin
// $dumpfile("uart.vcd");
$dumpvars(0, uart_tx_test);
enable <= 1'b1;
#2 enable <= 1'b0;
end
always begin
#5 clk = ~clk; //I set period to 5; period was 1 previously.
end
always #(posedge rdy) begin
#2 rdy_clr <= 1;
#2 rdy_clr <= 0;
if (rxdata != data) begin
$display("FAIL: rx data %x does not match tx %x", rxdata, data);
$finish;
end else begin
if (rxdata == 8'hff) begin
$display("SUCCESS: all bytes verified");
$finish;
end
data <= data + 1'b1;
enable <= 1'b1;
#2 enable <= 1'b0;
end
end
endmodule
Design Sources
module uart(
input wire [7:0] din,
input wire wr_en,
input wire clk_50m,
output wire tx,
output wire tx_busy,
input wire rx,
input wire rdy_clr,
output wire rdy,
output wire [7:0] dout
);
wire rxclk_en, txclk_en;
baud_rate_gen uart_baud(
.clk_50m(clk_50m),
.rxclk_en(rxclk_en),
.txclk_en(txclk_en)
);
transmitter uart_tx(
.tx(tx),
.din(din),
.clk_50m(clk_50m),
.clken(txclk_en),
.wr_en(wr_en),
.tx_busy(tx_busy)
);
receiver uart_rx(
.rx(rx),
.data(dout),
.clk_50m(clk_50m),
.clken(rxclk_en),
.rdy(rdy),
.rdy_clr(rdy_clr)
);
endmodule
/*
* Hacky baud rate generator to divide a 50MHz clock into a 9600 baud
* rx/tx pair where the rx clcken oversamples by 16x.
*/
module baud_rate_gen(input wire clk_50m,
output wire rxclk_en,
output wire txclk_en);
parameter RX_ACC_MAX = 100000000 / (9600 * 16);
parameter TX_ACC_MAX = 100000000 / 9600;
parameter RX_ACC_WIDTH = $clog2(RX_ACC_MAX);
parameter TX_ACC_WIDTH = $clog2(TX_ACC_MAX);
reg [RX_ACC_WIDTH - 1:0] rx_acc = 0;
reg [TX_ACC_WIDTH - 1:0] tx_acc = 0;
assign rxclk_en = (rx_acc == 5'd0);
assign txclk_en = (tx_acc == 9'd0);
always #(posedge clk_50m) begin
if (rx_acc == RX_ACC_MAX[RX_ACC_WIDTH - 1:0])
rx_acc <= 0;
else
rx_acc <= rx_acc + 5'b1;
end
always #(posedge clk_50m) begin
if (tx_acc == TX_ACC_MAX[TX_ACC_WIDTH - 1:0])
tx_acc <= 0;
else
tx_acc <= tx_acc + 9'b1;
end
endmodule
module transmitter(
input wire [7:0] din,
input wire wr_en,
input wire clk_50m,
input wire clken,
output reg tx,
output wire tx_busy
);
initial begin
tx = 1'b1;
end
parameter STATE_IDLE = 2'b00;
parameter STATE_START = 2'b01;
parameter STATE_DATA = 2'b10;
parameter STATE_STOP = 2'b11;
reg [7:0] data = 8'h00;
reg [2:0] bitpos = 3'h0;
reg [1:0] state = STATE_IDLE;
always #(posedge clk_50m) begin
case (state)
STATE_IDLE: begin
if (wr_en) begin
state <= STATE_START;
data <= din;
bitpos <= 3'h0;
end
end
STATE_START: begin
if (clken) begin
tx <= 1'b0;
state <= STATE_DATA;
end
end
STATE_DATA: begin
if (clken) begin
if (bitpos == 3'h7)
state <= STATE_STOP;
else
bitpos <= bitpos + 3'h1;
tx <= data[bitpos];
end
end
STATE_STOP: begin
if (clken) begin
tx <= 1'b1;
state <= STATE_IDLE;
end
end
default: begin
tx <= 1'b1;
state <= STATE_IDLE;
end
endcase
end
assign tx_busy = (state != STATE_IDLE);
endmodule
module receiver(
input wire rx,
input wire rdy_clr,
input wire clk_50m,
input wire clken,
output reg rdy,
output reg [7:0] data
);
initial begin
rdy = 0;
data = 8'b0;
end
parameter RX_STATE_START = 2'b00;
parameter RX_STATE_DATA = 2'b01;
parameter RX_STATE_STOP = 2'b10;
reg [1:0] state = RX_STATE_START;
reg [3:0] sample = 0;
reg [3:0] bitpos = 0;
reg [7:0] scratch = 8'b0;
always #(posedge clk_50m) begin
if (rdy_clr)
rdy <= 0;
if (clken) begin
case (state)
RX_STATE_START: begin
/*
* Start counting from the first low sample, once we've
* sampled a full bit, start collecting data bits.
*/
if (!rx || sample != 0)
sample <= sample + 4'b1;
if (sample == 15) begin
state <= RX_STATE_DATA;
bitpos <= 0;
sample <= 0;
scratch <= 0;
end
end
RX_STATE_DATA: begin
sample <= sample + 4'b1;
if (sample == 4'h8) begin
scratch[bitpos[2:0]] <= rx;
bitpos <= bitpos + 4'b1;
end
if (bitpos == 8 && sample == 15)
state <= RX_STATE_STOP;
end
RX_STATE_STOP: begin
/*
* The baud clock may not be running at exactly the
* same rate as the transmitter. If we thing that
* we're at least half way into the stop bit, allow
* transition into handling the next start bit.
*/
if (sample == 15 || (sample >= 8 && !rx)) begin
state <= RX_STATE_START;
data <= scratch;
rdy <= 1'b1;
sample <= 0;
end else begin
sample <= sample + 4'b1;
end
end
default: begin
state <= RX_STATE_START;
end
endcase
end
end
endmodule
You need to scale all your other delays accordingly. Change all your #2 to #10, then you will see the SUCCESS: all bytes verified message.
With your original clock delay of #1, your other input signal pulses (enable and rdy_clr) were wide enough for your uart design module to sample properly. For example, on the 1st posedge of clk, your design properly sampled the enable input as 1, which started the TX state machine.
You increased the clock period by a factor of 5 when you changed the delay from #1 to #5. However, your enable pulse stayed the same width as before, which means that the design sampled enable as 0, not 1. So your TX state machine stayed in the IDLE state. By changing the enable delay from #2 to #10, you are able to properly sample enable as 1.
You can easily prove this to yourself by dumping a VCD file, and viewing the waveforms inside the design.
You could replace the numeric delays with a parameter to make it easier to change to different frequencies.
Note: You stated the clk delay was originally #1. This gives the clk signal a period of 2ns, which is 500MHz, not 50MHz.

Why doesn't the up/down counter count down?

I was doing the Logic Design assignment, and I found some problems I can't solve.
I need to design a 6-bit counter, and this counter needs to count with two functions, for up and down respectively.
I have done the up part and down part, but when I run the simulation, the counting down part doesn't work correctly.
The function for counting down: the next a = a - 2^n, where n = 0, 1, 2, 3... eg. a1 = 63, a2 = 63 - 1 = 62, a3 = 62 - 2 = 60, a4 = 56...
But the simulation with my program, it becomes 63, 62, 61(63 - 2), 59(63 - 4)...
By the way, this assignment has a reset feature.
However, my program won't keep counting after being reset.
It should back to zero and continue counting theoretically.
The following is my code:
`timescale 1ns/100ps
module lab2_1(
input clk,
input rst,
output reg [5:0] out
);
reg [5:0] cnt;
wire [5:0] cnt_next;
reg updown;
wire [5:0] out_next;
initial begin
out = 0;
cnt = 1;
updown = 1;
end
assign cnt_next = (out == 6'b111111) ? 0 : cnt + 1;
assign out_next = out - (2**cnt);
always #(*) begin
if(out == 6'b111111)begin
updown = 0;
end
if(out == 6'b000000)begin
updown = 1;
end
if(rst == 1) begin
out = 0;
updown = 1;
cnt = 0;
end
end
always #(posedge clk, posedge rst) begin
if(updown == 1)begin
if(out > cnt)begin
out <= out - cnt;
end
else
out <= out + cnt;
end
else begin
out <= out_next;
end
cnt <= cnt_next;
end
endmodule
The testbench just monitors the output and drives the inpupts.
`timescale 1ns/100ps
module lab2_1_t;
wire [5:0] out;
reg clk;
reg rst;
lab2_1 v(clk, rst, out);
initial begin
clk = 0;
rst = 0;
$monitor($time,":clk = %b, rst = %b, out = %d", clk, rst, out);
end
always #10 clk = ~clk;
always #10000 rst = ~rst;
endmodule
You should not make assignments to the same signal (such as cnt) from multiple blocks. You can assign to cnt from a single always block. I don't think you need both out and cnt.
Here is a simplified version which automatically switches between up and down:
module lab2_1(
input clk,
input rst,
output reg [5:0] cnt
);
reg updown;
always #(posedge clk, posedge rst) begin
if (rst) begin
updown <= 1;
end else if (cnt == 6'b111110) begin
updown <= 0;
end else if (cnt == 6'b000001) begin
updown <= 1;
end
end
always #(posedge clk, posedge rst) begin
if (rst) begin
cnt <= 0;
end else if (updown) begin
cnt <= cnt + 1;
end else begin
cnt <= cnt - 1;
end
end
endmodule
The code shows a more typical use of the reset signal in the sequential always block. See also for more examples.
Here is a modified testbench where the reset is asserted at time 0, then released after a couple clock cycles. At the end, it asserts reset again so you can see that the counter goes to 0.
module lab2_1_t;
wire [5:0] out;
reg clk;
reg rst;
lab2_1 v(clk, rst, out);
initial begin
$monitor($time,":clk = %b, rst = %b, out = %d", clk, rst, out);
clk = 0;
rst = 1;
#40 rst = 0;
#10000 rst = 1;
#1000 $finish;
end
always #10 clk = ~clk;
endmodule

Can't resolve multiple constant drivers for net Quartus error

this is a very basic question but if somebody can help me with these errors, I'd really appreciate it. I am an EE undergrad, new to Verilog, and I'd appreciate any help/explanation.
The errors I received were:
10028 Can't resolve multiple constant drivers for "led" at compare_block.sv (66)
10029 Constant driver at compare_block.sv(61)
What are the multiple constant drivers here? Originally, I had the led assignment (led <= led_val) within the second always block, and I thought moving it up to the first always block would change the errors, but it did not.
module compare_block (clk, reset_n, result, led);
parameter data_width = 8; /
parameter size = 1000;
input clk, reset_n;
input [data_width:0] result;
logic [(data_width):0] data_from_rom;
logic [9:0] addr_to_rom;
output reg led;
reg [(data_width):0] comp_sig [size-1:0];
reg [data_width:0] comp_sig_temp;
reg [(data_width):0] filt_sig [size-1:0];
reg [(data_width):0] ans_sig [size-1:0];
integer i, iii;
reg [9:0] ii ;
integer sum_array;
wire [(data_width+3):0] array_avg;
reg [data_width:0] summed_arr [size-2 : 0];//container for all summation steps
wire total_sum;
reg ans_sig_done;
reg [data_width : 0] summed_ans;
reg [data_width:0] max_val, error_val;
initial begin
i = 0;
ii = 0;
led=0;
data_from_rom='b000000000;
summed_ans='b000000000;
max_val='b000000000;
for (iii=0;iii<(size-1);iii=iii+1) begin
filt_sig[iii]=0;
ans_sig [iii]=0;
comp_sig[iii]=0;
summed_arr[iii]=0;
end
end
//port map to ROM
rom_compare ref_wave(
.clk(clk),
.addr(addr_to_rom), //text file?
.data(data_from_rom)
);
//Moore FSM
localparam [3:0]
s0=0, s1 = 1, s2 = 2, s3 = 3, s4 = 4, s5 = 5;
reg [3:0] state_reg, state_next;
initial state_next = 0;
always #(posedge clk, negedge reset_n) begin
if (reset_n == 'b0) begin //reset is active low
state_reg <= s0;
end else begin
state_reg <= state_next;
led <= led_val;
end
end
always #(state_reg) begin
state_next = state_reg;
led=0;
case (state_reg)
s0 : begin //initial state, reset state
if (!reset_n) begin
led <= 0;
ii <= 0;
end else begin
state_next <= s1;
end
end
s1 : begin
if (ii>(size)) begin
ii <= 0;
end else begin
addr_to_rom <= ii;
state_next <= s2;
end
end
s2 : begin
comp_sig_temp <= data_from_rom;
filt_sig [ii] <= result;
state_next <= s3;
end
s3 : begin
comp_sig[ii] <= comp_sig_temp;
state_next <= s4;
end
s4 : begin
ans_sig[ii] <= filt_sig[ii] - comp_sig[ii];
state_next <= s5;
end
s5 : begin
if (ii>(size-2)) begin
ans_sig_done = 1;
end else begin
ans_sig_done = 0;
end
ii <= ii+1;
state_next <= s0;
end
endcase
end
reg [(data_width+2):0] sum;
integer j;
always #* begin
sum = 0;
if (ans_sig_done == 1) begin
for (j=4; j<(size-1); j=j+2) begin
sum = sum +ans_sig[j];
if (ans_sig[j] > max_val) begin
max_val = ans_sig[j];
end
end
end
end
assign array_avg = sum >> 'd3; //2^3 = 8
always #(clk, result) begin //posedge clk, result
filt_sig [i] <= result;
i <= i + 1;
end
assign error_val = max_val >> 'd2; //approx 25% of max value of array
reg led_val;
always #(*)
begin
if (array_avg < error_val) begin
led_val <= 'b1;
end else begin
led_val <= 'b0;
end
end
endmodule
I figured it out!
The two instances were me trying to initialize one led = 0 in one always block and then assigning led <= led_val in another. You can't refer to an output in two different always blocks.

Cause of inferred latches (not else or default statement) in Verilog

Quartus is telling me that I have inferred latches for input_val, ii, output_val, delayed, and addr_to_rom. I have looked at previous posts and made changes so that all my if statements have an else component and that my case statement has a default option.
What else could be causing all of these inferred latches?
(updated code below)
module simple_fir(clk, reset_n, output_val);
parameter data_width = 8;
parameter size = 1024;
input wire clk;
input wire reset_n;
//input reg [(data_width):0] input_val;
reg [(data_width):0] input_val;
output reg [data_width:0] output_val;
reg [(data_width):0] delayed;
reg [data_width:0] to_avg;
reg [9:0] ii ;
reg [9:0] i ;
reg [data_width:0] val;
reg [data_width:0] output_arr [(size-1):0];
logic [(data_width):0] data_from_rom; //precalculated 1 Hz sine wave
logic [9:0] addr_to_rom;
initial delayed = 0;
initial i = 0;
initial ii = 0;
//port map to ROM
rom_data input_data(
.clk(clk),
.addr(addr_to_rom), //text file?
.data(data_from_rom)
);
//Moore FSM
localparam [3:0]
s0=0, s1 = 1, s2 = 2, s3 = 3, s4 = 4, s5 = 5, s6 = 6;
reg [3:0] state_reg, state_next;
initial state_next = 0;
always #(posedge clk, negedge reset_n) begin //posedge clk, reset_n
if (reset_n == 'b0) begin //reset is active low
state_reg <= s0;
end else begin
state_reg <= state_next;
end
end
always #* begin
state_next = state_reg; // default state_next
case (state_reg)
s0 : begin //initial state, reset state
if (!reset_n) begin
output_val = 0;
delayed = 0;
to_avg= 0;
i = 0;
ii = 0;
end else begin
state_next = s1;
end
end
s1 : begin
if (ii>(size-2)) begin
ii = 0;
end else begin
addr_to_rom = ii;
end
input_val = input_val;
delayed = delayed;
state_next = s2;
end
s2 : begin
input_val = data_from_rom;
ii = ii+1;
delayed = delayed;
state_next = s3;
end
s3 : begin
delayed = input_val;
state_next = s4;
end
s4 : begin
addr_to_rom = ii;
input_val = input_val;
delayed = delayed;
state_next = s5;
end
s5 : begin
input_val = data_from_rom;
delayed = delayed;
//i=i+1;
//ii <= ii+1;
state_next = s6;
end
s6 : begin
to_avg = input_val + delayed; //summing two values
val = (to_avg >> 1); //taking the average
output_arr[ii-1] = val; //indexing starts on [2]
output_val = val;
state_next = s0;
input_val = input_val;
end
default: begin
addr_to_rom = addr_to_rom;
data_from_rom = data_from_rom;
delayed = delayed;
input_val = input_val;
to_avg = to_avg;
val = val;
output_val = output_val;
ii = ii;
end
endcase
end
endmodule
You have 6 states in your combo always block, but you are assigning to delayed in only 3 of them. This implies that you want to retain state, which infers latches. You should make sure delayed is assigned some value in all states (and in all branches of if/else, if applicable.
Also, you have an incomplete sensitivity list. You likely want to change:
always #(state_reg) begin
to:
always #* begin
#* is the implicit sensitivity list, which triggers the always block whenever a change occurs to any signal on the RHS of an assignment.
you have to look at the ways state machines are programmed. Usually states are calculated using flop and the final assignment is a combinatorial. You have it vice versa. Therefore you have issues. Something like the following should work for you. Sorry, i did not test it. The code needs to use non-blocking assignments except in the place where you caculate an intermediate value.
always #*
state_reg = state_next;
always #(posedge clk, negedge reset_n) begin //posedge clk, reset_n
if (reset_n == 'b0) begin //reset is active low
state_next <= s0;
end
else begin
case (state_reg)
S0: begin
output_val <= 0;
delayed <= 0;
i <= 0;
ii <= 0;
end
s1 : begin
if (ii>(size-2)) begin
ii <= 0;
end else begin
addr_to_rom <= ii;
end
input_val <= input_val;
delayed <= delayed;
state_next <= s2;
end
s2 : begin
input_val <= data_from_rom;
ii <= ii+1;
state_next <= s3;
end
s3 : begin
delayed <= input_val;
state_next <= s4;
end
s4 : begin
addr_to_rom <= ii;
state_next <= s5;
end
s5 : begin
input_val <= data_from_rom;
state_next <= s6;
end
s6 : begin
// calculating intermediate value, need blocking assignments here
to_avg = input_val + delayed; //need blocking here
val = (to_avg >> 1); //need blocking here
output_arr[ii-1] <= val; //indexing starts on [2]
output_val <= val;
state_next <= s0;
end
endcase
end // else
end // always
I noticed you used a coding style that a default assignment state_next = state_reg is placed at the very top of the always block. That makes state_next always have a legal driver if it is not explicitly assigned. This is also required for other combinational signals. Otherwise, latch inferred. Besides, the signal data_from_rom, which I assume is an output from module rom_data, has mutiple drivers.

Generating unsigned number for booth multiplier

For an academic excercise, I have implemented a 32-bit Karatsuba multiplier which takes 17 cycles to run by doing parallel multiplication of 16 bits each and shifting them accordingly.
I am getting an issue where the partial products need to be unsigned, but booth multiplier is generating signed partial product for me, regardless of the input type I give, because of which I get incorrect partial products. How can I solve this?
For eg. my two signed inputs are 0xA000_000A and 0x000A_A000. So the first partial product of A000 * 000A should be 64000 but I get 0xFFFC4000 (FFFF_A000 * 0000_000A). I have shared my code here for the booth mult and its testbench.
module booth_multiplier
(
input logic clk,
input logic rst,
input logic valid,
input logic signed [15:0] Mul_X,
input logic signed [15:0] Mul_Y,
output logic signed [31:0] product,
output logic result_ready
);
logic unsigned Q_1;
bit [4:0] count;
logic signed [15:0] multiplier;
logic signed [15:0] multiplicand;
logic [15:0] A, temp_A;
logic signed [32:0] partial_product;
logic signed [32:0] partial_multiplier;
typedef enum {IDLE=0, OPERATE} fsm;
fsm state, next_state;
parameter ADD = 2'b01, SUB = 2'b10;
//assign product = multiplier[16:1];
always#(posedge clk or negedge rst)
begin
if(~rst)
begin
count <= 0;
state <= IDLE;
multiplier <= 0;
multiplicand <= 0;
end
else begin
count <= count+1;
state <= next_state;
end
end
always#(*)
begin
case(state)
IDLE : begin
Q_1 = 0;
A = 0;
count = 0;
product = 0;
temp_A = 0;
result_ready = 0;
if(valid) begin
multiplicand = Mul_X;
multiplier = Mul_Y;
partial_product = {A, multiplier, Q_1};
partial_multiplier = 0;
next_state = OPERATE;
end
end
OPERATE: begin
case(partial_product[1:0])
ADD: begin
temp_A = A + multiplicand;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = partial_product[0];
A = partial_product[32:17];
end
SUB: begin
temp_A = A - multiplicand;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = partial_product[0];
A = partial_product[32:17];
end
default: begin
temp_A = A;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = multiplier[0];
A = partial_product[32:17];
end
endcase
if(count == 16) begin
next_state = IDLE;
product = partial_product >> 1;
result_ready = 1;
end
else next_state = OPERATE;
end
endcase
end
endmodule
This I am using to do 4 parallel multiplications in
module fast_multiplier
(
input logic clk,
input logic rst,
input valid,
input logic signed [31:0] multiplicand,
input logic signed [31:0] multiplier,
output logic signed [63:0] product,
output logic ready);
logic [15:0] X1;
logic [15:0] Y1;
logic [15:0] Xr;
logic [15:0] Yr;
logic [31:0] X1_Yr;
logic [31:0] Xr_Yr;
logic [31:0] X1_Y1;
logic [31:0] Xr_Y1;
logic ready1, ready2, ready3, ready4;
assign X1 = multiplicand[31:16];
assign Y1 = multiplier[31:16];
assign Xr = multiplicand[15:0];
assign Yr = multiplier[15:0];
booth_multiplier X1Y1
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(X1),
.Mul_Y(Y1),
.product(X1_Y1),
.result_ready(ready1));
booth_multiplier X1Yr
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(X1),
.Mul_Y(Yr),
.product(X1_Yr),
.result_ready(ready2));
booth_multiplier XrY1
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(Xr),
.Mul_Y(Y1),
.product(Xr_Y1),
.result_ready(ready3));
booth_multiplier XrYr
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(Xr),
.Mul_Y(Yr),
.product(Xr_Yr),
.result_ready(ready4));
always#(posedge clk or negedge rst)
begin
if(~rst)
begin
product <= 0;
ready <= 0;
X1_Yr <= 0;
X1_Y1 <= 0;
Xr_Yr <= 0;
Xr_Y1 <= 0;
end
else begin
product <= ({32'b0,X1_Y1} << 32) + (({32'b0,X1_Yr} + {32'b0,Xr_Y1}) << 16) + {32'b0,Xr_Yr};
ready <= ready1 & ready2 & ready3 & ready4;
end
end
endmodule
Also, sharing the testbench,
module top_booth_multiplier ();
logic clk;
logic rst;
logic valid;
logic signed [31:0] multiplicand;
logic signed [31:0] multiplier;
logic signed [63:0] product;
logic ready;
fast_multiplier booth (.*);
initial
begin
clk = 0;
forever #10 clk = ~clk;
end
initial
begin
rst = 0;
#7 rst = 1;
#(posedge clk) valid <= 1;
multiplier = 32'hA000000A;
multiplicand = 32'h000AA000;
#(posedge clk) valid <= 0;
while(ready == 0)
begin
#(posedge clk);
end
repeat (20) #(posedge clk);
$finish;
end
endmodule
You need to consider "signed" inputs in booth multiplier ONLY for X1Y1 instance. All other instances MUST use "unsigned" inputs. This change should help!

Resources