Verilog : uart on FPGA and simulation behavioural differences - verilog

EDIT: removed some redundancies, moved all assignments to non-blocking, inserted a reset mapped as one of the input buttons of my FPGA... but when I implement the code, it starts transmitting the same wrong character and gets stuck in a single state of my machine.
Post Synthesis and Post-Implementation simulations are identical,$time-wise
module UART (reset_button, sysclk_p, sysclk_n,TxD, Tx_busy, Tx_state_scope_external);
input reset_button, sysclk_p, sysclk_n;
output wire TxD, Tx_busy;
output wire [1:0]Tx_state_scope_external;
//internal communications signals
wire clk_internal;
//buffer unit control signals
wire [7:0]TxD_data_internal;
wire Tx_start_internal;
wire Tx_busy_internal;
wire reset_flag;
reset_buf RESET_BUFF (.reset_internal (reset_flag), .reset (reset_button));
differential_CK CK_GENERATION (.sysclk_p (sysclk_p), .sysclk_n(sysclk_n), .clk(clk_internal));
output_Dbuffer OB1 (.reset (reset_flag), .RTS_n (Tx_busy_internal), .clk(clk_internal), .TX_trigger (Tx_start_internal), .TX_data(TxD_data_internal));
async_transmitter TX1 (.reset (reset_flag), .clk (clk_internal), .TxD_data(TxD_data_internal), .Tx_start (Tx_start_internal), .TxD(TxD), .Tx_busy_flag(Tx_busy_internal), .Tx_state_scope(Tx_state_scope_external));
obuf_TX O_TX1( .Tx_busy(Tx_busy), .Tx_busy_flag(Tx_busy_internal));
endmodule
module reset_buf (
output reset_internal,
input reset
);
// IBUF: Single-ended Input Buffer
// 7 Series
// Xilinx HDL Libraries Guide, version 14.7
IBUF #(
.IBUF_LOW_PWR("TRUE"), // Low power (TRUE) vs. performance (FALSE) setting for referenced I/O standards
.IOSTANDARD("DEFAULT") // Specify the input I/O standard
) IBUF_inst (
.O(reset_internal), // Buffer output
.I(reset) // Buffer input (connect directly to top-level port)
);
// End of IBUF_inst instantiation
endmodule
module differential_CK(
input sysclk_p,
input sysclk_n,
output clk
);
// IBUFGDS: Differential Global Clock Input Buffer
// 7 Series
// Xilinx HDL Libraries Guide, version 14.7
IBUFGDS #(
.DIFF_TERM("FALSE"), // Differential Termination
.IBUF_LOW_PWR("TRUE"), // Low power="TRUE", Highest performance="FALSE"
.IOSTANDARD("DEFAULT") // Specify the input I/O standard
) IBUFGDS_inst (
.O(clk), // Clock buffer output
.I(sysclk_p), // Diff_p clock buffer input (connect directly to top-level port)
.IB(sysclk_n) // Diff_n clock buffer input (connect directly to top-level port)
);
// End of IBUFGDS_inst instantiation
endmodule
module output_Dbuffer (
input reset,
input RTS_n, //TX_BUSY flag of the transmitter is my ready to send flag
input clk, //ck needed for the FSM
output wire TX_trigger, //TX_START flag of the transmitter now comes from THIS unit instead of Receiver
output wire [7:0]TX_data //byte for transmission
);
//internal variables
reg [7:0] mem [0:9]; //memory init, 10 * 8 bit locations
integer m, n, i, j, k ; //M = row [a.k.a. bytes], N = column [a.k.a. single bits]
reg TX_trigger_int;
reg [7:0] TX_data_int, TX_complete;
//reg sum256_ok;
reg [7:0]checksum_buff ;
//buffer FSM required variables
localparam //state enumeration declaration
BUF_IDLE = 3'b000,
BUF_START = 3'b001,
BUF_BYTES = 3'b010,
BUF_BUSY = 3'b011,
BUF_TX_CHECKSUM = 3'b100;
reg [2:0] buf_state; //2 bits for 4 states
//static assignments of OUTPUTS : Transmission Flag and Transmission Data (content)
assign TX_trigger = TX_trigger_int;
assign TX_data = TX_data_int;
//Block for transmitting [here I manage the TX_Data and TX_Trigger functionality]
always #(posedge clk)
begin
if (reset)
begin
buf_state <= BUF_IDLE;
TX_trigger_int <= 0;
TX_data_int <= 8'b00000000;
end
else case (buf_state)
BUF_IDLE:
begin
TX_trigger_int <= 0;
TX_data_int <= 8'b00000000;
m <=0;
n <=0;
i <=0;
j <=0;
mem[9] <= 8'b01010001; //81
mem[8] <= 8'b01000000; //64
mem[7] <= 8'b00110001; //49
mem[6] <= 8'b00100100; //36
mem[5] <= 8'b00011001; //25
mem[4] <= 8'b00010000; //16
mem[3] <= 8'b00001001; //9
mem[2] <= 8'b00000100; //4
mem[1] <= 8'b00000001; //1
mem[0] <= 8'b00000010;//2
checksum_buff <= 8'd31;
//check if the TX is not busy
if (RTS_n == 0) buf_state <= BUF_START;
end
BUF_START:
begin
TX_trigger_int <= 0;
if ((i == 0) || ( (j - i) > 1 )) buf_state <= BUF_BYTES;
else begin
$display ("BUFFER BUSY #time:", $time);
buf_state <= BUF_BUSY;
end
end
BUF_BYTES:
begin
//check if the TX is busy
if (RTS_n==0)
begin
// TX_trigger_int = 1; 21.09 MOVED THE TRIGGER INSIDE THE ELSE N LINE 498
if (j > 9)
begin
TX_trigger_int <= 0;
buf_state <= BUF_TX_CHECKSUM;
end
else begin
TX_data_int <= mem[j];
TX_trigger_int <= 1;
j <= j+1;
//TX_trigger_int =0;
buf_state <= BUF_START;
end
end
else buf_state <= BUF_BYTES;
end
BUF_BUSY:
begin
if (RTS_n == 0)
begin
$display ("BUFFER AVAILABLE AGAIN #time:", $time);
buf_state <= BUF_START;
end
end
BUF_TX_CHECKSUM:
begin
if (RTS_n==0) begin
TX_data_int <= checksum_buff;
// sum256_ok = 0;
TX_trigger_int <= 1;
buf_state <= BUF_IDLE;
end
end
//default: buf_state <= BUF_IDLE;
endcase
end
endmodule
module async_transmitter(
input clk,
input reset,
//differential clock pair
input [7:0] TxD_data,
input Tx_start, // it is ==TX_TRIGGER
output wire TxD, //bit being sent to the USB
output reg Tx_busy_flag,
output wire [1:0]Tx_state_scope
);
localparam //state enumeration declaration
TX_IDLE = 2'b00,
TX_START_BIT = 2'b01,
TX_BITS = 2'b10,
TX_STOP_BIT = 2'b11;
parameter ClkFrequencyTx = 200000000; // 200MHz
parameter BaudTx = 9600;
reg [1:0] Tx_state; //2 bits for 4 states
integer bit_counter; //bit counter variable
reg [7:0]TxD_data_int, TxD_int;
integer i; //vector index for output data
wire TXSTART_Trigger;
StartDetectionUnitTX SDU_TX (.clk(clk), .state (Tx_state), .signal_in (Tx_start), . trigger (TXSTART_Trigger));
wire BitTick;
BaudTickGen #(ClkFrequencyTx, BaudTx) as (.clk(clk), .trigger (TXSTART_Trigger), .tick(BitTick));
//BitTick is 16times the frequency generated during the RX portion
assign TxD = TxD_int;
always #(posedge clk) begin
if (reset)
begin
Tx_state <= TX_IDLE;
TxD_int <= 1;
Tx_busy_flag <=0;
end
else case (Tx_state)
TX_IDLE:
begin //reinitialization and check on the trigger condition
bit_counter <= 0;
TxD_data_int <= 8'b00000000;
i <= 0;
TxD_int <= 1; //idle state
Tx_busy_flag <= 0;
if (TXSTART_Trigger) begin
Tx_state <= TX_START_BIT;
TxD_data_int <= TxD_data;
Tx_busy_flag <= 1;
bit_counter <= 8;
end
end
TX_START_BIT:
begin
if (BitTick)
begin
TxD_int <= 0 ; //start bit is a ZERO logical value
Tx_state <= TX_BITS;
end
end
TX_BITS:
begin
if (BitTick)
begin
bit_counter <= bit_counter -1;
TxD_int <= TxD_data_int[i];
// $display ("ho trasmesso dalla UART un bit di valore %b al tempo: ", TxD, $time);
i <= i+1;
if (bit_counter < 1) Tx_state <= TX_STOP_BIT;
end
end
TX_STOP_BIT:
begin
if (BitTick) begin
TxD_int <= 1; //STOP BIT is a logical '1'
Tx_busy_flag <= 0;
Tx_state <= TX_IDLE;
end
end
// default: Tx_state <= TX_IDLE;
endcase
end
assign Tx_state_scope = Tx_state;
endmodule
module obuf_TX (
output Tx_busy,
input Tx_busy_flag
);
// OBUF: Single-ended Output Buffer
// 7 Series
// Xilinx HDL Libraries Guide, version 14.7
OBUF #(
.DRIVE(12), // Specify the output drive strength
.IOSTANDARD("DEFAULT"), // Specify the output I/O standard
.SLEW("SLOW") // Specify the output slew rate
) OBUF_inst (
.O(Tx_busy), // Buffer output (connect directly to top-level port)
.I(Tx_busy_flag) // Buffer input
);
// End of OBUF_inst instantiation
endmodule
module StartDetectionUnitTX ( //detects a rising edge of the start bit == TRANSMISSION START, during the IDLE state = 0000
input clk, [1:0]state,
input signal_in,
output trigger
);
reg signal_d;
always #(posedge clk)
begin
signal_d <= signal_in;
end
assign trigger = signal_in & (!signal_d) & (!state);
endmodule
module BaudTickGen (
input clk, trigger,
output tick //generates a tick at a specified baud rate *oversampling
);
parameter ClkFrequency = 200000000; //sysclk at 200Mhz
parameter Baud = 9600;
parameter Oversampling = 1;
//20832 almost= ClkFrequency / Baud, to make it an integer number
integer counter = (20833/Oversampling)-1; //-1 so counter can get to 0
reg out;
always #(posedge clk)
begin
if (trigger)
begin
counter <= (20833/Oversampling)-1; //-1 so counter can get to 0
out <= 1;
end
if (counter == 0)
begin
counter <= (20833/Oversampling)-1; //-1 so counter can get to 0
out <= 1;
end
else begin
counter <= counter-1;
out <= 0;
end
end
assign tick = out;
endmodule
My FPGA is a Virtex-7 VC707 and I'm using Vivado for my design flow.
Here I am attaching an image of my looping error.
error image

What have you done? Have you just simulated the code? Are you saying that it fails on the board, but the post-implementation sim is Ok?
A difference between pre- and post-implementation sim could point to a race condition. Get rid of all your blocking assignments, replace with NBAs (why did you use blocking assignments?)
Don't go to Chipscope - it's just a red flag that you don't know what you're doing
The code is a mess - simplify it. The Xilinx-specific stuff is irrelevant - get rid of it if you want anyone to look at it, fix comments (2-bit state?!), fix your statement about getting stuck in '10', etc
Have you run this through Vivado? Seriously? You have multiple drivers on various signals. Get rid of the initial block, use a reset. Initialise the RAM in a way which is understood by the tools. Even if Vivado is capable of initialising stuff using a separate initial block, don't do it
Get rid of statements like 'else Tx_state = TX_IDLE' in the TX_IDLE branch - they're redundant, and just add verbosity
Write something which fails stand-alone, and post it again.

Related

UART Transmit and receive data does not start (Vivado)

I can't figure out why is it that when I set the clock frequency from 50MHz to 100MHz, by changing the clk period to 5 in the testbench, my output transmit and receive data stays at 0. Can anyone enlighten me on this? I need my clock frequency to be 100MHz. Your help will be much appreciated.
Testbench
`timescale 1ns / 1ps
module uart_tx_test();
parameter periodCLK_2 = 5;
parameter perioddump = 10;
parameter delay = 1;
parameter delay_in = 2;
reg CLK_TB = 0 ;
reg RSTN ;
reg [7:0] data = 0;
reg clk = 0;
reg enable = 0;
wire tx_busy;
wire rdy;
wire [7:0] rxdata;
wire loopback;
reg rdy_clr = 0;
uart test_uart(.din(data),
.wr_en(enable),
.clk_50m(clk),
.tx(loopback),
.tx_busy(tx_busy),
.rx(loopback),
.rdy(rdy),
.rdy_clr(rdy_clr),
.dout(rxdata));
initial begin
// $dumpfile("uart.vcd");
$dumpvars(0, uart_tx_test);
enable <= 1'b1;
#2 enable <= 1'b0;
end
always begin
#5 clk = ~clk; //I set period to 5; period was 1 previously.
end
always #(posedge rdy) begin
#2 rdy_clr <= 1;
#2 rdy_clr <= 0;
if (rxdata != data) begin
$display("FAIL: rx data %x does not match tx %x", rxdata, data);
$finish;
end else begin
if (rxdata == 8'hff) begin
$display("SUCCESS: all bytes verified");
$finish;
end
data <= data + 1'b1;
enable <= 1'b1;
#2 enable <= 1'b0;
end
end
endmodule
Design Sources
module uart(
input wire [7:0] din,
input wire wr_en,
input wire clk_50m,
output wire tx,
output wire tx_busy,
input wire rx,
input wire rdy_clr,
output wire rdy,
output wire [7:0] dout
);
wire rxclk_en, txclk_en;
baud_rate_gen uart_baud(
.clk_50m(clk_50m),
.rxclk_en(rxclk_en),
.txclk_en(txclk_en)
);
transmitter uart_tx(
.tx(tx),
.din(din),
.clk_50m(clk_50m),
.clken(txclk_en),
.wr_en(wr_en),
.tx_busy(tx_busy)
);
receiver uart_rx(
.rx(rx),
.data(dout),
.clk_50m(clk_50m),
.clken(rxclk_en),
.rdy(rdy),
.rdy_clr(rdy_clr)
);
endmodule
/*
* Hacky baud rate generator to divide a 50MHz clock into a 9600 baud
* rx/tx pair where the rx clcken oversamples by 16x.
*/
module baud_rate_gen(input wire clk_50m,
output wire rxclk_en,
output wire txclk_en);
parameter RX_ACC_MAX = 100000000 / (9600 * 16);
parameter TX_ACC_MAX = 100000000 / 9600;
parameter RX_ACC_WIDTH = $clog2(RX_ACC_MAX);
parameter TX_ACC_WIDTH = $clog2(TX_ACC_MAX);
reg [RX_ACC_WIDTH - 1:0] rx_acc = 0;
reg [TX_ACC_WIDTH - 1:0] tx_acc = 0;
assign rxclk_en = (rx_acc == 5'd0);
assign txclk_en = (tx_acc == 9'd0);
always #(posedge clk_50m) begin
if (rx_acc == RX_ACC_MAX[RX_ACC_WIDTH - 1:0])
rx_acc <= 0;
else
rx_acc <= rx_acc + 5'b1;
end
always #(posedge clk_50m) begin
if (tx_acc == TX_ACC_MAX[TX_ACC_WIDTH - 1:0])
tx_acc <= 0;
else
tx_acc <= tx_acc + 9'b1;
end
endmodule
module transmitter(
input wire [7:0] din,
input wire wr_en,
input wire clk_50m,
input wire clken,
output reg tx,
output wire tx_busy
);
initial begin
tx = 1'b1;
end
parameter STATE_IDLE = 2'b00;
parameter STATE_START = 2'b01;
parameter STATE_DATA = 2'b10;
parameter STATE_STOP = 2'b11;
reg [7:0] data = 8'h00;
reg [2:0] bitpos = 3'h0;
reg [1:0] state = STATE_IDLE;
always #(posedge clk_50m) begin
case (state)
STATE_IDLE: begin
if (wr_en) begin
state <= STATE_START;
data <= din;
bitpos <= 3'h0;
end
end
STATE_START: begin
if (clken) begin
tx <= 1'b0;
state <= STATE_DATA;
end
end
STATE_DATA: begin
if (clken) begin
if (bitpos == 3'h7)
state <= STATE_STOP;
else
bitpos <= bitpos + 3'h1;
tx <= data[bitpos];
end
end
STATE_STOP: begin
if (clken) begin
tx <= 1'b1;
state <= STATE_IDLE;
end
end
default: begin
tx <= 1'b1;
state <= STATE_IDLE;
end
endcase
end
assign tx_busy = (state != STATE_IDLE);
endmodule
module receiver(
input wire rx,
input wire rdy_clr,
input wire clk_50m,
input wire clken,
output reg rdy,
output reg [7:0] data
);
initial begin
rdy = 0;
data = 8'b0;
end
parameter RX_STATE_START = 2'b00;
parameter RX_STATE_DATA = 2'b01;
parameter RX_STATE_STOP = 2'b10;
reg [1:0] state = RX_STATE_START;
reg [3:0] sample = 0;
reg [3:0] bitpos = 0;
reg [7:0] scratch = 8'b0;
always #(posedge clk_50m) begin
if (rdy_clr)
rdy <= 0;
if (clken) begin
case (state)
RX_STATE_START: begin
/*
* Start counting from the first low sample, once we've
* sampled a full bit, start collecting data bits.
*/
if (!rx || sample != 0)
sample <= sample + 4'b1;
if (sample == 15) begin
state <= RX_STATE_DATA;
bitpos <= 0;
sample <= 0;
scratch <= 0;
end
end
RX_STATE_DATA: begin
sample <= sample + 4'b1;
if (sample == 4'h8) begin
scratch[bitpos[2:0]] <= rx;
bitpos <= bitpos + 4'b1;
end
if (bitpos == 8 && sample == 15)
state <= RX_STATE_STOP;
end
RX_STATE_STOP: begin
/*
* The baud clock may not be running at exactly the
* same rate as the transmitter. If we thing that
* we're at least half way into the stop bit, allow
* transition into handling the next start bit.
*/
if (sample == 15 || (sample >= 8 && !rx)) begin
state <= RX_STATE_START;
data <= scratch;
rdy <= 1'b1;
sample <= 0;
end else begin
sample <= sample + 4'b1;
end
end
default: begin
state <= RX_STATE_START;
end
endcase
end
end
endmodule
You need to scale all your other delays accordingly. Change all your #2 to #10, then you will see the SUCCESS: all bytes verified message.
With your original clock delay of #1, your other input signal pulses (enable and rdy_clr) were wide enough for your uart design module to sample properly. For example, on the 1st posedge of clk, your design properly sampled the enable input as 1, which started the TX state machine.
You increased the clock period by a factor of 5 when you changed the delay from #1 to #5. However, your enable pulse stayed the same width as before, which means that the design sampled enable as 0, not 1. So your TX state machine stayed in the IDLE state. By changing the enable delay from #2 to #10, you are able to properly sample enable as 1.
You can easily prove this to yourself by dumping a VCD file, and viewing the waveforms inside the design.
You could replace the numeric delays with a parameter to make it easier to change to different frequencies.
Note: You stated the clk delay was originally #1. This gives the clk signal a period of 2ns, which is 500MHz, not 50MHz.

how to properly connect receiver, sender and top and make them dependent from each other - RS232

I've got a simple project which requires me to write a code for RS232 receiver and sender, then put them together and, finally, test if it works properly. I've prepared code for both sender and receiver (and also connecting block - top). My problem is that I don't know how to connect them, so they could work with each other properly.
The main issue is that I can't "transfer" data from data_o to data_i because of the fact that one is reg and second - wire. I wouldn't like to use inout for these purposes. I can't figure out any possible modifications to make it work.
Another issue is putting some flags that could kind of follow idea like this: if receiving -> not sending, if sending -> not receiving.
Here's my code:
top.v
`timescale 1ns / 1ps
module top (
clk_i,
rst_i,
RXD_i,
data_i,
TXD_o,
data_o
);
input clk_i;
input rst_i;
input RXD_i;
output TXD_o;
//the problem is here, can't data_i <= data_o because output is reg
input [7:0] data_i;
output [7:0] data_o;
receiver r1(clk_i, RXD_i, data_o);
sender s1(clk_i, data_i, TXD_o);
endmodule
receiver.v
`timescale 1ns / 1ps
module receiver (
clk_i,
RXD_i,
data_o
);
//inputs and outputs
input clk_i;
input RXD_i;
output reg [7:0] data_o;
//counter values
parameter received_bit_period = 5208;
parameter half_received_bit_period = 2604;
//state definitions
parameter ready = 2'b00;
parameter start_bit = 2'b01;
parameter data_bits = 2'b10;
parameter stop_bit = 2'b11;
//operational regs
reg [12:0] counter = 0; //9765.625Hz
reg [7:0] received_data = 8'b00000000;
reg [3:0] data_bit_count = 0;
reg [1:0] state = ready;
//latching part
reg internal_RXD;
always #(posedge clk_i) //latch RXD_i value to internal_RXD
begin
internal_RXD = RXD_i;
end
always #(clk_i) //receiving process
begin
case (state)
ready :
begin
if (internal_RXD == 0)
begin
state <= start_bit;
counter <= counter + 1;
end
else
begin
state <= ready;
counter <= 0;
data_bit_count <= 0;
end
end
start_bit :
begin
if (counter == half_received_bit_period)
begin
if (internal_RXD == 0)
begin
state <= data_bits;
counter <= 0;
end
end
else
begin
state <= start_bit;
counter <= counter + 1;
end
end
data_bits :
begin
if (counter == received_bit_period)
begin
received_data[data_bit_count] <= internal_RXD;
data_bit_count <= data_bit_count + 1;
counter <= 0;
if (data_bit_count == 8)
state <= stop_bit;
end
else
counter <= counter + 1;
end
stop_bit:
begin
counter <= counter + 1;
if (counter == received_bit_period)
begin
state <= ready;
data_o <= received_data;
end
end
endcase
end
endmodule
sender.v
`timescale 1ns / 1ps
module sender (
clk_i,
data_i,
TXD_o
);
//inputs and outputs
input clk_i;
input [7:0] data_i;
output reg TXD_o;
//counter values
parameter received_bit_period = 5208;
parameter half_received_bit_period = 2604;
//state definitions
parameter ready = 1'b0;
parameter data_bits = 1'b1;
//operational regs
reg [12:0] counter = 0; //9765.625Hz
reg [9:0] framed_data = 0;
reg [3:0] data_bit_count = 0;
reg state = ready;
always #(posedge clk_i) //sending process
begin
case (state)
ready :
begin // flag needed?
state <= data_bits;
TXD_o <= 1;
framed_data[0] <= 1'b0;
framed_data[8:1] <= data_i;
framed_data[9] <= 1'b1;
counter <= 0;
end
data_bits :
begin
counter <= counter + 1;
if (data_bit_count == 10)
begin // flag needed?
state <= ready;
data_bit_count <= 0;
TXD_o <= 1;
end
else
begin
if (counter == received_bit_period)
begin
data_bit_count <= data_bit_count + 1;
end
TXD_o <= framed_data[data_bit_count];
end
end
endcase
end
endmodule
You don't!
In all CPU's and FPGA nowadays the read and write data path are separate buses. You will find that also with all CPU cores. Have a look at AXI or AHB bus protocols from ARM.
What is more worrying is the way you have implemented your functions. You would at least need some 'data valid' signal for the transmitter to know when there is valid data to transmit and for the receive when valid data has arrived.
Even that is not enough because for the TX the connecting logic would need to know when the data has been send and the next byte can go out.
You need to make a (preferably standard) CPU interface which talks to your UART. (For beginner I would not use AXI.)
As to your flags: they would come from within the CPU interface.
Last: a UART should be capable of transmitting and receiving at the same time.

Making Vivado Synthesis "A process triggered every clock cycle will not have functionality every clock cycle"

This is code for ALU that does addition and multiplication only. An addition is handled in same clock cycle but the multiplication result has to be delayed by 3 clock cycles.
module my_addmul(
//control signals
input i_clk,
input i_rst,
input i_en,
//add=01, mul=10
input [1:0] i_op,
//input and output registers
input [31:0] i_A,
input [31:0] i_B,
output [31:0] o_D,
//to signal if output is valid
output o_done
);
//registers to save output
reg [31:0] r_D;
reg [63:0] r_mul;//(*keep="true"*)
reg r_mul_done;
reg r_mul_done2;
reg r_done;
//updating outputs
assign o_D = r_D;
assign o_done = r_done;
always # (posedge i_clk)
begin
r_done <= 0;
r_mul_done <= 0;
if (i_rst) begin
r_D <= 0;
r_mul <= 0;
r_mul_done <= 0;
r_mul_done2 <= 0;
end else if (i_clk == 1) begin
if (i_en == 1) begin
//addition - assignment directly to OP registers
if (i_op == 01) begin
r_done <= 1;
r_D <= i_A + i_B;
//multiplication - indirect assignment to OP registers
end else if (i_op == 2'b10) begin
r_mul <= i_A * i_B;
r_mul_done <= 1;
end
end
//1-clock cycle delay
r_mul_done2 <= (r_mul_done == 1) ? 1 : 0;
//updating outputs in the 3rd cycle
if (r_mul_done2 == 1) begin
r_D <= r_mul[31:0];
r_done <= 1;
end
end
end
endmodule
The problem is that if the keep attribute is not used, the r_mul register that stores the multiplication output until 3rd clock cycle is optimised out. I read on the problem and realised that Vivado is thinking like this: "If the multiplication happens every clock cycle, the r_mul is over-written before it is sent to output. Therefore, it is a register being written but not read, Lets remove it!" Since I insert the 3 clock cycle wait in test bench, the simulation result is always accurate. I want to know what is the "Proper" way of doing this so I don't have to use the keep attribute. It is an ok solution but I think useful techniques should be learned so hacks don't have to be used. Any ideas or discussion welcome.
If I want to delay a signal, I'd probably insert flops for that. You can probably flop your mul_output like the way you do for the mul_done signal. Also, it is better to have different always blocks for doing the same. You can check the code below but it might be buggy since I haven't simulated/synthesized it -
module my_addmul(
//control signals
input i_clk,
input i_rst,
input i_en,
//add=01, mul=10
input [1:0] i_op,
//input and output registers
input [31:0] i_A,
input [31:0] i_B,
output [31:0] o_D,
//to signal if output is valid
output o_done
);
//registers to save output
reg [31:0] r_D;
reg [63:0] r_mul;//(*keep="true"*)
reg r_mul_1;
reg r_mul_2;
reg r_mul_done;
reg r_mul_done2;
reg r_done;
//updating outputs
assign o_D = r_D;
assign o_done = r_done;
always # (posedge i_clk)
begin
r_done <= 0;
r_mul_done <= 0;
if (i_rst) begin
r_D <= 0;
r_mul <= 0;
r_mul_done <= 0;
r_mul_done2 <= 0;
end else if (i_clk == 1) begin
if (i_en == 1) begin
//addition - assignment directly to OP registers
if (i_op == 01) begin
r_done <= 1;
r_D <= i_A + i_B;
//multiplication - indirect assignment to OP registers
end else if (i_op == 2'b10) begin
r_mul <= i_A * i_B;
r_mul_done <= 1;
end
end
end
end
always # (posedge i_clk)
begin
if (i_rst)
begin
r_mul_1 <= 0;
r_mul_done2 <= 0;
end
else
begin
r_mul_1 <= r_mul;
r_mul_done2 <= r_mul_done;
end
end
always # (posedge i_clk)
begin
if (i_rst)
begin
r_D <= 0;
r_done <= 0;
end
else
begin
r_D <= r_mul_1;
r_done <= r_mul_done2;
end
end
endmodule

I want to implement a circuit in my DE1-SOC based on the SDRAM, where should I start? (I already finished a part)

I want to make a simple project on which I load 10 numbers in SDRAM of my Altera DE1-SOC ready to be taken as input for a Logic Unit I am creating,
the logic unit only does a simple arithmetic " Y =(X+1)*(X-1), X is the input and Y is the output ".It will pick the values (one by one) from the SDRAM, calculate and spit out the result in another SDRAM arrangement.
Then the SDRAM should store this data, I wish to take this data out of the DE1-SOC to a PC, for example.
Until now I've done this code, (in case is necessary to check):
module mem_prue1 (rst_n, clk, fin);
input clk, rst_n;
output fin;
wire [6:0] data_X;
reg [6:0] sec_A, sec_B, s_sec_A, s_sec_B;
reg [13:0] rslt_Y, s_rslt_Y;
reg save_sec_A, save_sec_B, save_rslt_Y, set_ram;
reg clear, enable, next_num, no_num, fin, w_mem_out;
reg [1:0] state, nextstate;
reg [3:0] indx;
parameter S0 = 0; parameter S1 = 1; parameter S2 = 2; parameter S3 = 3;
RAM_IN RAM_IN_inst1 (
.data_X (data_X),
.indx(indx)
);
RAM_OUT RAM_OUT_inst1 (
.s_rslt_Y (s_rslt_Y),
.w_mem_out (w_mem_out),
.set_ram (set_ram)
);
always # (posedge clk or negedge rst_n)
begin
if (~rst_n)
begin
set_ram <= 1;
indx <= 0;
no_num <=0;
enable <= 1;
s_sec_A <= 0;
s_sec_B <= 0;
s_rslt_Y <= 0;
state <= S0;
end
else if (clear)
begin
enable <= 0;
state <= nextstate;
no_num <= 0;
indx <= 0;
set_ram <= 1;
fin <= 1;
end
else
begin
set_ram <= 0;
state <= nextstate;
if (save_sec_A)
s_sec_A <= sec_A;
if (save_sec_B)
s_sec_B <= sec_B;
if (save_rslt_Y)
s_rslt_Y <= rslt_Y;
if (next_num)
begin
if (indx >= 9)
begin
indx <= 0; /// resetea el indice de la memoria
no_num <= 1; // se informa que no hay numeros
end
else
indx <= indx + 4'b0001;
end
end
end
always # (*)
begin
w_mem_out = 0;
sec_A = 0; sec_B = 0; rslt_Y = 0;
save_sec_A = 0; save_sec_B = 0;
save_rslt_Y = 0; clear = 0;
next_num = 0;
case (state)
S0:
begin
if (~enable)
nextstate = S0;
else
begin
sec_A = data_X + 7'b0000001;
save_sec_A = 1;
nextstate = S1;
end
end
S1: begin
sec_B = data_X - 7'b0000001;
save_sec_B = 1;
nextstate = S2;
end
S2: begin
rslt_Y = s_sec_A * s_sec_B;
save_rslt_Y = 1;
nextstate = S3;
end
S3: begin
w_mem_out = 1;
next_num = 1;
nextstate = S0;
if (no_num == 1)
clear = 1;
end
default:
nextstate = S0;
endcase
end
endmodule
This is the memory I "simulated" as a RAM for input data :
module RAM_IN (data_X, indx);
input [0:3] indx;
output [6:0] data_X;
reg [6:0] data_X;
reg [6:0] in_ram [0:9];
always # (indx)
data_X = in_ram [indx];
initial
begin
$readmemb("C:/altera/15.0/PROYECTOS/mem_prue/in_ram.txt", in_ram);
end
endmodule
and this for output data:
module RAM_OUT (s_rslt_Y, w_mem_out, set_ram);
input [13:0]s_rslt_Y;
input set_ram, w_mem_out;
reg [3:0] addr_out; // tamano de 57600 datos
reg [13:0] mem_out [0:9];
always # (w_mem_out or set_ram)
begin
if (set_ram)
addr_out = 0;
else if (w_mem_out == 1)
begin
mem_out [addr_out] = s_rslt_Y;
addr_out = addr_out + 4'b0001;
end
else
addr_out = addr_out;
end
endmodule
and The test bench:
module mem_prue1_tb ();
wire fin;
reg clk, rst_n;
mem_prue1 mem_prue1_inst1 (
.clk(clk),
.rst_n (rst_n),
.fin (fin)
);
initial
begin
rst_n <= 1;
#1 rst_n <= 0;
#2 rst_n <= 1;
clk <= 1;
end
always
begin
#5 clk = ~clk;
end
//---------------------------
integer out,i;
initial begin
out=$fopen("C:/altera/15.0/PROYECTOS/mem_prue/mem_out.txt");
end
always#(posedge clk) begin
if(fin==1)
for(i=0;i<=9;i=i+1) begin
$fdisplay(out,"%b",mem_prue1_inst1.RAM_OUT_inst1.mem_out[i]);
if(i==9)begin
$stop;
end
end
end
endmodule
So, basically now I want to substitute that "simulated" RAM for real SDRAM, I don't know what is the most practical way to do it.
Should I use QSYS, NIOS-II, or only by learning the Megawizard IP library and generating a variation of the UniPHY. I'm just learning to use the FPGA, so I'm kinda confused at this part. I want to download the proper manuals and tutorials for learn this in detail but I wish you guys could orient me.
PD: My target would be to "isolate" my logic unit from the "simulated ram" because I'm guessing if I program just like I did, it will consume logic resources and my main goal is to calculate the Area, Energy and Speed consumption of my logic ONLY, without the memory burden.
Thanks.
Your keywords, (QSYS, megawizard, uniphy) indicate Altera. If you are just going to simulate the SDRAM, you should be okay. Sometimes, bringing up that interface in a real chip gets hairy the first time.
If you are just doing simulation, I would use QSYS to generate the SDRAM controller module. If you can do DDR3, that there is the ability to generate an Example Design. If you do that, you will be able to see how the interface to the DDR3 works. In fact it should be already to go.
As an FYI, there will be more latency on the read though, so you need to be able to either wait for the response, or you need to have a pipeline architecture, where you can have multiple reads in flight simultaneously.
The "FPGAs Now What?" tutorial offers some advice (for a Xilinx platform, which apparently doesn't match your particular case) on SDRAM simulation. Basically, it boils down to finding an SDRAM vendor with an available Verilog/VHDL model, and plugging it in to a simulation testbench. (Note that these models aren't going to be synthesizeable.)
http://www.xess.com/static/media/appnotes/FpgasNowWhatBook.pdf
Altera has a tutorial for connecting the SDRAM to a Nios II system (using Qsys) on the DE1-SoC board.
ftp://ftp.altera.com/up/pub/Altera_Material/16.0/Tutorials/Verilog/DE1-SoC/Using_the_SDRAM.pdf
If you're implementing your own controller (or using a HW only IP Core), the tutorial also has the timing information for the SDRAM as well.

Independent Nexys 4 clocks desynchronizing over time

We are developing a program which needs synchronized clocks on two devices to measure the fly time of ultrasound signals.
The problem is that when we synthesize the program and test it on two independent Nexys4 FPGAs the distance tends to decrease over time (0.13 cm/s). This ratio is constant, and always decreasing, leading us to think that the problem is in the code.
And when we synthesize the program in only one Nexys 4, no decrease is seen over time.
We have a module to listen for signals (3 signals per slave, and 3 slaves) called dataListener: the signal SendCommand is the control signal for an UART module which sends the direction and command to an SRF02 ultrasound, this module is exactly the same on both devices.
module dataListener(
input mclk,
input clkSync,
input reset,
input rxDataRdy,
output wire[7:0] command,
output reg[7:0] direction,
output reg read,
output reg sendCommand,
output reg dataChanged,
output reg [1:0] slave,
output reg [1:0] sensor
);
parameter dir0 = 8'd0;
parameter dir1 = 8'd3;
parameter dir2 = 8'd6;
parameter rangingCommand = 8'd87;
parameter readCommand = 8'd94;
//parameter clkTime = 0.000000001; // 1ns Simulation // 10ns FPGA
//parameter windowTime = 0.08; // 80 ms
//parameter listenTime = 0.07; // 70ms
parameter windowCyclesDuration = 8000000;
parameter listenCyclesDuration = 7000000;
reg [54:0] windowCounter;
reg emitSent;
reg readSent;
reg slave1;
reg slave2;
reg slave3;
assign command = emitSent ? readCommand : rangingCommand;
///////////////////////////////////////////////////////////////////
always #(posedge mclk) begin
if( reset )begin
sensor <= 2'b0;
windowCounter <= 55'b0;
emitSent <= 0;
readSent <= 0;
slave <=0;
end else begin
if( clkSync ) begin
if( windowCounter >= windowCyclesDuration )begin //Window ended
windowCounter <= 55'b0; //resetCounter
emitSent <= 0;
readSent <= 0;
if( sensor == 2'd2 )begin
sensor <= 2'b0;
if(slave == 2'd2)
slave <= 2'b0;
else
slave <= slave+1'b1;
end else begin
sensor <= sensor + 1'b1;
end
end else begin
windowCounter <= windowCounter + 1'b1; //Window in process
if(!emitSent)begin
sendCommand <= 1;
end
else if( (windowCounter >= listenCyclesDuration) && !readSent)begin //listen done, time to send the read command
sendCommand <= 1;
end
end
if(sendCommand)begin
sendCommand <= 0; //Shut down "sendCommand" signal.
if(!emitSent)
emitSent <= 1;
else
readSent <= 1;
end
end
/// Process incoming data
if( rxDataRdy )begin
read <= 1;
end else if( read )begin
read <= 0;
end
end
end
//////////////////////////////////////////////////////////////////
always #( sensor ) begin
case(sensor)
2'd0: begin
direction <= dir0;
end
2'd1: begin
direction <= dir1;
end
2'd2: begin
direction <= dir2;
end
default: begin
direction <= dir0;
end
endcase
end
endmodule
A module on the slave device which sends the commands:
module slave(
input mclk,
input clkSync,
input reset,
output [7:0] command,
output [7:0] direction,
output reg sendCommand,
output inWindow
);
parameter numSlave = 2'b0; //Between 0-2
parameter dir=8'd0; //Depends on the slaves direction
parameter comm=8'd92;
assign command = comm;
assign direction = dir;
parameter windowCyclesDuration = 8000000;
reg [54:0] windowCounter;
reg [1:0] sensor, slave;
reg commandSent;
assign inWindow = (slave == numSlave);
always #(posedge mclk) begin
if( reset )begin
windowCounter <= 55'b0;
sendCommand <=0;
commandSent <= 1;
slave <= 2'b0;
sensor <= 2'b0;
end else begin
if( clkSync ) begin
if( windowCounter >= windowCyclesDuration )begin //Window ended
windowCounter <= 55'b0; //resetCounter
commandSent <= 0;
if( sensor == 2'd2 )begin
sensor <= 2'b0;
if(slave == 2'd2)
slave <= 2'b0;
else
slave <= slave + 1'b1;
end else begin
sensor <= sensor + 1'b1;
end
end else begin
windowCounter <= windowCounter + 1'b1; //Window in process
if( inWindow && !commandSent)begin //im in my window and command not sent yet
sendCommand <= 1;//send when a new window is about to begin
commandSent <= 1;
end
end
if(sendCommand)begin
sendCommand <= 0; //Shut down "sendCommand" signal.
end
end
end
end
endmodule
The signal clkSync is only activated when both devices are 'Sync', which happend only at the start of functioning through a cable which is then remove to allow movement.
Here is the sync module of the master:
module SyncM(
input mclk,
input reset,
input response1,
input response2,
input response3,
output reg call1,
output reg call2,
output reg call3,
output reg clkSync,
output reg slave1,
output reg slave2,
output reg slave3
);
always # (posedge mclk) begin
if(reset)begin
clkSync <= 0;
slave1 <= 0;
slave2 <= 0;
slave3 <= 0;
call1 <= 0;
call2 <= 0;
call3 <= 0;
end else begin
if( btn && !call1 )begin
call1 <= 1;
call2 <= 1;
call3 <= 1;
clkSync <= 1;
end
if(response1)
slave1 <= 1;
if(response2)
slave2 <= 1;
if(response3)
slave3 <= 1;
end
end
endmodule
And the slave sync module, the call signal is send from the master to slave by a cable.
`timescale 1ns / 1ps
module SyncS(
input reset,
input call,
output reg clkSync,
output reg response
);
always # (reset or call) begin
if(reset) begin
clkSync <= 0;
response <= 0;
end else begin
if (call) begin
response <= 1;
clkSync <= 1;
end
end
end
endmodule
I haven't understand all of your code. But, the problems seems to be, that you rely on the external oscillator on your FPGA board. If you use two boards, both oscillators will not run on exactly the same frequency. So, if you compensate for the phase shift right only once after startup, the clocks will desynchronize after some time. That's why it worked with just one board.
There are two possible solutions:
Use just one clock source (oscillator) and forward the clock to other board.
Periodically compensate for phase shifts.
Both solutions require a more or less steady connection between both boards.

Resources