I would like to get a list of signal names in a given design hierarchy from a Verilog design using vpi. It is a simple net name browser interface from my custom tool that is written in C and Python.
How can I get a list of signal names from a Verilog design and which VPI calls I should use to walk through the design?
Any info would be greatly appreciated.
In addition to the answer already given, this code walks through your hierarchy and stores design objects of type vpiLogic; you can adapt it to your needs.
It stores the full names of the registers in an unordered_map, which has nice O(1) average access time during simulation.
This code was developed for projects using both verilog and VHDL.
You'll also find that some IPs are protected, which is handled gracefully. In addition, the use of scopes (vpiInternalScope) instead of vpiModule allows recursion inside generate statements.
It is c++ code but usage of extern "C" makes it callable from your EDA tools (tested using IUS).
#include "vhpi_user.h"
#include "vpi_user.h"
#include "vpi_user_cds.h"
#include "sv_vpi_user.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unordered_map>
extern "C" {
static std::unordered_map<int , std::string> reg_map;
// Convenience predicates evaluated on a scope handle through vpi_get().
// Non-zero when the scope's vpiLanguage property equals vpiVerilog.
#define check_verilog(scopeH) (vpi_get(vpiLanguage,scopeH) == vpiVerilog)
// Value of the vpiIsProtected property of the handle.
#define check_is_protected(scopeH) (vpi_get(vpiIsProtected,scopeH))
// Value of the vpiProtected property of the handle.
#define check_protected(scopeH) (vpi_get(vpiProtected,scopeH))
// Non-zero when the scope's vpiLanguage property equals vpiVHDL.
#define check_vhdl(scopeH) (vpi_get(vpiLanguage,scopeH) == vpiVHDL)
/**
 * Tells whether a scope is flagged as protected.
 *
 * Clocking blocks, named begin blocks and tasks are queried through
 * vpiIsProtected; every other object type is queried through vpiProtected.
 *
 * @param scopeH handle of the scope to inspect
 * @return true when the queried protection property is non-zero
 */
bool is_vpi_protected(vpiHandle scopeH) {
    const auto kind = vpi_get(vpiType, scopeH);
    if (kind == vpiClockingBlock || kind == vpiNamedBegin || kind == vpiTask) {
        return check_is_protected(scopeH);
    }
    return check_protected(scopeH);
}
/**
 * Tells whether a handle is one of the scope kinds the walker descends into:
 * instance, module, generate scope, generate scope array or instance array.
 *
 * @param scopeH handle to classify
 * @return true for the kinds listed above, false for anything else
 */
bool is_valid_scope(vpiHandle scopeH) {
    const auto kind = vpi_get(vpiType, scopeH);
    return kind == vpiInstance
        || kind == vpiModule
        || kind == vpiGenScope
        || kind == vpiGenScopeArray
        || kind == vpiInstanceArray;
}
/**
 * Records every Verilog register (vpiReg) found directly inside a scope.
 *
 * Each register's full name is printed together with its size and stored in
 * reg_map under the next 1-based index (current map size + 1).
 *
 * @param module scope handle whose registers are enumerated
 */
void vpi_get_reg(vpiHandle module) {
    vpiHandle itr_reg = vpi_iterate(vpiReg, module);
    if (!itr_reg) {
        return;  // no registers in this scope
    }
    vpiHandle reg;
    while ((reg = vpi_scan(itr_reg))) {
        // vpi_get_str() may return NULL; constructing std::string from a
        // null pointer is undefined behaviour, so skip such objects.
        const char *raw_name = vpi_get_str(vpiFullLSName, reg);
        if (!raw_name) {
            continue;
        }
        // Copy immediately: the returned buffer is reused by later calls.
        const std::string reg_name(raw_name);
        vpi_printf("** Verilog register Full Name:\t%s[%d]\n",reg_name.c_str(),vpi_get(vpiSize, reg));
        reg_map[static_cast<int>(reg_map.size()) + 1] = reg_name;
    }
}
/**
 * Records every VHDL signal declaration found in a component instance.
 *
 * Nothing happens unless the handle's vhpiKindP is vhpiCompInstStmtK. Each
 * signal's full name is printed with its size and stored in reg_map under the
 * next 1-based index (current map size + 1).
 *
 * @param module scope handle whose signal declarations are enumerated
 */
void vhpi_get_reg(vpiHandle module) {
    if (vhpi_get(vhpiKindP,module) != vhpiCompInstStmtK) {
        return;
    }
    vhpiHandleT itr_reg = vhpi_iterator(vhpiSigDecls,module);
    if (!itr_reg) {
        return;  // no signal declarations in this instance
    }
    vhpiHandleT reg;
    while ((reg = vhpi_scan(itr_reg))) {
        // Guard against a NULL name: std::string(nullptr) is undefined behaviour.
        const auto *raw_name = vhpi_get_str(vhpiFullLSNameP, reg);
        if (!raw_name) {
            continue;
        }
        const std::string reg_name(raw_name);
        vhpi_printf("** VHDL register Full LS Name:\t%s[%d]\n",reg_name.c_str(),vhpi_get(vhpiSizeP, reg));
        reg_map[static_cast<int>(reg_map.size()) + 1] = reg_name;
    }
}
/**
 * Recursively walks the design hierarchy below a scope and collects registers.
 *
 * Verilog scopes accepted by is_valid_scope() have their registers recorded
 * with vpi_get_reg() and their internal scopes visited; protected sub-scopes
 * are reported and not entered. VHDL scopes have their signals recorded with
 * vhpi_get_reg() and their internal regions visited.
 *
 * @param parentScope handle of the scope to start from
 */
void walk_down(vpiHandle parentScope) {
    vpiHandle subScopeI, subScopeH;
    if (check_verilog(parentScope) && is_valid_scope(parentScope)) {
        vpi_get_reg(parentScope);
        if ((subScopeI = vpi_iterate(vpiInternalScope, parentScope))) {
            while ((subScopeH = vpi_scan(subScopeI))) {
                if (!is_vpi_protected(subScopeH)) {
                    walk_down(subScopeH);
                    continue;
                }
                // vpi_get_str() hands back a temporary buffer that the next
                // call overwrites, so the scope name is copied before the
                // file name is requested.
                const char *raw_scope = vpi_get_str(vpiFullLSName, subScopeH);
                const std::string scope_name(raw_scope ? raw_scope : "<unknown>");
                // Outside a generate scope the parent's definition file is
                // reported; inside one, the sub-scope's own file is used.
                const char *raw_file = (vpi_get(vpiType, parentScope) != vpiGenScope)
                    ? vpi_get_str(vpiDefFile, parentScope)
                    : vpi_get_str(vpiFile, subScopeH);
                vpi_printf("** Verilog scope %s in %s is protected \n",
                           scope_name.c_str(), raw_file ? raw_file : "<unknown>");
            }
        }
    }
    else if(check_vhdl(parentScope)) {
        vhpi_get_reg(parentScope);
        subScopeI = vhpi_iterator(vhpiInternalRegions, parentScope);
        if (subScopeI) {
            while ((subScopeH = vhpi_scan(subScopeI))) {
                walk_down(subScopeH);
            }
        }
    }
}
/**
 * Entry point: rebuilds reg_map for the hierarchy below a named scope.
 *
 * The name is looked up through VPI first; every module directly below the
 * match is walked. The same name is then looked up through VHPI and, when
 * found, walked as well. Progress banners are printed before and after.
 *
 * @param scope hierarchical name of the scope to start from
 */
void navigate_mixed(const char * scope) {
    reg_map.clear();
    vpiHandle topScopeI, topScopeH;
    vpi_printf(".........Starting register discovery \n");
    if ((topScopeH = vpi_handle_by_name((PLI_BYTE8 *)scope, NULL))) {
        // vpi_iterate() returns NULL when the scope has no child modules;
        // that NULL must not be handed to vpi_scan().
        if ((topScopeI = vpi_iterate(vpiModule, topScopeH))) {
            while ((topScopeH = vpi_scan(topScopeI))) {
                walk_down(topScopeH);
            }
        }
    }
    if ((topScopeH = vhpi_handle_by_name((PLI_BYTE8 *)scope, NULL))) {
        walk_down(topScopeH);
    }
    vpi_printf("Completed register discovery........\n");
}
}
Related
I have the following code
#include <cuda.h>
#include <cuda_runtime.h>
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
//A bitset for the variable assignments
//The state for non existing variable 0 is stored as well, just to avoid +1/-1 adjustments
// Bitset of variable assignments, viewable as 32-bit words, 64-bit words or
// 256-bit AVX vectors through the anonymous union below.
// NOTE(review): operator== calls IntCount() and Avx2Count(), which are not
// declared in this excerpt -- presumably elided from the post.
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
// The three members alias the same storage. Each is declared with a single
// element, yet operator== indexes past element 0 -- presumably the object is
// over-allocated by the caller; confirm.
union {
uint32_t raw[1]; // don't worry about alignment, the compiler will not use aligned reads/writes anyway
uint64_t raw64[1];
__m256i avxraw[1]; // AVX vector type; this member is what is visible to both host and device compilation
};
public:
// Equality is callable from both host and device code.
__host__ __device__ friend bool operator==(const Atom_t& a, const Atom_t& b);
};
// Compares two Atom_t bitsets for equality.
// Returns false straight away when the IntCount() values differ. The rest of
// the comparison is selected by the preprocessor: a block-cooperative word
// compare when __CUDA_ARCH__ is defined, an AVX2 compare otherwise.
// NOTE(review): IntCount, Avx2Count, ThreadId, BlockDim, syncthreads, print
// and assert1 are defined outside this excerpt.
__host__ __device__ bool operator==(const Atom_t& a, const Atom_t& b) {
const auto IntCount = a.IntCount();
if (IntCount != b.IntCount()) { return false; }
#ifdef __CUDA_ARCH__
// Device path: each thread checks the words at ThreadId(), ThreadId()+BlockDim(), ...
// and records a mismatch in a flag shared by the block.
// NOTE(review): every thread writes `false` to the flag and there is no
// barrier between that write and the loop -- confirm this cannot overwrite a
// mismatch already recorded by another thread.
__shared__ bool isDifferent;
isDifferent = false;
for (auto i = ThreadId(); i < IntCount; i += BlockDim()) {
if (a.raw[i] != b.raw[i] || isDifferent) {
isDifferent = true;
break;
}
}
syncthreads();
return !isDifferent;
#else
// Host path. `result` stays true unless the _DEBUG scalar cross-check below
// finds a differing word.
auto result = true;
#ifdef _DEBUG
for (auto i = 0; i < IntCount; i++) {
if (a.raw[i] != b.raw[i]) { result = false; }
}
#endif
auto AvxCount = a.Avx2Count();
if (AvxCount != b.Avx2Count()) { if (result) { print("Atom_t == is incorrect"); } assert1(!result); return false; }
for (auto i = 0; i < AvxCount; i++) {
// Byte-wise compare of one 256-bit lane; a mask of -1 means all 32 bytes match.
const auto packedCompare = _mm256_cmpeq_epi8(a.avxraw[i], b.avxraw[i]);
const auto bitmask = _mm256_movemask_epi8(packedCompare);
if (bitmask != -1) { if (result) { print("Atom_t == is incorrect"); } assert1(!result); return false; }
}
#endif
#ifndef __CUDA_ARCH__
// Reached only when every AVX lane matched; the scalar check must agree.
assert(result);
#endif
return true;
}
The compiler complains
Description Resource Path Location Type
"__nv_bool (const Atom_t &, const Atom_t &)" contains a vector, which is not supported in device code
However, the vector is not in device code, only in the host code. How do I make this error go away in NSight Eclipse Edition 9.1 running CUDA 11.
I tried:
#ifdef __CUDA_ARCH__
# define DEAL_II_COMPILER_VECTORIZATION_LEVEL 0
#endif
But that does not work.
However, the vector is not in device code, only in the host code.
The error is coming about due to this line:
__m256i avxraw[1];
which is visible in both the host code and device code compilation trajectory.
According to my testing this may be a possible workaround:
$ cat t32.cpp
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include <iostream>
// Byte array with the same size as __m256i; used as a stand-in for the vector
// member when FOO is defined.
typedef char dummy[sizeof(__m256i)];
// Demonstration copy of Atom_t: the union's third member is either the real
// __m256i array or a 32-byte-aligned placeholder of equal size, selected by FOO.
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
union {
uint32_t raw[1];
uint64_t raw64[1];
#ifndef FOO //hide the vectorized datastruct from cuda's view
__m256i avxraw[1];
#else
// alignas(32) keeps the union's alignment equal to the __m256i variant.
alignas(32) dummy foo[1];
#endif
};
};
// Prints the size of __m256i and then the size of Atom_t, one per line.
int main() {
    const auto vector_bytes = sizeof(__m256i);
    const auto atom_bytes = sizeof(Atom_t);
    std::cout << vector_bytes << std::endl;
    std::cout << atom_bytes << std::endl;
}
$ g++ t32.cpp -o t32
$ ./t32
32
64
$ g++ t32.cpp -o t32 -DFOO
$ ./t32
32
64
(Fedora Core 29)
The alignas(32) directive is still probably somewhat fragile if the definition of __m256i changes dramatically. And, clearly, the above is not CUDA code in the exact frame that was presented. It would need to be adapted (e.g. replace #ifndef FOO with #ifndef __CUDA_ARCH__)
I'm not suggesting that this code is correct, defect-free, or suitable for any particular purpose; it is mostly code provided by OP. My objective here is to identify issues that I see and are asked about in the question, and suggest possible ways to address those issues. Use this at your own risk.
Found it!
The problem is not the code in the method; the problem is the presence of the __m256i member within view of CUDA.
The following patch fixes the issue:
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
union {
uint32_t raw[1]; //don't worry about alignment, the compiler will not use aligned read/writes anyway.}
uint64_t raw64[1];
#ifndef __CUDA_ARCH__ //hide the vectorized datastruct from cuda's view
__m256i avxraw[1];
#endif
};
Now that nvcc no longer sees the vectorized data type during the device compilation pass, it stops complaining.
I am new to C++ and multithreading applications. I want to process a long list of data (potentially several thousands of entries) by dividing its entries among a few threads. I have retrieved a ThreadPool class and a Queue class from the web (it is my first time tackling the subject). I construct the threads and populate the queue in the following way (definitions at the end of the post):
// Creates a pool of 8 workers, wraps each entry in a task and queues it, then
// destroys the pool.
ThreadPool *pool = new ThreadPool(8);
// NOTE(review): `caller` is allocated with new and never deleted in this snippet.
std::vector<std::function<void(int)>> *caller =
new std::vector<std::function<void(int)>>;
for (size_t i = 0; i < Nentries; ++i)
{
// The lambda captures by copy ([=]); its int argument is forwarded to func
// as the second parameter.
caller->push_back(
[=](int j){func(entries[i], j);});
pool->PushTask((*caller)[i]);
}
// The pool is deleted right after the last push, without waiting for the
// queue to empty.
delete pool;
The problem is that only a number of entries equaling the number of created threads are processed, as if the program does not wait for the queue to be empty. Indeed, if I put
while (pool->GetWorkQueueLength()) {}
just before the pool destructor, the whole list is correctly processed. However, I am afraid I am consuming too many resources by using a while loop. Moreover, I have not found anyone doing anything like it, so I think this is the wrong approach and the classes I use have some error. Can anyone find the error (if present) or suggest another solution?
Here are the classes I use. I suppose the problem is in the implementation of the destructor, but I am not sure.
SynchronizedQueue.hh
#ifndef SYNCQUEUE_H
#define SYNCQUEUE_H
#include <list>
#include <mutex>
#include <condition_variable>
/**
 * FIFO queue guarded by a mutex, with a blocking Get().
 *
 * Put() appends a copy of the element and wakes one waiting consumer.
 * Get() blocks until an element is available, then removes and returns the
 * oldest one. Size() returns the current element count. The queue cannot be
 * copied or copy-assigned.
 */
template<typename T>
class SynchronizedQueue
{
public:
    SynchronizedQueue() {}

    /// Appends a copy of `data` and notifies one waiter.
    void Put(T const & data)
    {
        std::lock_guard<std::mutex> hold(lock_);
        items_.push_back(data);
        ready_.notify_one();
    }

    /// Waits until the queue is non-empty, then pops and returns the front element.
    T Get()
    {
        std::unique_lock<std::mutex> hold(lock_);
        ready_.wait(hold, [this] { return !items_.empty(); });
        T head = items_.front();
        items_.pop_front();
        return head;
    }

    /// Returns the number of queued elements.
    size_t Size()
    {
        std::lock_guard<std::mutex> hold(lock_);
        return items_.size();
    }

private:
    SynchronizedQueue(SynchronizedQueue const &)=delete;
    SynchronizedQueue & operator=(SynchronizedQueue const &)=delete;

    std::list<T> items_;
    std::mutex lock_;
    std::condition_variable ready_;
};
#endif
ThreadPool.hh
#ifndef THREADPOOL_H
#define THREADPOOL_H
#include "SynchronizedQueue.hh"
#include <atomic>
#include <functional>
#include <mutex>
#include <thread>
#include <vector>
// Fixed-size pool of worker threads that execute std::function<void(int)>
// tasks taken from a shared SynchronizedQueue.
class ThreadPool
{
public:
// nThreads <= 0 selects std::thread::hardware_concurrency() workers.
ThreadPool(int nThreads = 0);
virtual ~ThreadPool();
// Queues a task; the int argument it later receives is the worker's index.
void PushTask(std::function<void(int)> func);
// Number of tasks currently waiting in the queue.
size_t GetWorkQueueLength();
private:
// Body of each worker thread; i is the worker's index.
void WorkerThread(int i);
// Set by the destructor to ask workers to stop.
std::atomic<bool> done;
unsigned int threadCount;
SynchronizedQueue<std::function<void(int)>> workQueue;
// Declared last, so it is constructed after (and destroyed before) the
// members above that the workers use.
std::vector<std::thread> threads;
};
#endif
ThreadPool.cc
#include "ThreadPool.hh"
#include "SynchronizedQueue.hh"
/// No-op task; accepts and ignores the worker index.
void doNothing(int)
{
}
/**
 * Starts the worker threads.
 *
 * @param nThreads number of workers; when <= 0 the value of
 *        std::thread::hardware_concurrency() is used instead.
 *
 * hardware_concurrency() is allowed to return 0 when the value cannot be
 * determined. A pool with zero workers would never run any task, so one
 * worker is used in that case.
 */
ThreadPool::ThreadPool(int nThreads)
    : done(false)
{
    if (nThreads > 0)
    {
        threadCount = static_cast<unsigned int>(nThreads);
    }
    else
    {
        threadCount = std::thread::hardware_concurrency();
        if (threadCount == 0)
        {
            threadCount = 1;
        }
    }
    threads.reserve(threadCount);
    for (unsigned int i = 0; i < threadCount; ++i)
    {
        // Each worker receives its own index as the argument of WorkerThread.
        threads.emplace_back(&ThreadPool::WorkerThread, this, static_cast<int>(i));
    }
}
// Shuts the pool down: raises `done`, queues one doNothing task per worker so
// that threads blocked in workQueue.Get() wake up, then joins every thread.
// NOTE(review): `done` is raised before the queue has drained, and
// WorkerThread re-checks it after each task, so tasks still queued at this
// point may never run.
ThreadPool::~ThreadPool()
{
done = true;
for (unsigned int i = 0; i < threadCount; ++i)
{
PushTask(&doNothing);
}
for (auto& th : threads)
{
if (th.joinable())
{
th.join();
}
}
}
void ThreadPool::PushTask(std::function<void(int)> func)
{
workQueue.Put(func);
}
/// Worker loop: until `done` is observed, take the next task from the queue
/// (blocking while it is empty) and run it with this worker's index.
void ThreadPool::WorkerThread(int i)
{
    for (;;)
    {
        if (done)
        {
            return;
        }
        auto task = workQueue.Get();
        task(i);
    }
}
/// Reports how many tasks are waiting in the work queue right now.
size_t ThreadPool::GetWorkQueueLength()
{
    const size_t pending = workQueue.Size();
    return pending;
}
You can push tasks saying "done" instead of setting "done" via atomic variable.
So that each thread will exit by itself when seeing "done" task, and no earlier. In destructor you only need to push these tasks and join threads. This is called "poison pill".
Alternatively, if you insist on your current design with done variable, you can wait on the same condition you already have:
std::unique_lock<std::mutex> lck(mut);
while (!queue.empty())
{
condvar.wait(lck);
}
But then you'll need to change your notify_one to notify_all, and this may be sub-optimal.
I want to process a long list of data (potentially several thousands of entries) by dividing its entries among a few threads.
You can do that with parallel algorithms, like tbb::parallel_for:
#include <tbb/parallel_for.h>
#include <vector>
void func(int entry);
// Applies func to every element of a one-million-entry vector in parallel.
int main () {
    std::vector<int> entries(1000000);
    const auto process_one = [&](size_t i) { func(entries[i]); };
    tbb::parallel_for(size_t{0}, entries.size(), process_one);
}
If you need sequential thread ids, you can do:
void func(int element, int thread_id);
template<class C>
inline auto make_range(C& c) -> decltype(tbb::blocked_range<decltype(c.begin())>(c.begin(), c.end())) {
return tbb::blocked_range<decltype(c.begin())>(c.begin(), c.end());
}
// Processes the vector in parallel sub-ranges and passes func a per-thread id.
int main () {
std::vector<int> entries(1000000);
// Shared counter from which thread ids are drawn.
std::atomic<int> thread_counter{0};
tbb::parallel_for(make_range(entries), [&](auto sub_range) {
// thread_local + static: initialised once per thread, on the first sub-range
// that thread executes, so each thread takes exactly one value of the counter.
static thread_local int const thread_id = thread_counter.fetch_add(1, std::memory_order_relaxed);
for(auto& element : sub_range)
func(element, thread_id);
});
}
Alternatively, there is std::this_thread::get_id.
I am writing a base class to manage threads. The idea is to allow the thread function to be overridden in child class while the base class manages thread life cycle. I ran into a strange behavior which I don't understand - it seems that the virtual function mechanism does not work when the call is made from a thread. To illustrate my problem, I reduced my code to the following:
#include <iostream>
#include <thread>
using namespace std;
// Base class that owns a thread and runs the overridable thread_func() on it.
struct B
{
thread t;
// Non-virtual entry point handed to the thread; dispatches to the virtual hook.
void thread_func_non_virt()
{
thread_func();
}
// Hook that derived classes override.
virtual void thread_func()
{
cout << "B::thread_func\n";
}
// The thread is launched from B's constructor, i.e. before any derived-class
// constructor has run for this object.
B(): t(thread(&B::thread_func_non_virt, this)) { }
void join() { t.join(); }
};
struct C : B
{
virtual void thread_func() override
{
cout << "C::thread_func\n";
}
};
// Constructs a C (which starts the thread), waits for the thread, then calls
// the same entry point directly on the fully constructed object.
int main()
{
C c; // output is "B::thread_func" but "C::thread_func" is expected
c.join();
c.thread_func_non_virt(); // output "C::thread_func" as expected
}
I tried with both Visual studio 2017 and g++ 5.4 (Ubuntu 16) and found the behavior is consistent. Can someone point out where I got wrong?
== UPDATE ==
Based on Igor's answer, I moved the thread creation out of the constructor into a separate method and calling that method after the constructor and got the desired behavior.
Your program exhibits undefined behavior. There's a race on *this between thread_func and C's (implicitly defined) constructor.
#include <iostream>
#include <thread>
using namespace std;
// Variant of B whose constructor receives the object pointer to run the
// thread on instead of using `this` directly.
// NOTE(review): the thread is still launched from B's constructor, before the
// derived constructor has finished, so this appears to keep the race that the
// text above describes -- confirm before relying on it.
struct B
{
thread t;
// Non-virtual entry point handed to the thread; dispatches to the virtual hook.
void thread_func_non_virt()
{
thread_func();
}
virtual void thread_func()
{
cout << "B::thread_func\n";
}
// ptr is the object on which thread_func_non_virt() is invoked by the thread.
B(B*ptr): t(thread(&B::thread_func_non_virt, ptr))
{
}
void join() { t.join(); }
};
struct C:public B
{
C():B(this){}
virtual void thread_func() override
{
cout << "C::thread_func\n";
}
};
// Constructs a C (which starts the thread), waits for the thread, then calls
// the same entry point directly.
int main()
{
C c; // output "C::thread_func" is expected
c.join();
c.thread_func_non_virt(); // output "C::thread_func" as expected
}
In a single thread, I have this beautiful class that redirects all cout output to a QTextEdit
#include <iostream>
#include <streambuf>
#include <string>
#include <QScrollBar>
#include "QTextEdit"
#include "QDateTime"
// Stream buffer that installs itself on a std::ostream and appends each
// completed line of output to a QTextEdit.
// NOTE(review): QObject is listed second here and without `public`, so it is
// inherited privately.
// NOTE(review): log_window is never assigned anywhere in this excerpt.
class ThreadLogStream : public std::basic_streambuf<char>, QObject
{
Q_OBJECT
public:
// Remembers the stream's current buffer and replaces it with this object.
ThreadLogStream(std::ostream &stream) : m_stream(stream)
{
m_old_buf = stream.rdbuf();
stream.rdbuf(this);
}
// Flushes any partial line and restores the stream's original buffer.
~ThreadLogStream()
{
// output anything that is left
if (!m_string.empty())
{
log_window->append(m_string.c_str());
}
m_stream.rdbuf(m_old_buf);
}
protected:
// Single-character path: a newline emits the accumulated line and clears it;
// any other value is appended to the pending line.
virtual int_type overflow(int_type v)
{
if (v == '\n')
{
log_window->append(m_string.c_str());
m_string.erase(m_string.begin(), m_string.end());
}
else
m_string += v;
return v;
}
// Bulk path: appends the characters, then emits and removes every complete
// line; text after the last newline stays buffered.
virtual std::streamsize xsputn(const char *p, std::streamsize n)
{
m_string.append(p, p + n);
long pos = 0;
// npos is compared after conversion to long because pos is a long.
while (pos != static_cast<long>(std::string::npos))
{
pos = m_string.find('\n');
if (pos != static_cast<long>(std::string::npos))
{
std::string tmp(m_string.begin(), m_string.begin() + pos);
log_window->append(tmp.c_str());
m_string.erase(m_string.begin(), m_string.begin() + pos + 1);
}
}
return n;
}
private:
std::ostream &m_stream;      // stream whose buffer was replaced
std::streambuf *m_old_buf;   // original buffer, restored by the destructor
std::string m_string;        // text of the line currently being assembled
QTextEdit* log_window;       // widget that receives the lines
};
However, this doesn't work if ANY thread (QThread) is initiated with a cout. This is because all pointers are messed up, and one has to use signals and slots for allowing transfer of data between the sub-thread and the main thread.
I would like to modify this class to emit a signal rather than write to a text file. This requires that this class becomes a Q_OBJECT and be inherited from one. I tried to inherit from QObject in addition to std::basic_streambuf<char> and added Q_OBJECT macro in the body but it didn't compile.
Could you please help me to achieve this? What should I do to get this class to emit signals that I can connect to and that are thread safe?
For those who need the full "working" answer, here it is. I just copied it because #GraemeRock asked for it.
#ifndef ThreadLogStream_H
#define ThreadLogStream_H
#include <iostream>
#include <streambuf>
#include <string>
#include <QScrollBar>
#include "QTextEdit"
#include "QDateTime"
/**
 * Stream buffer that installs itself on a std::ostream and republishes every
 * completed line of output through the sendLogString() signal.
 *
 * The constructor swaps the stream's buffer for this object; the destructor
 * emits any unfinished line and restores the original buffer. Text is split
 * on '\n'; the newline itself is not part of the emitted string.
 */
class ThreadLogStream : public QObject, public std::basic_streambuf<char>
{
    Q_OBJECT
public:
    /// Redirects `stream` to this buffer, remembering its previous buffer.
    ThreadLogStream(std::ostream &stream) : m_stream(stream)
    {
        m_old_buf = stream.rdbuf();
        stream.rdbuf(this);
    }
    /// Emits whatever is left of the last line and restores the old buffer.
    ~ThreadLogStream()
    {
        // output anything that is left
        if (!m_string.empty())
        {
            emit sendLogString(QString::fromStdString(m_string));
        }
        m_stream.rdbuf(m_old_buf);
    }
protected:
    /// Single-character path.
    /// @return v on success; a non-eof value when called with eof.
    virtual int_type overflow(int_type v) override
    {
        // overflow() may be called with eof, meaning "no character to store".
        // Appending that sentinel would add a garbage byte, and returning it
        // would signal failure to the stream.
        if (traits_type::eq_int_type(v, traits_type::eof()))
        {
            return traits_type::not_eof(v);
        }
        const char ch = traits_type::to_char_type(v);
        if (ch == '\n')
        {
            emit sendLogString(QString::fromStdString(m_string));
            m_string.clear();
        }
        else
        {
            m_string += ch;
        }
        return v;
    }
    /// Bulk path: buffers the characters, then emits and removes every
    /// complete line. Text after the last newline stays buffered.
    /// @return n, the number of characters consumed.
    virtual std::streamsize xsputn(const char *p, std::streamsize n) override
    {
        m_string.append(p, p + n);
        // Keep the position in its native unsigned type so it can be compared
        // with npos directly, without narrowing through long.
        std::string::size_type pos;
        while ((pos = m_string.find('\n')) != std::string::npos)
        {
            emit sendLogString(QString::fromStdString(m_string.substr(0, pos)));
            m_string.erase(0, pos + 1);
        }
        return n;
    }
private:
    std::ostream &m_stream;      // stream whose buffer was replaced
    std::streambuf *m_old_buf;   // original buffer, restored by the destructor
    std::string m_string;        // text of the line currently being assembled
signals:
    /// Emitted once per completed line (and once for a trailing partial line
    /// on destruction).
    void sendLogString(const QString& str);
};
#endif // ThreadLogStream_H
The derivation needs to happen QObject-first:
class LogStream : public QObject, std::basic_streambuf<char> {
Q_OBJECT
...
};
...
If the goal was to minimally modify your code, there's a simpler way. You don't need to inherit QObject to emit signals iff you know exactly what slots the signals are going to. All you need to do is to invoke the slot in a thread safe way:
QMetaObject::invokeMethod(log_window, "append", Qt::QueuedConnection,
Q_ARG(QString, tmp.c_str()));
To speed things up, you can cache the method so that it doesn't have to be looked up every time:
class LogStream ... {
QPointer<QTextEdit> m_logWindow;
QMetaMethod m_append;
LogStream::LogStream(...) :
m_logWindow(...),
m_append(m_logWindow->metaObject()->method(
m_logWindow->metaObject()->indexOfSlot("append(QString)") )) {
...
}
};
You can then invoke it more efficiently:
m_append.invoke(m_logWindow, Qt::QueuedConnection, Q_ARG(QString, tmp.c_str()));
Finally, whenever you're holding pointers to objects whose lifetimes are not under your control, it's helpful to use QPointer since it never dangles. A QPointer resets itself to 0 when the pointed-to object gets destructed. It will at least prevent you from dereferencing a dangling pointer, since it never dangles.
How do I take string input as a switch case parameter? I am able to do it with an int but not string.
The below code would be working if I was using an int input, but if I change to string it won't work.
#include <iostream>
#include <sstream>
#include <string>
#include <math.h>
// Declaration only: a default constructor and computeCivIndex(); neither is
// defined in this excerpt.
// NOTE(review): `string` is used unqualified and no using-declaration for it
// is shown here.
class MissionPlan //start of MissionPlan class
{
public:
MissionPlan();
float computeCivIndex(string,int,int,float,float);
}; //end of MissionPlan class
// Constructor that tries to branch on sunType with string-literal case labels.
// C++ case labels must be integral or enumeration constants, so this switch
// does not compile as written.
// NOTE(review): LocationData and sunType are not declared in this excerpt.
LocationData::LocationData()
{
switch(sunType)
{
case "Type A": //compute
break;
case "Type B": //compute
break;
//and many more case..
default: break;
}
}
// Constructs and destroys a MissionPlan in an endless loop; the return
// statement after the loop is never reached.
int main()
{
    while (true)
    {
        MissionPlan plan;
    }
    return 0;
}
You cannot use a switch statement on a string in C++, sorry. Your best bet here is to use an enum. If you don't want to use an enum, then your only other option is a chain of if/else statements that check the strings for equality.
C/C++ doesn't support switch statements with strings. Use if-else-if instead:
// Branch on the value of sunType by direct string equality.
if (sunType == "Type A") {
    //compute
} else if (sunType == "Type B") {
    // compute
} else {
    // default
}