Why seem my program memory write faster than read?

Why seem my program memory write faster than read? - linux

My simple program:
//usage:
//indent ./a.c;gcc -O0 ./a.c
//./a.out max r/w repeat timeout
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
int
main (int argc, char **argv)
{
time_t const start_time = time (NULL);
time_t timeout;
int max;
int repeat;
if (argc == 5)
{
max = atoi (argv[1]);
repeat = atoi (argv[3]);
timeout = ((time_t) (atoi (argv[4])));
}
else
return 1;
unsigned char **block_array =
calloc (sizeof (unsigned char *), (size_t) (max));
size_t block_length = (size_t) (1024u * 1024u);
unsigned char data[3];
data[0] = 'a';
data[1] = 'b';
data[2] = 'c';
unsigned i = 0u;
//initialize block_array
for (i = 0u; i < max; i++)
{
do
{
if ((timeout > ((time_t) (0)))
&& ((time (NULL) - start_time) > timeout))
{
puts ("timeouted!");
return 0;
}
block_array[i] = malloc (block_length);
if (block_array[i] != NULL)
{
unsigned bi = 0u;
for (bi = 0u; bi < block_length; bi++)
block_array[i][bi] = data[bi % ((unsigned) (sizeof (data)))];
}
else
{
printf ("%u error\n", i);
}
}
while (NULL == block_array[i]);
}
puts ("init ok");
unsigned score = 0u;
//do page read test
if ('r' == argv[2][0])
for (;;)
{
for (i = 0u; i < max; i++)
{
if ((timeout > ((time_t) (0)))
&& ((time (NULL) - start_time) > timeout))
{
puts ("timeouted!");
goto show_score;
}
unsigned bi = 0u;
for (bi = 0u; bi < block_length; bi++)
{
data[bi % ((unsigned) (sizeof (data)))] = block_array[i][bi];
}
score++;
}
if (repeat >= 0)
{
repeat--;
if (0 == repeat)
goto show_score;
}
}
//do page write test
else if ('w' == argv[2][0])
for (;;)
{
for (i = 0u; i < max; i++)
{
if ((timeout > ((time_t) (0)))
&& ((time (NULL) - start_time) > timeout))
{
puts ("timeouted!");
goto show_score;
}
unsigned bi = 0u;
for (bi = 0u; bi < block_length; bi++)
{
block_array[i][bi] = data[bi % ((unsigned) (sizeof (data)))];
}
score++;
}
if (repeat >= 0)
{
repeat--;
if (0 == repeat)
goto show_score;
}
}
show_score:
printf ("score:%u\n", score);
return 0;
}
I also same test Debian Jessie(Linux 3.16)(lesser test) and Debian Stretch(Linux 4.9)(more test to sure)
I already repeat many times same test to sure this,so I only post a short result.
Test result:
$ cat /proc/meminfo |grep SwapTotal
SwapTotal: 0 kB
$ time ./a.out 100 r 5 -1
init ok
score:500
real 0m2.689s
user 0m2.604s
sys 0m0.080s
$ time ./a.out 100 w 5 -1
init ok
score:500
real 0m2.567s
user 0m2.496s
sys 0m0.060s
$

The main assignment inside the loop in both the 'r' and 'w' cases reads from memory and writes back to memory i.e. they are essentially the same - you aren't really testing memory read versus memory write. This is borne out by the fact that the times in each case are pretty close.
The 'w' case may be slightly faster because the cache probably contains the value you want to read from memory, since you aren't changing the source address in that case.

Related

Audio Recording and Playback in C : problem with audio gain

The question essentially is how to correctly apply gain to an audio sample?
I'm programming on FreeBSD and OSS, but manipulate volume in audio sample is probably the same for other OS and applications.
I'm studying others' applications internals like ecasound (in C++) and SoX (in C) but I don't know whats wrong when I read a sample and apply gain to it : it becomes distorted and noisy. My point is to understand why it is not working to turn the volume down (gain lesser than 1).
I'm working with stereo 16 bit LE samples. Without applying gain, it works perfectly (recording and playback).
I thought that I should convert an integer sample to float; multiply by a gain factor and restore it to integer. But it is not working. And it seems to be the exact same approach for SoX in src/vol.c in function static int flow.
Below is my code (no additional libs used). The function playback is where I'm applying gain.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/raw-file.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
int *buf[fragmentSize];
read(fdDsp, buf, fragmentSize);
write(fdOutput, buf, fragmentSize);
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
int buf[fragmentSize];
eof = read(fdInput, buf, fragmentSize); //bytes read or -1 if EOF
// audio processing:
for(int i=0;i<fragmentSize;i++){
// learning how to get left and right channels from buffer
int l = (buf)[i] & 0xffff;
int r = ((buf)[i] >> 16) & 0xffff ;
// FIXME: it is causing distortion:
float fl = l;
float fr = r;
fl *= 1.0;
fr *= 0.3; //if different than 1, sounds distorted and noisy
l = fl;
r = fr;
// OK: unite Left and Right channels again
int lr = (l ) | (r << 16);
// OK: other options to mix these two channels:
int lleft = l; //Just the left channel
int rright = (r << 16); //Just the right channel
int lmono = (l << 16) | l; //Left ch. on both channels
int rmono = (r << 16) | r; //Right ch. on both channels
// the output:
(buf)[i] = lr;
}
write(fdDsp, buf, fragmentSize);
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
UPDATE:
As pointed out by CL, I was using the wrong type and the last parameter of read()/write() is greater than the size of the buffer.
So, in FreeBSD I changed the buffer type to int16_t (short) defined in #include <stdint.h> .
Now I can correctly apply a gain as desired:
float fl = l;
float fr = r;
fl *= 1.0f;
fr *= 1.5f;
l = fl;
r = fr;
I'll accept CL's answer.
Now the audio processing loop is working with one sample per time (left and right interleaved).
Updated code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#include <stdint.h> //has type int16_t (short)
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/stereo.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
// Wrong:
// int *buf[fragmentSize];
// read(fdDsp, buf, fragmentSize);
// write(fdOutput, buf, fragmentSize);
int16_t *buf[fragmentSize/sizeof (int16_t)];
read(fdDsp, buf, fragmentSize/sizeof (int16_t));
write(fdOutput, buf, fragmentSize/sizeof (int16_t));
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
//Wrong buffer type (too large) and wrong last parameter for read():
// int buf[fragmentSize];
// eof = read(fdInput, buf, fragmentSize);
int16_t buf[fragmentSize/sizeof (int16_t)];
eof = read(fdInput, buf, fragmentSize/sizeof (int16_t));
// audio processing:
for(int i=0;i<fragmentSize/sizeof (int16_t);i++){
int16_t l = buf[i];
int16_t r = buf[i+1];
// Using int16_t (short) buffer, gain works but stereo is inverted with factor >= 1.4f
float fl = l;
float fr = r;
fl *= 2.0f;
fr *= 3.0f;
l = fl;
r = fr;
// the output:
(buf)[i] = l;
i++;
(buf)[i] = r;
}
// write(fdDsp, buf, fragmentSize); //wrong
write(fdDsp, buf, fragmentSize/sizeof (int16_t));
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
Thanks,

The last parameter of read()/write() is the number of bytes, but an entry in buf[] has more than one byte.
In the two's complement representation of binary numbers, negative values are (or must be) sign extended, i.e., the most significant bits are ones. In this code, neither extracting L/R channels nor combining them works correctly for negative samples.
The easiest way of handling negative samples would be to use one array entry per sample, i.e., short int.

How to write to multiple files on different disks simultaneously in one thread with DMA?

I use aio to write multiple files on different disk in one thread. When I use buffered writing, IO processing is concurrent. But cpu loads is very high. When I open files with DIRECT flag, IO processing isn't concurrent.
How to write to multiple files on different disks simultaneously in one thread with DMA?
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>
using namespace std;
long double timeDiff(timespec start, timespec end) {
const long double s = start.tv_sec + start.tv_nsec * 1.0e-9;
const long double e = end.tv_sec + end.tv_nsec * 1.0e-9;
return e - s;
}
// nr: maximum number of requests that can simultaneously reside in the context.
inline int io_setup(unsigned nr, aio_context_t *ctxp) {
return syscall(__NR_io_setup, nr, ctxp);
}
inline int io_destroy(aio_context_t ctx) {
return syscall(__NR_io_destroy, ctx);
}
// Every I/O request that is submitted to
inline int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) {
return syscall(__NR_io_submit, ctx, nr, iocbpp);
}
// For every completed I/O request kernel creates an io_event structure.
// minimal number of events one wants to get.
// maximum number of events one wants to get.
inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
struct io_event *events, struct timespec *timeout) {
return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
}
int main(int argc, char *argv[]) {
// prepare data
const unsigned int kAlignment = 4096;
const long data_size = 1600 * 1024 * 12 / 8;
//const long data_size = 2448 * 1344 * 12 / 8;
void * data = memalign(kAlignment, data_size);
memset(data, 0, data_size);
//for (int i = 0; i < data_size; ++i)
// data[i] = 'A';
// prepare fd
//const int file_num = 3;
const int file_num = 2;
int fd_arr[file_num];
for (int i = 0; i < file_num; ++i) {
ostringstream filename;
if (i == 0) {
//filename << "/data/test";
filename << "/test";
} else {
filename << "/data" << i << "/test";
}
//filename << "/data/test" << i;
int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT | O_APPEND, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT, 0644);
if (fd < 0) {
perror("open");
return -1;
}
fd_arr[i] = fd;
}
aio_context_t ctx;
struct io_event events[file_num];
int ret;
ctx = 0;
ret = io_setup(1000, &ctx);
if (ret < 0) {
perror("io_setup");
return -1;
}
struct iocb cbs[file_num];
for (int i = 0; i < file_num; ++i) {
memset(&cbs[i], 0, sizeof(cbs[i]));
}
struct iocb * cbs_pointer[file_num];
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs_pointer[i] = &cbs[i];
cbs[i].aio_fildes = fd_arr[i];
cbs[i].aio_lio_opcode = IOCB_CMD_PWRITE; // IOCV_CMD
cbs[i].aio_nbytes = data_size;
}
timespec tStart, tCurr;
clock_gettime(CLOCK_REALTIME, &tStart);
const int frame_num = 10000;
for (int k = 0; k < frame_num; ++k) {
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs[i].aio_buf = (uint64_t)data;
//cbs[i].aio_offset = k * data_size;
}
ret = io_submit(ctx, file_num, cbs_pointer);
if (ret < 0) {
perror("io_submit");
return -1;
}
/* get reply */
ret = io_getevents(ctx, file_num, file_num, events, NULL);
//printf("events: %d, k: %d\n", ret, k);
}
clock_gettime(CLOCK_REALTIME, &tCurr);
cout << "frame: " << frame_num << " time: " << timeDiff(tStart, tCurr) << endl;
ret = io_destroy(ctx);
if (ret < 0) {
perror("io_destroy");
return -1;
}
// close fd
for (int i = 0; i < file_num; ++i) {
fsync(fd_arr[i]);
close(fd_arr[i]);
}
return 0;
}

Linux can make writes actually async if and only if the physical extents being written are allocated on the disc already. Otherwise it has to take a mutex and do the allocation first, thus everything becomes synchronous.
Note that truncating the file to a new length usually doesn't actually allocate the underlying extents. You need to prewrite the contents first. Thereafter, rewriting the same extents will now be done async and thus become concurrent.
As you might be gathering, async file i/o on Linux is not great, though it keeps on getting better over time. Windows or FreeBSD have far superior implementations. Even OS X is not terrible. Use any of those instead.

Find all string permutations of given string in given source string

We are given a pattern string: 'foo' and a source string: 'foobaroofzaqofom' and we need to find all occurrences of word pattern string in any order of letters. So for a given example solution will looks like: ['foo', 'oof', 'ofo'].
I have a solution, but i'm not sure that it is the most efficient one:
Create hash_map of chars of pattern string where each char is a key and each value is a counter of chars in pattern. For a given example it would be {{f: 1}, {o: 2}}
Look through the source string and if found one of the elements from hash_map, than try to find all the rest elements of pattern
If all elements are found than it is our solution, if not going forward
Here is an implementation in c++:
set<string> FindSubstringPermutations(string& s, string& p)
{
set<string> result;
unordered_map<char, int> um;
for (auto ch : p)
{
auto it = um.find(ch);
if (it == um.end())
um.insert({ ch, 1 });
else
um[ch] += 1;
}
for (int i = 0; i < (s.size() - p.size() + 1); ++i)
{
auto it = um.find(s[i]);
if (it != um.end())
{
decltype (um) um_c = um;
um_c[s[i]] -= 1;
for (int t = (i + 1); t < i + p.size(); ++t)
{
auto it = um_c.find(s[t]);
if (it == um_c.end())
break;
else if (it->second == 0)
break;
else
it->second -= 1;
}
int sum = 0;
for (auto c : um_c)
sum += c.second;
if (sum == 0)
result.insert(s.substr(i, p.size()));
}
}
return result;
}
Complexity is near O(n), i don't know how to calculate more precisely.
So the question: is there any efficient solution, because using hash_map is a bit of hacks and i think there may be more efficient solution using simple arrays and flags of found elements.

You could use a order-invariant hash-algorithm that works with a sliding window to optimize things a bit.
An example for such a hash-algorithm could be
int hash(string s){
int result = 0;
for(int i = 0; i < s.length(); i++)
result += s[i];
return result;
}
This algorithm is a bit over-simplistic and is rather horrible in all points except performance (i.e. distribution and number of possible hash-values), but that isn't too hard to change.
The advantage with such a hash-algorithm would be:
hash("abc") == hash("acb") == hash("bac") == ...
and using a sliding-window with this algorithm is pretty simple:
string s = "abcd";
hash(s.substring(0, 3)) + 'd' - 'a' == hash(s.substring(1, 3));
These two properties of such hashing approaches allow us to do something like this:
int hash(string s){
return sum(s.chars);
}
int slideHash(int oldHash, char slideOut, char slideIn){
return oldHash - slideOut + slideIn;
}
int findPermuted(string s, string pattern){
int patternHash = hash(pattern);
int slidingHash = hash(s.substring(0, pattern.length()));
if(patternHash == slidingHash && isPermutation(pattern, s.substring(0, pattern.length())
return 0;
for(int i = 0; i < s.length() - pattern.length(); i++){
slidingHash = slideHash(slidingHash, s[i], s[i + pattern.length()]);
if(patternHash == slidingHash)
if(isPermutation(pattern, s.substring(i + 1, pattern.length())
return i + 1;
}
return -1;
}
This is basically an altered version of the Rabin-Karp-algorithm that works for permuted strings. The main-advantage of this approach is that less strings actually have to be compared, which brings quite a bit of an advantage. This especially applies here, since the comparison (checking if a string is a permutation of another string) is quite expensive itself already.
NOTE:
The above code is only supposed as a demonstration of an idea. It's aimed at being easy to understand rather than performance and shouldn't be directly used.
EDIT:
The above "implementation" of an order-invariant rolling hash algorithm shouldn't be used, since it performs extremely poor in terms of data-distribution. Of course there are obviously a few problems with this kind of hash: the only thing from which the hash can be generated is the actual value of the characters (no indices!), which need to be accumulated using a reversible operation.
A better approach would be to map each character to a prime (don't use 2!!!). Since all operations are modulo 2^(8 * sizeof(hashtype)) (integer overflow), we need to generate a table of the multiplicative inverses modulo 2^(8 * sizeof(hashtype)) for all used primes. I won't cover generating these tables, as there's plenty of resources available on that topic here already.
The final hash would then look like this:
map<char, int> primes = generatePrimTable();
map<int, int> inverse = generateMultiplicativeInverses(primes);
unsigned int hash(string s){
unsigned int hash = 1;
for(int i = 0; i < s.length(); i++)
hash *= primes[s[i]];
return hash;
}
unsigned int slideHash(unsigned int oldHash, char slideOut, char slideIn){
return oldHash * inverse[primes[slideOut]] * primes[slideIn];
}
Keep in mind that this solution works with unsigned integers.

Typical rolling hashfunction for anagrams
using product of primes
This will only work for relatively short patterns
The hashvalues for allmost all normal words will fit into a 64 bit value without overflow.
Based on this anagram matcher
/* braek; */
/* 'foobaroofzaqofom' */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef unsigned long long HashVal;
static HashVal hashchar (unsigned char ch);
static HashVal hashmem (void *ptr, size_t len);
unsigned char primes26[] =
{ 5,71,79,19,2,83,31,43,11,53,37,23,41,3,13,73,101,17,29,7,59,47,61,97,89,67, };
/*********************************************/
static HashVal hashchar (unsigned char ch)
{
HashVal val=1;
if (ch >= 'A' && ch <= 'Z' ) val = primes26[ ch - 'A'];
else if (ch >= 'a' && ch <= 'z' ) val = primes26[ ch - 'a'];
return val;
}
static HashVal hashmem (void *ptr, size_t len)
{
size_t idx;
unsigned char *str = ptr;
HashVal val=1;
if (!len) return 0;
for (idx = 0; idx < len; idx++) {
val *= hashchar ( str[idx] );
}
return val;
}
/*********************************************/
unsigned char buff [4096];
int main (int argc, char **argv)
{
size_t patlen,len,pos,rotor;
int ch;
HashVal patval;
HashVal rothash=1;
patlen = strlen(argv[1]);
patval = hashmem( argv[1], patlen);
// fprintf(stderr, "Pat=%s, len=%zu, Hash=%llx\n", argv[1], patlen, patval);
for (rotor=pos=len =0; ; len++) {
ch=getc(stdin);
if (ch == EOF) break;
if (ch < 'A' || ch > 'z') { pos = 0; rothash = 1; continue; }
if (ch > 'Z' && ch < 'a') { pos = 0; rothash = 1; continue; }
/* remove old char from rolling hash */
if (pos >= patlen) { rothash /= hashchar(buff[rotor]); }
/* add new char to rolling hash */
buff[rotor] = ch;
rothash *= hashchar(buff[rotor]);
// fprintf(stderr, "%zu: [rot=%zu]pos=%zu, Hash=%llx\n", len, rotor, pos, rothash);
rotor = (rotor+1) % patlen;
/* matched enough characters ? */
if (++pos < patlen) continue;
/* correct hash value ? */
if (rothash != patval) continue;
fprintf(stdout, "Pos=%zu\n", len);
}
return 0;
}
Output/result:
$ ./a.out foo < anascan.c
Pos=21
Pos=27
Pos=33
Update. For people who don't like product of primes, here is a taxinumber sum of cubes (+ additional histogram check) implementation. This is also supposed to be 8-bit clean. Note the cubes are not necessary; it wotks equally well with squares. Or just the sum. (the final histogram check will have some more work todo)
/* braek; */
/* 'foobaroofzaqofom' */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef unsigned long long HashVal;
static HashVal hashchar (unsigned char ch);
static HashVal hashmem (void *ptr, size_t len);
/*********************************************/
static HashVal hashchar (unsigned char ch)
{
HashVal val=1+ch;
return val*val*val;
}
static HashVal hashmem (void *ptr, size_t len)
{
size_t idx;
unsigned char *str = ptr;
HashVal val=1;
if (!len) return 0;
for (idx = 0; idx < len; idx++) {
val += hashchar ( str[idx] );
}
return val;
}
/*********************************************/
int main (int argc, char **argv)
{
size_t patlen,len,rotor;
int ch;
HashVal patval;
HashVal rothash=1;
unsigned char *patstr;
unsigned pathist[256] = {0};
unsigned rothist[256] = {0};
unsigned char cycbuff[1024];
patstr = (unsigned char*) argv[1];
patlen = strlen((const char*) patstr);
patval = hashmem( patstr, patlen);
for(rotor=0; rotor < patlen; rotor++) {
pathist [ patstr[rotor] ] += 1;
}
fprintf(stderr, "Pat=%s, len=%zu, Hash=%llx\n", argv[1], patlen, patval);
for (rotor=len =0; ; len++) {
ch=getc(stdin);
if (ch == EOF) break;
/* remove old char from rolling hash */
if (len >= patlen) {
rothash -= hashchar(cycbuff[rotor]);
rothist [ cycbuff[rotor] ] -= 1;
}
/* add new char to rolling hash */
cycbuff[rotor] = ch;
rothash += hashchar(cycbuff[rotor]);
rothist [ cycbuff[rotor] ] += 1;
// fprintf(stderr, "%zu: [rot=%zu], Hash=%llx\n", len, rotor, rothash);
rotor = (rotor+1) % patlen;
/* matched enough characters ? */
if (len < patlen) continue;
/* correct hash value ? */
if (rothash != patval) continue;
/* correct histogram? */
if (memcmp(rothist,pathist, sizeof pathist)) continue;
fprintf(stdout, "Pos=%zu\n", len-patlen);
}
return 0;
}

NUMA Memory Page Migration Overhead

I have to find the overhead associated with NUMA memory page migration under Linux.
Can you please tell me which tools I can use?
If possible could you show an example.

If you want to understand whether your system is doing excessive remote node memory accesses and you're using intel CPUs, Intel's PMU has a utility called vtbwrun to report the QPI/uncore activity.
If you want to see how long it takes to execute a page migration, you can measure the duration of calls to numa_move_pages (provided by the numactl package).
Here's an example:
/*
* Test program to test the moving of a processes pages.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter#sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include "../numa.h"
#include <unistd.h>
#include <errno.h>
unsigned int pagesize;
unsigned int page_count = 32;
char *page_base;
char *pages;
void **addr;
int *status;
int *nodes;
int errors;
int nr_nodes;
struct bitmask *old_nodes;
struct bitmask *new_nodes;
int main(int argc, char **argv)
{
int i, rc;
pagesize = getpagesize();
nr_nodes = numa_max_node()+1;
old_nodes = numa_bitmask_alloc(nr_nodes);
new_nodes = numa_bitmask_alloc(nr_nodes);
numa_bitmask_setbit(old_nodes, 1);
numa_bitmask_setbit(new_nodes, 0);
if (nr_nodes < 2) {
printf("A minimum of 2 nodes is required for this test.\n");
exit(1);
}
setbuf(stdout, NULL);
printf("migrate_pages() test ......\n");
if (argc > 1)
sscanf(argv[1], "%d", &page_count);
page_base = malloc((pagesize + 1) * page_count);
addr = malloc(sizeof(char *) * page_count);
status = malloc(sizeof(int *) * page_count);
nodes = malloc(sizeof(int *) * page_count);
if (!page_base || !addr || !status || !nodes) {
printf("Unable to allocate memory\n");
exit(1);
}
pages = (void *) ((((long)page_base) & ~((long)(pagesize - 1))) + pagesize);
for (i = 0; i < page_count; i++) {
if (i != 2)
/* We leave page 2 unallocated */
pages[ i * pagesize ] = (char) i;
addr[i] = pages + i * pagesize;
nodes[i] = 1;
status[i] = -123;
}
/* Move to starting node */
rc = numa_move_pages(0, page_count, addr, nodes, status, 0);
if (rc < 0 && errno != ENOENT) {
perror("move_pages");
exit(1);
}
/* Verify correct startup locations */
printf("Page location at the beginning of the test\n");
printf("------------------------------------------\n");
numa_move_pages(0, page_count, addr, NULL, status, 0);
for (i = 0; i < page_count; i++) {
printf("Page %d vaddr=%p node=%d\n", i, pages + i * pagesize, status[i]);
if (i != 2 && status[i] != 1) {
printf("Bad page state before migrate_pages. Page %d status %d\n",i, status[i]);
exit(1);
}
}
/* Move to node zero */
numa_move_pages(0, page_count, addr, nodes, status, 0);
printf("\nMigrating the current processes pages ...\n");
rc = numa_migrate_pages(0, old_nodes, new_nodes);
if (rc < 0) {
perror("numa_migrate_pages failed");
errors++;
}
/* Get page state after migration */
numa_move_pages(0, page_count, addr, NULL, status, 0);
for (i = 0; i < page_count; i++) {
printf("Page %d vaddr=%lx node=%d\n", i,
(unsigned long)(pages + i * pagesize), status[i]);
if (i != 2) {
if (pages[ i* pagesize ] != (char) i) {
printf("*** Page contents corrupted.\n");
errors++;
} else if (status[i]) {
printf("*** Page on the wrong node\n");
errors++;
}
}
}
if (!errors)
printf("Test successful.\n");
else
printf("%d errors.\n", errors);
return errors > 0 ? 1 : 0;
}

I am trying to make a math quiz program in C, I have this so far but I cant figure out what is wrong

After the enter the first answer the code crashes.
Also it states that the memory is unsuccessful allocated. How can i fix this?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
int main(void)
{
int i;
srand(time(NULL));
int *num1;
int *num2;
int response;
int *answer;
char *result;
printf("\nMath Quiz\n");
printf("Enter # of problems: ");
scanf("%d", &response);
based on the number of questions the user wishes to take, allocate enough memory to hold question data
num1 = (int *)calloc(response, sizeof(int));
num2 = (int *)calloc(response, sizeof(int));
answer = (int *)calloc(response, sizeof(int));
result - (char *)calloc(response, sizeof(char));
if(num1 == NULL || num2 == NULL || answer == NULL || result == NULL)
{
printf("memory allocation unsucessful\n");
} //end if
for(i=0; i<response; i++)
{
num1[i] = (rand() % 12)+1;
num2[i] = (rand() % 12)+1;
printf("%d * %d = ", num1[i], num2[i]); //somewhere at this point the program messes up
scanf("%d", &answer[i]);
if(answer[i]= num1[i] * num2[i])
{
result[i] = 'c';
}
else
{
result[i] = 'i';
}
} //end for loop
printf("Quiz Results\n");
printf("Question\tYour Answer\tCorrect");
for(i=0; i<response; i++);
{
if(result[i] == 'c')
{
printf("%d * %d\t\t%d\t\tYES",num1[i],num2[i],answer[i]);
}
else
{
printf("%d * %d\t\t%d\t\tNo",num1[i],num2[i],answer[i]);
}
} //end for loop
free(num1);
free(num2);
free(answer);
free(result);
system("pause");
return 0;
} //end main

answer[i]= num1[i] * num2[i]
should read
answer[i] == num1[i] * num2[i]
= is for assignments, == is for comparisons.
and result - (char *)calloc(response, sizeof(char));
should read
result = (char *)calloc(response, sizeof(char));
If there are other problems, you need to be more specific than "the program messes up".
Also, don't cast the return value of malloc or calloc. Read Do I cast the result of malloc? .

Might this be the answer:
result - (char *)calloc(response, sizeof(char));
The '-' should be an '='.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Why seem my program memory write faster than read? - linux

Related

Audio Recording and Playback in C : problem with audio gain

How to write to multiple files on different disks simultaneously in one thread with DMA?

Find all string permutations of given string in given source string

NUMA Memory Page Migration Overhead

I am trying to make a math quiz program in C, I have this so far but I cant figure out what is wrong

Categories

Resources