Why does a file with a hole use fewer disk blocks than a file without a hole? - linux

#include "apue.h"   /* APUE's header: defines FILE_MODE and err_sys() */
#include <fcntl.h>
#include <unistd.h>

char buf1[] = "abcdefghij";
char buf2[] = "ABCDEFGHIJ";
char buf3[10];

int
main(void)
{
    int fd;

    if ((fd = creat("file.hole", FILE_MODE)) < 0) {
        err_sys("creat error");
    }
    if (write(fd, buf1, 10) != 10) { /* offset is now 10 */
        err_sys("buf1 write error");
    }
    if (lseek(fd, 16380, SEEK_SET) == -1) { /* offset is now 16380 */
        err_sys("lseek error");
    }
    if (write(fd, buf2, 10) != 10) { /* offset is now 16390 */
        err_sys("buf2 write error");
    }
    close(fd);

    if ((fd = open("file.hole", O_RDWR)) == -1) {
        err_sys("failed to re-open file");
    }
    ssize_t n;
    while ((n = read(fd, buf3, 10)) > 0) {
        if (write(STDOUT_FILENO, buf3, n) != n) { /* write what was read, not a fixed 10 */
            err_sys("stdout write error");
        }
    }
    if (n == -1) {
        err_sys("buf3 read error");
    }
    exit(0);
}
I'm a newbie in Unix system programming.
Here is code that makes a file with a hole.
The output is:
$ ls -ls file.hole file.nohole
8 -rw-r--r-- 1 sar 16394 time file.hole
20 -rw-r--r-- 1 sar 16394 time file.nohole
Why does the file with a hole have fewer disk blocks than the file without a hole?
I would have thought the file without a hole takes fewer disk blocks, because the file with a hole is more spread out than the one without.
From "Advanced Programming in the UNIX Environment", 3rd edition, Stevens & Rago, example 3.2.

Why do you think that a file without a hole takes less space? It is exactly the contrary.
If the file has holes, then it is not necessary to reserve disk blocks for that space.
The number of disk blocks is not related to how spread out the file is; it is directly related to the amount of data you actually wrote into the file.

The distribution of the data blocks on the hard disk doesn't count toward the number of blocks the file system needs to store the data. It really doesn't matter whether the blocks are close together or far apart, since the file system can use the blocks in between for different files.
So the output shows you that file.hole occupies only 8 blocks on the disk; it says nothing about where they are.
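You can verify this directly with stat(2): st_size is the logical file size, while st_blocks counts the blocks actually allocated (in 512-byte units on Linux). A minimal sketch, using the file names from the question:

#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char *argv[])
{
    struct stat sb;

    for (int i = 1; i < argc; i++) {
        if (stat(argv[i], &sb) == -1) {
            perror(argv[i]);
            continue;
        }
        /* st_blocks is in 512-byte units on Linux */
        printf("%s: size=%lld bytes, allocated=%lld bytes\n",
               argv[i], (long long)sb.st_size, (long long)sb.st_blocks * 512);
    }
    return 0;
}

Running it as ./a.out file.hole file.nohole should report the same size for both files but far fewer allocated bytes for file.hole, matching the ls -ls output above.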

Related

Get the volume serial number of a UDF CD/DVD disc?

I'm writing a program on Linux that computes the serial number (xxxx-xxxx) of a CD volume the way Windows 7 does. My program correctly determines the volume serial number on discs with the ISO9660 and Joliet filesystems. But how do I determine the volume serial number of a disc with the UDF filesystem? Can someone tell me?
P.S. In case anyone doesn't understand, I'm talking about a serial number of this kind: https://extra-torrent.jimdo.com/2016/01/23/hard-disk-volume-serial-number-change/
#include <QCoreApplication>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/cdrom.h>
#include <string.h>
#include <szi/szimac.h>
#include <qfile.h>
#include <iostream>
#include <QDir>
#include <unistd.h>

#define SEC_SIZE 2048
#define VD_N 16
#define VD_TYPE_SUPP 2
#define VD_TYPE_END 255
#define ESC_IDX 88
#define ESC_LEN 3
#define ESC_UCS2L1 "%/#"
#define ESC_UCS2L2 "%/C"
#define ESC_UCS2L3 "%/E"

using namespace std;

/* Windows-style disc id: byte-wise sums over the volume descriptor, folded into 32 bits */
int cdid(unsigned char pvd[SEC_SIZE])
{
    unsigned char part[4] = {0};
    int i;

    for (i = 0; i < SEC_SIZE; i += 4)
    {
        part[3] += pvd[i + 0];
        part[2] += pvd[i + 1];
        part[1] += pvd[i + 2];
        part[0] += pvd[i + 3];
    }
    return (part[3] << 24) + (part[2] << 16) + (part[1] << 8) + part[0];
}

int main(int argc, char *argv[])
{
    FILE *in;
    unsigned char buf[SEC_SIZE];
    struct cdrom_multisession msinfo;
    long session_start;
    int id;
    QString home = QString(getenv("HOME")) + QString("/chteniestorm");
    QFile file(home);
    QString ustr = "/dev/sr0"; // was undeclared in the original snippet

    in = fopen(ustr.toLocal8Bit().data(), "rb");
    if (in == NULL)
    {
        if (file.open(QIODevice::WriteOnly))
        {
            file.write("sernom=1");
            file.close();
        }
        cout << "netdiska" << endl;
        return 0;
    }
    /* Get session info */
    msinfo.addr_format = CDROM_LBA;
    if (ioctl(fileno(in), CDROMMULTISESSION, &msinfo) != 0)
    {
        fprintf(stderr, "WARNING: Can't get multisession info\n");
        perror(NULL);
        session_start = 0;
    }
    else
    {
        session_start = msinfo.addr.lba;
    }
    fseek(in, 0, SEEK_SET); /* back to the beginning */
    /* Seek to the primary volume descriptor */
    if (fseek(in, (session_start + VD_N) * SEC_SIZE, SEEK_SET) != 0)
    {
        if (file.open(QIODevice::WriteOnly))
        {
            file.write("sernom=2");
            file.close();
        }
        fclose(in);
        return 0;
    }
    /* Read descriptor */
    if (fread(buf, 1, SEC_SIZE, in) != SEC_SIZE)
    {
        if (file.open(QIODevice::WriteOnly))
        {
            file.write("sernom=3");
            file.close();
        }
        fclose(in);
        return 0;
    }
    /* Calculate disc id */
    id = cdid(buf);
    /* Search for the Joliet extension */
    while (buf[0] != VD_TYPE_END)
    {
        /* Read descriptor */
        if (fread(buf, 1, SEC_SIZE, in) != SEC_SIZE)
        {
            perror(NULL);
            return 0;
        }
        if (buf[0] == VD_TYPE_SUPP
            && (memcmp(buf + ESC_IDX, ESC_UCS2L1, ESC_LEN) == 0
                || memcmp(buf + ESC_IDX, ESC_UCS2L2, ESC_LEN) == 0
                || memcmp(buf + ESC_IDX, ESC_UCS2L3, ESC_LEN) == 0))
        {
            /* Joliet found */
            id = cdid(buf);
        }
    }
    fclose(in);
    return 0;
}
It looks like this question was asked in several other places [1], [2], [3], [4] but was never answered, so I will do it here.
In some of those posts people decoded the serial number generation algorithm. It is just a checksum, which you have already found and put into your cdid() function. The same checksum algorithm is used for both the ISO9660 and UDF filesystems on Windows. You have already figured out which ISO9660 structures that checksum is calculated from.
So your question remains only for the UDF filesystem. For UDF on Windows, that checksum is calculated from the 512-byte File Set Descriptor (FSD) structure. I would suggest you read the OSTA UDF specification for how to locate that FSD on a UDF disc.
Basically, for plain UDF which does not use a Virtual Allocation Table (VAT), Sparing Table or Metadata Partition, the location of the FSD is stored in the Logical Volume Descriptor (LVD) structure, in the field LogicalVolumeContentsUse (it is of type long_ad). The LVD is stored in the Volume Descriptor Sequence (VDS). The VDS's location is stored in the Anchor Volume Descriptor Pointer (AVDP), in the field MainVolumeDescriptorSequenceExtent. The AVDP itself is located at sector 256 of the medium. Optical media have a sector size of 2048 bytes, common hard disks 512 bytes.
For UDF with a VAT (e.g. on CD-R/DVD-R/BD-R), a Sparing Table (e.g. on CD-RW/DVD-RW) or a Metadata Partition (e.g. on Blu-ray), it is much more complicated. You need to look into the Virtual, Sparable or Metadata Partition to figure out how to translate the logical location of the FSD to the physical location on the media.
In the udftools project, starting with version 2.0, there is a new tool udfinfo which provides various information about a UDF filesystem. It also shows the Windows-specific Volume Serial Number from your question, under the winserialnum key. Note that udfinfo cannot read the FSD from a UDF filesystem with a VAT or Metadata Partition yet.
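As a starting point for the plain case, here is a rough sketch that reads the AVDP at sector 256 and extracts the location of the Main VDS. The offsets are my reading of ECMA-167 (a descriptor tag with a 16-bit TagIdentifier at offset 0, value 2 for an AVDP; MainVolumeDescriptorSequenceExtent as a little-endian {length, location} pair at byte offset 16) and should be double-checked against the specification:

#include <stdio.h>
#include <stdint.h>

#define SEC 2048 /* optical media sector size; use 512 for a common hard disk */

static uint32_t le32(const unsigned char *p)
{
    return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(int argc, char *argv[])
{
    unsigned char avdp[SEC];
    FILE *in;

    if (argc != 2 || (in = fopen(argv[1], "rb")) == NULL) {
        fprintf(stderr, "usage: %s /dev/sr0\n", argv[0]);
        return 1;
    }
    /* the AVDP lives at sector 256 */
    if (fseek(in, 256L * SEC, SEEK_SET) != 0 || fread(avdp, 1, SEC, in) != SEC) {
        perror("read AVDP");
        return 1;
    }
    /* descriptor tag: TagIdentifier 2 means AVDP */
    if ((avdp[0] | (avdp[1] << 8)) != 2) {
        fprintf(stderr, "no AVDP at sector 256 (not UDF?)\n");
        return 1;
    }
    /* MainVolumeDescriptorSequenceExtent: extent_ad {length, location} at offset 16 */
    printf("Main VDS: %u bytes at sector %u\n", le32(avdp + 16), le32(avdp + 20));
    fclose(in);
    return 0;
}

From there you would walk the VDS to find the LVD, take the FSD location from LogicalVolumeContentsUse, read the 512-byte FSD, and feed it to the same cdid()-style checksum.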

How do the limits on shared memory work on Linux

I was looking into the Linux kernel limits on shared memory.
/proc/sys/kernel/shmall
specifies the maximum number of pages that can be allocated. Calling this number x and the page size p, I assume that x * p bytes is the limit on system-wide shared memory.
Now I wrote a small program to create a shared memory segment, and I attached to that shared memory segment twice, as below:
shm_id = shmget(IPC_PRIVATE, 4 * sizeof(int), IPC_CREAT | 0666);
if (shm_id < 0) {
    printf("shmget error\n");
    exit(1);
}
printf("\n The shared memory created is %d", shm_id);
ptr = shmat(shm_id, NULL, 0);
ptr_info = shmat(shm_id, NULL, 0);
In the above program, ptr and ptr_info were different, so the shared memory is mapped at two virtual addresses in my process address space.
When I run ipcs, it looks like this:
...
0x00000000 1638416 sun 666 16000000 2
...
Now, coming to the shmall limit x * p noted above: does this limit apply to the sum of all the virtual memory mapped for every shared memory segment, or does it apply to the physical memory?
There is only one piece of physical memory here (the shared memory), yet the two shmat calls in the program above map twice that amount into my process address space. So will this limit be hit soon if I keep doing shmat on a single shared memory segment?
The limit applies only to physical memory, that is, the real shared memory allocated for all segments, because shmat() just maps an already-allocated segment into the process address space.
You can trace it in the kernel: there is only one place where this limit is checked, the newseg() function that allocates new segments (the ns->shm_ctlall comparison). The shmat() implementation is busy with a lot of stuff, but doesn't care at all about the shmall limit, so you can map one segment as many times as you want (well, the address space is also limited, but in practice you rarely care about that limit).
You can also run a test from userspace with a simple program like this one:
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <unistd.h>

unsigned long int get_shmall() {
    FILE *f = NULL;
    char buf[512];
    unsigned long int value = 0;

    if ((f = fopen("/proc/sys/kernel/shmall", "r")) != NULL) {
        if (fgets(buf, sizeof(buf), f) != NULL)
            value = strtoul(buf, NULL, 10); // no proper checks
        fclose(f); // no return value check
    }
    return value;
}

int set_shmall(unsigned long int value) {
    FILE *f = NULL;
    char buf[512];
    int retval = 0;

    if ((f = fopen("/proc/sys/kernel/shmall", "w")) != NULL) {
        if (snprintf(buf, sizeof(buf), "%lu\n", value) >= sizeof(buf) ||
            fwrite(buf, 1, strlen(buf), f) != strlen(buf))
            retval = -1;
        fclose(f); // fingers crossed
    } else
        retval = -1;
    return retval;
}

int main()
{
    int shm_id1 = -1, shm_id2 = -1;
    unsigned long int shmall = 0, shmused, newshmall;
    void *ptr1, *ptr2;
    struct shm_info shminf;

    if ((shmall = get_shmall()) == 0) {
        printf("can't get shmall\n");
        goto out;
    }
    printf("original shmall: %lu pages\n", shmall);
    if (shmctl(0, SHM_INFO, (struct shmid_ds *)&shminf) < 0) {
        printf("can't get SHM_INFO\n");
        goto out;
    }
    shmused = shminf.shm_tot * getpagesize();
    printf("shmused: %lu pages (%lu bytes)\n", shminf.shm_tot, shmused);
    newshmall = shminf.shm_tot + 1;
    if (set_shmall(newshmall) != 0) {
        printf("can't set shmall\n");
        goto out;
    }
    if (get_shmall() != newshmall) {
        printf("something went wrong with shmall setting\n");
        goto out;
    }
    printf("new shmall: %lu pages (%lu bytes)\n", newshmall, newshmall * getpagesize());
    printf("shmget() for %u bytes: ", (unsigned int) getpagesize());
    shm_id1 = shmget(IPC_PRIVATE, (size_t)getpagesize(), IPC_CREAT | 0666);
    if (shm_id1 < 0) {
        printf("failed: %s\n", strerror(errno));
        goto out;
    }
    printf("ok\nshmat 1: ");
    ptr1 = shmat(shm_id1, NULL, 0);
    if (ptr1 == (void *) -1) { /* shmat returns (void *) -1 on failure, not NULL */
        printf("failed\n");
        goto out;
    }
    printf("ok\nshmat 2: ");
    ptr2 = shmat(shm_id1, NULL, 0);
    if (ptr2 == (void *) -1) {
        printf("failed\n");
        goto out;
    }
    printf("ok\n");
    if (ptr1 == ptr2) {
        printf("ptr1 and ptr2 are the same with shm_id1\n");
        goto out;
    }
    printf("shmget() for %u bytes: ", (unsigned int) getpagesize());
    shm_id2 = shmget(IPC_PRIVATE, (size_t)getpagesize(), IPC_CREAT | 0666);
    if (shm_id2 < 0)
        printf("failed: %s\n", strerror(errno));
    else
        printf("ok, although it's wrong\n");
out:
    if (shmall != 0 && set_shmall(shmall) != 0)
        printf("failed to restore shmall\n");
    if (shm_id1 >= 0 && shmctl(shm_id1, IPC_RMID, NULL) < 0)
        printf("failed to remove shm_id1\n");
    if (shm_id2 >= 0 && shmctl(shm_id2, IPC_RMID, NULL) < 0)
        printf("failed to remove shm_id2\n");
    return 0;
}
What it does is set the shmall limit just one page above what is currently used by the system, then get a new page-sized segment and map it twice (all successfully), then try to get one more page-sized segment and fail. Run the program as superuser, because it writes to /proc/sys/kernel/shmall:
$ sudo ./a.out
original shmall: 18446744073708503040 pages
shmused: 21053 pages (86233088 bytes)
new shmall: 21054 pages (86237184 bytes)
shmget() for 4096 bytes: ok
shmat 1: ok
shmat 2: ok
shmget() for 4096 bytes: failed: No space left on device
I did not find any physical memory allocation in the do_shmat() function (linux/ipc/shm.c):
https://github.com/torvalds/linux/blob/5469dc270cd44c451590d40c031e6a71c1f637e8/ipc/shm.c
So shmat() consumes only virtual memory (your process address space);
the main work of shmat() is done by mmap.
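You can see this from userspace, too: System V shared memory mappings show up as /SYSV entries in /proc/self/maps, one per attach, even though only one segment exists. A minimal sketch (my own illustration, not from the answers above):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
    char line[512];
    FILE *maps;
    int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);

    if (id < 0) {
        perror("shmget");
        return 1;
    }
    /* two attaches of the same physical segment */
    if (shmat(id, NULL, 0) == (void *) -1 || shmat(id, NULL, 0) == (void *) -1) {
        perror("shmat");
        return 1;
    }
    /* each attach appears as a separate /SYSV... mapping */
    if ((maps = fopen("/proc/self/maps", "r")) != NULL) {
        while (fgets(line, sizeof(line), maps))
            if (strstr(line, "SYSV"))
                fputs(line, stdout);
        fclose(maps);
    }
    shmctl(id, IPC_RMID, NULL); /* mark the segment for removal */
    return 0;
}

Two mappings are printed, but ipcs still reports a single one-page segment, and only that one page counts against shmall.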

linux redirect 100GB stdout to file fails

I have this command that writes over 100GB of data to a file.
zfs send snap1 > file
Something appears to go wrong several hours into the process. E.g., if I run the job twice, the output is slightly different. If I try to process the file with
zfs receive snap2 < file
an error is reported after several hours.
For debugging purposes, I'm guessing that there's some low-probability failure in the shell redirection. Has anyone else seen problems with redirecting massive amounts of data? Any suggestions on how to proceed?
Debugging this is tedious because small examples work, and running the large case takes over 3 hours each time.
Earlier I had tried pipes:
zfs send snap1| zfs receive snap2
However this always failed with much smaller examples, for which
zfs send snap1 > file; zfs receive snap2 < file
worked. (I posted a question about that, but got no useful responses.) This is another reason that I suspect the shell.
Thanks.
The probability that the failure is in the shell (or the OS) is negligible compared to a bug in zfs or a problem in how you are using it.
It takes just a few minutes to test your hypothesis: compile this stupid program:
#include <unistd.h>
#include <string.h>

#define BUF (1 << 20)
#define INPUT 56

int main(int argc, char *argv[])
{
    char buf[BUF], rbuf[BUF], *a, *b;
    int len, i;

    memset(buf, INPUT, sizeof(buf));
    if (argc == 1)
    {
        /* reader: check that every byte matches the expected pattern */
        while ((len = read(0, rbuf, sizeof(rbuf))) > 0)
        {
            a = buf; b = rbuf;
            for (i = 0; i < len; ++i)
            {
                if (*a != *b)
                    return 1;
                ++a; ++b;
            }
        }
    }
    else
    {
        /* writer: emit the pattern forever */
        while (write(1, buf, sizeof(buf)) > 0)
            ;
    }
    return 0;
}
then run mkfifo a; ./a.out w > a in one shell and pv < a | ./a.out in another, and see how long it takes to get any bit flip.
It should get into the TiB region relatively fast...
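Another quick way to separate zfs from the redirection (my suggestion, not part of the original answer) is to checksum the stream with no file involved; if two runs disagree, the shell redirection was never at fault:
zfs send snap1 | md5sum
zfs send snap1 | md5sum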

rapidly writing to a temp file and renaming it... is that a good idea?

I have a daemon/service on a Linux box (Debian 6) that reads from a hardware device, does some calculations and then updates a file with the relevant values. This happens about 5 times per second.
Any process reading the file always sees nicely structured and recent values in it.
Here is the relevant daemon code:
while (1)
{
    int rename_ret;
    char tmpname[] = "/var/something/readout.tmp";
    char txtname[] = "/var/something/readout.txt";

    FILE *f = fopen(tmpname, "w");
    if (f == NULL)
    {
        printf("Error opening file!\n");
        exit(1);
    }

    /* ... reading from hardware, some calculation ... */
    /* then print to the tmp file: */
    fprintf(f, "%12.4f\n", CntVal1);
    fprintf(f, "%12.4f\n", CntVal2);
    fclose(f);

    rename_ret = rename(tmpname, txtname);
    if (rename_ret != 0)
    {
        printf("Error: unable to rename the file\n");
        exit(1);
    }

    nanosleep((struct timespec[]){{0, 200000000}}, NULL); /* 0.2 sec */
}
This works fine, but it feels kind of... wrong?
Note that this is not the device driver; it reads from the driver and processes the values for other processes to read.
So my question is:
Is this a bad idea?
What's the proper way to go about it? I like the idea of being able to "just read a file" and get fairly recent values...
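For what it's worth, the pattern in the question is the standard POSIX idiom for atomic updates: rename(2) atomically replaces readout.txt, so readers never see a partially written file. A commonly cited refinement, sketched below as an assumption rather than a requirement (CntVal1/CntVal2 are placeholders for the question's variables), is to flush and fsync the temp file before renaming, so that a crash cannot leave an empty readout.txt behind:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* placeholders standing in for the values the daemon computes */
static double CntVal1 = 1.0, CntVal2 = 2.0;

static void publish(void)
{
    char tmpname[] = "/var/something/readout.tmp";
    char txtname[] = "/var/something/readout.txt";
    FILE *f = fopen(tmpname, "w");

    if (f == NULL) {
        perror("fopen");
        exit(1);
    }
    fprintf(f, "%12.4f\n", CntVal1);
    fprintf(f, "%12.4f\n", CntVal2);

    /* force the data to disk before making it visible under the final name */
    if (fflush(f) != 0 || fsync(fileno(f)) != 0) {
        perror("flush/fsync");
        exit(1);
    }
    fclose(f);

    if (rename(tmpname, txtname) != 0) { /* atomic replace */
        perror("rename");
        exit(1);
    }
}

int main(void)
{
    publish();
    return 0;
}

At 5 updates per second the fsync cost may be noticeable, so whether it is worth it depends on how bad an empty or stale file after a crash would be.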

How to get the size of a gunzipped file in vim

When viewing (or editing) a .gz file, vim knows to locate gunzip and display the file properly.
In such cases, getfsize(expand("%")) would be the size of the gzipped file.
Is there a way to get the size of the expanded file?
[EDIT]
Another way to solve this might be to get the size of the current buffer, but there seems to be no such function in vim. Am I missing something?
There's no easy way to get the uncompressed size of a gzipped file, short of uncompressing it and using the getfsize() function. That might not be what you want. I took a look at RFC 1952 - GZIP File Format Specification, and the only thing that might be useful is the ISIZE field, which contains "...the size of the original (uncompressed) input data modulo 2^32".
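As an aside (my addition, not part of the original answer): the gzip command-line tool already reads that ISIZE field for you, so from within vim a quick check is:
:!gzip -l %
The "uncompressed" column it prints is that same ISIZE value, with the same modulo-2^32 caveat for inputs over 4 GiB.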
EDIT:
I don't know if this helps, but here's some proof-of-concept C code I threw together that retrieves the value of the ISIZE field in a gzip'd file. It works for me using Linux and gcc, but your mileage may vary. If you compile the code, and then pass in a gzip'd filename as a parameter, it will tell you the uncompressed size of the original file.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

int main(int argc, char *argv[])
{
    FILE *fp = NULL;
    int i = 0;

    if (argc != 2) {
        fprintf(stderr, "Must specify file to process.\n");
        return -1;
    }
    // Open the file for reading
    if ((fp = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Unable to open %s for reading: %s\n", argv[1], strerror(errno));
        return -1;
    }
    // Look at the first two bytes and make sure it's a gzip file
    int c1 = fgetc(fp);
    int c2 = fgetc(fp);
    if (c1 != 0x1f || c2 != 0x8b) {
        fprintf(stderr, "File is not a gzipped file.\n");
        return -1;
    }
    // Seek to four bytes from the end of the file
    fseek(fp, -4L, SEEK_END);
    // Array containing the last four bytes (named "tail" to avoid shadowing read(2))
    unsigned char tail[4];
    for (i = 0; i < 4; ++i) {
        int charRead = 0;
        if ((charRead = fgetc(fp)) == EOF) {
            // This shouldn't happen
            fprintf(stderr, "Read end-of-file");
            exit(1);
        }
        else
            tail[i] = (unsigned char)charRead;
    }
    // Copy the last four bytes into an unsigned int. ISIZE is stored
    // little-endian, so this direct copy is only correct on a
    // little-endian machine. This could also be done using a union.
    unsigned int intval = 0;
    memcpy(&intval, &tail, 4);
    printf("The uncompressed filesize was %u bytes (0x%02x hex)\n", intval, intval);
    fclose(fp);
    return 0;
}
This appears to work for getting the byte count of a buffer
(line2byte(line("$")+1)-1)
If you're on Unix/Linux, try
:%!wc -c
That's in bytes. (It works on Windows too, if you have e.g. Cygwin installed.) Then hit u to get your content back.
HTH
From within the vim editor, try this:
<Esc>:!wc -c my_zip_file.gz
That will display the number of bytes in the file. Note that this is the compressed size on disk, not the size of the expanded content.
