whether we have to use
f_bfree × f_frsize
or
f_bfree × f_bsize
to calculate the free disk space in a file system
struct statvfs {
unsigned long f_bsize; /* file system block size */
unsigned long f_frsize; /* fragment size */
fsblkcnt_t f_blocks; /* size of fs in f_frsize units */
fsblkcnt_t f_bfree; /* # free blocks */
fsblkcnt_t f_bavail; /* # free blocks for unprivileged users */
fsfilcnt_t f_files; /* # inodes */
fsfilcnt_t f_ffree; /* # free inodes */
fsfilcnt_t f_favail; /* # free inodes for unprivileged users */
unsigned long f_fsid; /* file system ID */
unsigned long f_flag; /* mount flags */
unsigned long f_namemax; /* maximum filename length */
};
Related
I learned that linux kernel manages the memory and the unit for allocate/deallocate the memory is 4KB, which is the page size. And I know that this pages are handled by struct page.
I got a actual code here.
struct page {
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
/*
* Five words (20/40 bytes) are available in this union.
* WARNING: bit 0 of the first word is used for PageTail(). That
* means the other users of this union MUST NOT use the bit to
* avoid collision and false-positive PageTail().
*/
union {
struct { /* Page cache and anonymous pages */
/**
* #lru: Pageout list, eg. active_list protected by
* pgdat->lru_lock. Sometimes used as a generic list
* by the page owner.
*/
struct list_head lru;
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
pgoff_t index; /* Our offset within mapping. */
/**
* #private: Mapping-private opaque data.
* Usually used for buffer_heads if PagePrivate.
* Used for swp_entry_t if PageSwapCache.
* Indicates order in the buddy system if PageBuddy.
*/
unsigned long private;
};
struct { /* page_pool used by netstack */
/**
* #dma_addr: might require a 64-bit value even on
* 32-bit architectures.
*/
dma_addr_t dma_addr;
};
struct { /* slab, slob and slub */
union {
struct list_head slab_list;
struct { /* Partial pages */
struct page *next;
#ifdef CONFIG_64BIT
int pages; /* Nr of pages left */
int pobjects; /* Approximate count */
#else
short int pages;
short int pobjects;
#endif
};
};
struct kmem_cache *slab_cache; /* not slob */
/* Double-word boundary */
void *freelist; /* first free object */
union {
void *s_mem; /* slab: first object */
unsigned long counters; /* SLUB */
struct { /* SLUB */
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
};
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
/* First tail page only */
unsigned char compound_dtor;
unsigned char compound_order;
atomic_t compound_mapcount;
};
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
atomic_t hpage_pinned_refcount;
/* For both global and memcg */
struct list_head deferred_list;
};
struct { /* Page table pages */
unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */
unsigned long _pt_pad_2; /* mapping */
union {
struct mm_struct *pt_mm; /* x86 pgds only */
atomic_t pt_frag_refcount; /* powerpc */
};
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
#else
spinlock_t ptl;
#endif
};
struct { /* ZONE_DEVICE pages */
/** #pgmap: Points to the hosting device page map. */
struct dev_pagemap *pgmap;
void *zone_device_data;
/*
* ZONE_DEVICE private pages are counted as being
* mapped so the next 3 words hold the mapping, index,
* and private fields from the source anonymous or
* page cache page while the page is migrated to device
* private memory.
* ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
* use the mapping, index, and private fields when
* pmem backed DAX files are mapped.
*/
};
/** #rcu_head: You can use this to free a page by RCU. */
struct rcu_head rcu_head;
};
union { /* This union is 4 bytes in size. */
/*
* If the page can be mapped to userspace, encodes the number
* of times this page is referenced by a page table.
*/
atomic_t _mapcount;
/*
* If the page is neither PageSlab nor mappable to userspace,
* the value stored here may help determine what this page
* is used for. See page-flags.h for a list of page types
* which are currently stored here.
*/
unsigned int page_type;
unsigned int active; /* SLAB */
int units; /* SLOB */
};
/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
atomic_t _refcount;
#ifdef CONFIG_MEMCG
struct mem_cgroup *mem_cgroup;
#endif
/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
* highmem some memory is mapped into kernel virtual memory
* dynamically, so we need a place to store that address.
* Note that this field could be 16 bits on x86 ... ;)
*
* Architectures with slow multiplication can define
* WANT_PAGE_VIRTUAL in asm/page.h
*/
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
} _struct_page_alignment;
And I have no idea where the linux kernel stores this huge(?) structure.
There are a lot of pages handled in the linux kernel and that means that we have a lot of this struct page structures. Where can it be stored on the memory?
Also I have no idea what the union up there is up for.
First, there are several memory models like FMM, SMP, and NUMA. The knowledge about virtual memory, page and page tables, sturct of kernel and user space memory may not write here because there are too much more than the limitation of the answer's length and I think you can learn it from any books.
Let's use NUMA as an example. In NUMA, every CPU will have a node : struct pglist_data *node_data, and this struct has many Zones like ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVALBE, every Zone has many struct free_area, and this struct includes a list of struct page.
Why we need to use page? It is because our physical memory is limit, so we create virtual memory. And then we need a mechanism to load virtual memory to physical memory to run the task(process or thread). So we use page as the meta entry, and use some models like NUMA to control those pages and page tables.
When we need to allocate some memory, we will use buddy system and slab/slub allocator to allocate memory pages and add them to a zone that can use. When the physical memory loads too much pages, it will use get_page_from_freelist() or kswapd to swap some out.
Auctally, memory and addresses space is a essential part of Linux kernel. I suggest you to read some books like CSAPP to get a relatively deep understand of OS, and then reading Linux kernel source codes to dive into them.
I have the same question as you recently.
Where is the struct page in Linux kernel?
And I think I can give you some helpful informations. Every node stores pages into pg_data_t's node_mem_map, and there is a global variable mem_map, which is pointing to Node 0's page array.
You can find more details from Professional Linux Kernel Architecture. In chapter 3, the section is "Creating Data Structures for Each Node".
If you want to know exact place where it is stored, this article might give you the answer.
It says that they are "usually stored at the beginning of ZONE_NORMAL".
Making a kernel module to link an inode to a directory. Beginner so on the learning curve. The plan is to resolve the directory to an inode, find its superblock and then get the inode from the file with iget_locked, finally link that inode into the dentry of the target directory and increment its link count.
syscall stat will take a pathname as input and dump a stat struct.
int stat(const char *pathname, struct stat *statbuf);
The inode to the file can be find as a member of the struct:
struct stat {
dev_t st_dev; /* ID of device containing file */
ino_t st_ino; /* Inode number */
mode_t st_mode; /* File type and mode */
nlink_t st_nlink; /* Number of hard links */
uid_t st_uid; /* User ID of owner */
gid_t st_gid; /* Group ID of owner */
dev_t st_rdev; /* Device ID (if special file) */
off_t st_size; /* Total size, in bytes */
blksize_t st_blksize; /* Block size for filesystem I/O */
blkcnt_t st_blocks; /* Number of 512B blocks allocated */
/* Since Linux 2.6, the kernel supports nanosecond
precision for the following timestamp fields.
For the details before Linux 2.6, see NOTES. */
struct timespec st_atim; /* Time of last access */
struct timespec st_mtim; /* Time of last modification */
struct timespec st_ctim; /* Time of last status change */
#define st_atime st_atim.tv_sec /* Backward compatibility */
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};
Referring to manpage stat(2). Hope this answers your question.
As I was going through the below chunk of Linux char driver code, I found the structure pointer current in printk.
I want to know what structure the current is pointing to and its complete elements.
What purpose does this structure serve?
ssize_t sleepy_read (struct file *filp, char __user *buf, size_t count, loff_t *pos)
{
printk(KERN_DEBUG "process %i (%s) going to sleep\n",
current->pid, current->comm);
wait_event_interruptible(wq, flag != 0);
flag = 0;
printk(KERN_DEBUG "awoken %i (%s)\n", current->pid, current->comm);
return 0;
}
It is a pointer to the current process ie, the process which has issued the system call.
From the docs:
The Current Process
Although kernel modules don't execute sequentially as applications do,
most actions performed by the kernel are related to a specific
process. Kernel code can know the current process driving it by
accessing the global item current, a pointer to struct task_struct,
which as of version 2.4 of the kernel is declared in
<asm/current.h>, included by <linux/sched.h>. The current pointer
refers to the user process currently executing. During the execution
of a system call, such as open or read, the current process is the one
that invoked the call. Kernel code can use process-specific
information by using current, if it needs to do so. An example of this
technique is presented in "Access Control on a Device File", in
Chapter 5, "Enhanced Char Driver Operations".
Actually, current is not properly a global variable any more, like it
was in the first Linux kernels. The developers optimized access to the
structure describing the current process by hiding it in the stack
page. You can look at the details of current in <asm/current.h>. While
the code you'll look at might seem hairy, we must keep in mind that
Linux is an SMP-compliant system, and a global variable simply won't
work when you are dealing with multiple CPUs. The details of the
implementation remain hidden to other kernel subsystems though, and a
device driver can just include and refer to the
current process.
From a module's point of view, current is just like the external
reference printk. A module can refer to current wherever it sees fit.
For example, the following statement prints the process ID and the
command name of the current process by accessing certain fields in
struct task_struct:
printk("The process is \"%s\" (pid %i)\n",
current->comm, current->pid);
The command name stored in current->comm is the base name of the
program file that is being executed by the current process.
Here is the complete structure the "current" is pointing to
task_struct
Each task_struct data structure describes a process or task in the system.
struct task_struct {
/* these are hardcoded - don't touch */
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
long counter;
long priority;
unsigned long signal;
unsigned long blocked; /* bitmap of masked signals */
unsigned long flags; /* per process flags, defined below */
int errno;
long debugreg[8]; /* Hardware debugging registers */
struct exec_domain *exec_domain;
/* various fields */
struct linux_binfmt *binfmt;
struct task_struct *next_task, *prev_task;
struct task_struct *next_run, *prev_run;
unsigned long saved_kernel_stack;
unsigned long kernel_stack_page;
int exit_code, exit_signal;
/* ??? */
unsigned long personality;
int dumpable:1;
int did_exec:1;
int pid;
int pgrp;
int tty_old_pgrp;
int session;
/* boolean value for session group leader */
int leader;
int groups[NGROUPS];
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->p_pptr->pid)
*/
struct task_struct *p_opptr, *p_pptr, *p_cptr,
*p_ysptr, *p_osptr;
struct wait_queue *wait_chldexit;
unsigned short uid,euid,suid,fsuid;
unsigned short gid,egid,sgid,fsgid;
unsigned long timeout, policy, rt_priority;
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
struct timer_list real_timer;
long utime, stime, cutime, cstime, start_time;
/* mm fault and swap info: this can arguably be seen as either
mm-specific or thread-specific */
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
int swappable:1;
unsigned long swap_address;
unsigned long old_maj_flt; /* old value of maj_flt */
unsigned long dec_flt; /* page fault count of the last time */
unsigned long swap_cnt; /* number of pages to swap on next pass */
/* limits */
struct rlimit rlim[RLIM_NLIMITS];
unsigned short used_math;
char comm[16];
/* file system info */
int link_count;
struct tty_struct *tty; /* NULL if no tty */
/* ipc stuff */
struct sem_undo *semundo;
struct sem_queue *semsleeping;
/* ldt for this task - used by Wine. If NULL, default_ldt is used */
struct desc_struct *ldt;
/* tss for this task */
struct thread_struct tss;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
/* memory management info */
struct mm_struct *mm;
/* signal handlers */
struct signal_struct *sig;
#ifdef __SMP__
int processor;
int last_processor;
int lock_depth; /* Lock depth.
We can context switch in and out
of holding a syscall kernel lock... */
#endif
};
I'm writing a kernel module that needs to read the value of bitrate from this union:
union iwreq_data
{==
.......
struct iw_param bitrate; /* default bit rate */
....
}
This code is form wireless.h Does anyone know how I can access it's value?
(I'm using linux kernel 2.6.35)
There's no way to do that, unfortunately ...
taking a closer look at the iw_param struct we find
struct iw_param
{
__s32 value; /* The value of the parameter itself */
__u8 fixed; /* Hardware should not use auto select */
__u8 disabled; /* Disable the feature */
__u16 flags; /* Various specifc flags (if any) */
};
now, here's what the code looks like in wireless.h
union iwreq_data
{
/* Config - generic */
char name[IFNAMSIZ];
/* Name : used to verify the presence of wireless extensions.
* Name of the protocol/provider... */
struct iw_point essid; /* Extended network name */
struct iw_param nwid; /* network id (or domain - the cell) */
struct iw_freq freq; /* frequency or channel :
* 0-1000 = channel
* > 1000 = frequency in Hz */
struct iw_param sens; /* signal level threshold */
struct iw_param bitrate; /* default bit rate */
struct iw_param txpower; /* default transmit power */
struct iw_param rts; /* RTS threshold threshold */
struct iw_param frag; /* Fragmentation threshold */
__u32 mode; /* Operation mode */
struct iw_param retry; /* Retry limits & lifetime */
struct iw_point encoding; /* Encoding stuff : tokens */
struct iw_param power; /* PM duration/timeout */
struct iw_quality qual; /* Quality part of statistics */
struct sockaddr ap_addr; /* Access point address */
struct sockaddr addr; /* Destination address (hw/mac) */
struct iw_param param; /* Other small parameters */
struct iw_point data; /* Other large parameters */
};
I cannot think of a way to access it other than bitrate->value
What is the difference between calling mlock() on some memory and calling shmctl(SHM_LOCK) on that same memory?
These are the only differences I can ascertain:
mlock() guarantees that all locked pages are loaded when it returns. shmctl(SHM_LOCK) prevents swapping, but doesn't proactively load non-resident pages.
shmctl(SHM_LOCK) can only be used on shared memory segments.
shmctl(SHM_LOCK) sets an extra flag (SHM_LOCKED) on the permissions of the shared memory segment.
Are there other differences? In particular, is there any reason not to use mlock() on a shared memory segment?
First of all, mlock() is the syscall to lock process memory in the RAM, and shmctl(X,SHM_LOCK,Y) is used to do shared (IPC) memory, which requires more control from all the producers and consumers, so, that is why mlock() syscall is much easier as:
int mlock(const void *addr, size_t len);
While shmctl is much more complex to operate, as:
int shmctl(int shmid, SHM_LOCK, struct shmid_ds *buf);
Where:
The buf argument is a pointer to a shmid_ds structure, defined in as follows:
struct shmid_ds {
struct ipc_perm shm_perm; /* Ownership and permissions */
size_t shm_segsz; /* Size of segment (bytes) */
time_t shm_atime; /* Last attach time */
time_t shm_dtime; /* Last detach time */
time_t shm_ctime; /* Last change time */
pid_t shm_cpid; /* PID of creator */
pid_t shm_lpid; /* PID of last shmat(2)/shmdt(2) */
shmatt_t shm_nattch; /* No. of current attaches */
...
};