struct task_struct 是 Linux 内核中用于表示进程的核心数据结构。它包含了进程的各种信息,如状态、调度信息、内存管理信息、文件描述符等。理解 task_struct 的结构和各个字段的作用,对于深入了解 Linux 进程调度机制至关重要。
以下代码摘自 Linux 5.8 版本的内核源码,不同版本可能会有所不同。
Linux 内核源码路径:include/linux/sched.h
/*
 * struct task_struct - the kernel's per-task (process/thread) descriptor.
 *
 * Layout constraints stated by the comments inside this definition:
 *  - thread_info, when CONFIG_THREAD_INFO_IN_TASK is set, must be the
 *    first member;
 *  - the members between randomized_struct_fields_start and
 *    randomized_struct_fields_end form the randomizable portion; only
 *    scheduling-critical items belong above it and new members should be
 *    added inside it;
 *  - 'thread' must remain the last member.
 *
 * Many members are only compiled in when the matching CONFIG_* option
 * (see the surrounding #ifdef) is enabled.
 */
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For reasons of header soup (see current_thread_info()), this
* must be the first element of task_struct.
*/
struct thread_info thread_info;
#endif
/* -1 unrunnable, 0 runnable, >0 stopped: */
volatile long state;
/*
* This begins the randomizable portion of task_struct. Only
* scheduling-critical items should be added above here.
*/
randomized_struct_fields_start
void *stack;
/* Reference count controlling the lifetime of this task_struct: */
refcount_t usage;
/* Per task flags (PF_*), defined further below: */
unsigned int flags;
unsigned int ptrace;
#ifdef CONFIG_SMP
/* Whether the task is currently running on a CPU: */
int on_cpu;
struct __call_single_node wake_entry;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
unsigned int cpu;
#endif
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
struct task_struct *last_wakee;
/*
* recent_used_cpu is initially set as the last CPU used by a task
* that wakes affine another task. Waker/wakee relationships can
* push tasks around a CPU where each wakeup moves to the next one.
* Tracking a recently used CPU allows a quick search for a recently
* used CPU that may be idle.
*/
int recent_used_cpu;
int wake_cpu;
#endif
/* Whether the task is on a run queue: */
int on_rq;
/* Dynamic priority, the value actually used when scheduling: */
int prio;
/* Static priority, derived from the nice value: */
int static_prio;
/* Normalized priority, see normal_prio(): */
int normal_prio;
/* Real-time priority; larger means higher, 0 for non-RT tasks: */
unsigned int rt_priority;
/* Scheduling class that defines this task's scheduling behaviour: */
const struct sched_class *sched_class;
/* Scheduling entity used by CFS for normal tasks: */
struct sched_entity se;
/* Scheduling entity for real-time tasks: */
struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
/* Scheduling entity for deadline tasks: */
struct sched_dl_entity dl;
#ifdef CONFIG_UCLAMP_TASK
/* Clamp values requested for a scheduling entity */
struct uclamp_se uclamp_req[UCLAMP_CNT];
/* Effective clamp values used for a scheduling entity */
struct uclamp_se uclamp[UCLAMP_CNT];
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* List of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
/* Scheduling policy (SCHED_NORMAL, SCHED_FIFO, SCHED_RR, SCHED_DEADLINE, ...): */
unsigned int policy;
/* CPU affinity: the set of CPUs this task is allowed to run on: */
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
union rcu_special rcu_read_unlock_special;
struct list_head rcu_node_entry;
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
unsigned long rcu_tasks_nvcsw;
u8 rcu_tasks_holdout;
u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
int trc_reader_nesting;
int trc_ipi_to_cpu;
union rcu_special trc_reader_special;
bool trc_reader_checked;
struct list_head trc_holdout_list;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
struct sched_info sched_info;
struct list_head tasks;
#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
struct rb_node pushable_dl_tasks;
#endif
/* User address space descriptor; NULL for kernel threads: */
struct mm_struct *mm;
/* Address space currently in use (kernel threads borrow one): */
struct mm_struct *active_mm;
/* Per-thread vma caching: */
struct vmacache vmacache;
#ifdef SPLIT_RSS_COUNTING
struct task_rss_stat rss_stat;
#endif
/* Exit state, exit code and exit signal: */
int exit_state;
int exit_code;
int exit_signal;
/* The signal sent when the parent dies: */
int pdeath_signal;
/* JOBCTL_*, siglock protected: */
unsigned long jobctl;
/* Used for emulating ABI behavior of previous Linux versions: */
unsigned int personality;
/* Scheduler bits, serialized by scheduler locks: */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
unsigned sched_remote_wakeup:1;
#ifdef CONFIG_PSI
unsigned sched_psi_wake_requeue:1;
#endif
/* Force alignment to the next boundary: */
unsigned :0;
/* Unserialized, strictly 'current' */
/* Bit to tell LSMs we're in execve(): */
unsigned in_execve:1;
unsigned in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
unsigned restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
unsigned in_user_fault:1;
#endif
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
/* task is frozen/stopped (used by the cgroup freezer) */
unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
unsigned use_memdelay:1;
#endif
#ifdef CONFIG_PSI
/* Stalled due to lack of memory */
unsigned in_memstall:1;
#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
struct restart_block restart_block;
/* Task ID and thread-group ID (equal for the thread-group leader): */
pid_t pid;
pid_t tgid;
#ifdef CONFIG_STACKPROTECTOR
/* Canary value for the -fstack-protector GCC feature: */
unsigned long stack_canary;
#endif
/*
* Pointers to the (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->real_parent->pid)
*/
/* Real parent process: */
struct task_struct __rcu *real_parent;
/* Recipient of SIGCHLD, wait4() reports: */
struct task_struct __rcu *parent;
/*
* Children/sibling form the list of natural children:
*/
struct list_head children;
struct list_head sibling;
/* Leader (main thread) of this task's thread group: */
struct task_struct *group_leader;
/*
* 'ptraced' is the list of tasks this task is using ptrace() on.
*
* This includes both natural children and PTRACE_ATTACH targets.
* 'ptrace_entry' is this task's link on the p->parent->ptraced list.
*/
struct list_head ptraced;
struct list_head ptrace_entry;
/* PID/PID hash table linkage. */
struct pid *thread_pid;
struct hlist_node pid_links[PIDTYPE_MAX];
struct list_head thread_group;
struct list_head thread_node;
/* Completion used to synchronize vfork(): */
struct completion *vfork_done;
/* CLONE_CHILD_SETTID: */
int __user *set_child_tid;
/* CLONE_CHILD_CLEARTID: */
int __user *clear_child_tid;
/* Time spent in user mode and in kernel mode: */
u64 utime;
u64 stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
u64 utimescaled;
u64 stimescaled;
#endif
u64 gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
struct vtime vtime;
#endif
#ifdef CONFIG_NO_HZ_FULL
atomic_t tick_dep_mask;
#endif
/* Context switch counts: */
unsigned long nvcsw;
unsigned long nivcsw;
/* Monotonic time in nsecs: */
u64 start_time;
/* Boot based time in nsecs: */
u64 start_boottime;
/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
unsigned long min_flt;
unsigned long maj_flt;
/* Empty if CONFIG_POSIX_CPUTIMERS=n */
struct posix_cputimers posix_cputimers;
/* Process credentials: */
/* Tracer's credentials at attach: */
const struct cred __rcu *ptracer_cred;
/* Objective and real subjective task credentials (COW): */
const struct cred __rcu *real_cred;
/* Effective (overridable) subjective task credentials (COW): */
const struct cred __rcu *cred;
#ifdef CONFIG_KEYS
/* Cached requested key. */
struct key *cached_requested_key;
#endif
/*
* executable name, excluding path.
*
* - normally initialized setup_new_exec()
* - access it with [gs]et_task_comm()
* - lock it with task_lock()
*/
char comm[TASK_COMM_LEN];
struct nameidata *nameidata;
#ifdef CONFIG_SYSVIPC
struct sysv_sem sysvsem;
struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
unsigned long last_switch_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
/* Open file information: */
struct files_struct *files;
#ifdef CONFIG_IO_URING
struct io_uring_task *io_uring;
#endif
/* Namespaces: */
struct nsproxy *nsproxy;
/* Signal handlers: */
struct signal_struct *signal;
struct sighand_struct __rcu *sighand;
/* Set of blocked signals: */
sigset_t blocked;
sigset_t real_blocked;
/* Restored if set_restore_sigmask() was used: */
sigset_t saved_sigmask;
/* Queue of pending signals: */
struct sigpending pending;
unsigned long sas_ss_sp;
size_t sas_ss_size;
unsigned int sas_ss_flags;
struct callback_head *task_works;
#ifdef CONFIG_AUDIT
#ifdef CONFIG_AUDITSYSCALL
struct audit_context *audit_context;
#endif
kuid_t loginuid;
unsigned int sessionid;
#endif
/* seccomp (system call filtering) state: */
struct seccomp seccomp;
/* Thread group tracking: */
u64 parent_exec_id;
u64 self_exec_id;
/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
spinlock_t alloc_lock;
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
/* Wake-queue node: */
struct wake_q_node wake_q;
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task: */
struct rb_root_cached pi_waiters;
/* Updated under owner's pi_lock and rq lock */
struct task_struct *pi_top_task;
/* Deadlock detection and priority inheritance handling: */
struct rt_mutex_waiter *pi_blocked_on;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
/* Mutex deadlock detection: */
struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
int non_block_count;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned int hardirq_threaded;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
int irq_config;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
#endif
#ifdef CONFIG_UBSAN
unsigned int in_ubsan;
#endif
/* Journalling filesystem info: */
void *journal_info;
/* Stacked block device info: */
struct bio_list *bio_list;
#ifdef CONFIG_BLOCK
/* Stack plugging: */
struct blk_plug *plug;
#endif
/* VM state: */
struct reclaim_state *reclaim_state;
struct backing_dev_info *backing_dev_info;
struct io_context *io_context;
#ifdef CONFIG_COMPACTION
struct capture_control *capture_control;
#endif
/* Ptrace state: */
unsigned long ptrace_message;
kernel_siginfo_t *last_siginfo;
struct task_io_accounting ioac;
#ifdef CONFIG_PSI
/* Pressure stall state */
unsigned int psi_flags;
#endif
#ifdef CONFIG_TASK_XACCT
/* Accumulated RSS usage: */
u64 acct_rss_mem1;
/* Accumulated virtual memory usage: */
u64 acct_vm_mem1;
/* stime + utime since last update: */
u64 acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS
/* Protected by ->alloc_lock: */
nodemask_t mems_allowed;
/* Sequence number to catch updates: */
seqcount_t mems_allowed_seq;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock: */
struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
#ifdef CONFIG_X86_CPU_RESCTRL
u32 closid;
u32 rmid;
#endif
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
struct mutex futex_exit_mutex;
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
/* Protected by alloc_lock: */
struct mempolicy *mempolicy;
short il_prev;
short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
int numa_scan_seq;
unsigned int numa_scan_period;
unsigned int numa_scan_period_max;
int numa_preferred_nid;
unsigned long numa_migrate_retry;
/* Migration stamp: */
u64 node_stamp;
u64 last_task_numa_placement;
u64 last_sum_exec_runtime;
struct callback_head numa_work;
/*
* This pointer is only modified for current in syscall and
* pagefault context (and for tasks being destroyed), so it can be read
* from any of the following contexts:
* - RCU read-side critical section
* - current->numa_group from everywhere
* - task's runqueue locked, task not running
*/
struct numa_group __rcu *numa_group;
/*
* numa_faults is an array split into four regions:
* faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
* in this precise order.
*
* faults_memory: Exponential decaying average of faults on a per-node
* basis. Scheduling placement decisions are made based on these
* counts. The values remain static for the duration of a PTE scan.
* faults_cpu: Track the nodes the process was running on when a NUMA
* hinting fault was incurred.
* faults_memory_buffer and faults_cpu_buffer: Record faults per node
* during the current scan window. When the scan completes, the counts
* in faults_memory and faults_cpu decay and these values are copied.
*/
unsigned long *numa_faults;
unsigned long total_numa_faults;
/*
* numa_faults_locality tracks if faults recorded during the last
* scan window were remote/local or failed to migrate. The task scan
* period is adapted based on the locality of the faults with different
* weights depending on whether they were shared or private faults
*/
unsigned long numa_faults_locality[3];
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
u32 rseq_sig;
/*
* RmW on rseq_event_mask must be performed atomically
* with respect to preemption.
*/
unsigned long rseq_event_mask;
#endif
struct tlbflush_unmap_batch tlb_ubc;
union {
refcount_t rcu_users;
struct rcu_head rcu;
};
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
struct page_frag task_frag;
#ifdef CONFIG_TASK_DELAY_ACCT
/* Delay accounting information: */
struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
unsigned int fail_nth;
#endif
/*
* When (nr_dirtied >= nr_dirtied_pause), it's time to call
* balance_dirty_pages() for a dirty throttling pause:
*/
int nr_dirtied;
int nr_dirtied_pause;
/* Start of a write-and-pause period: */
unsigned long dirty_paused_when;
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
/*
* Time slack values; these are used to round up poll() and
* select() etc timeout values. These are in nanoseconds.
*/
u64 timer_slack_ns;
u64 default_timer_slack_ns;
#ifdef CONFIG_KASAN
unsigned int kasan_depth;
#endif
#ifdef CONFIG_KCSAN
struct kcsan_ctx kcsan_ctx;
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
int curr_ret_stack;
int curr_ret_depth;
/* Stack of return addresses for return function tracing: */
struct ftrace_ret_stack *ret_stack;
/* Timestamp for last schedule: */
unsigned long long ftrace_timestamp;
/*
* Number of functions that haven't been traced
* because of depth overrun:
*/
atomic_t trace_overrun;
/* Pause tracing: */
atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
/* State flags for use by tracers: */
unsigned long trace;
/* Bitmask and counter of trace recursion: */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_KCOV
/* See kernel/kcov.c for more details. */
/* Coverage collection mode enabled for this task (0 if disabled): */
unsigned int kcov_mode;
/* Size of the kcov_area: */
unsigned int kcov_size;
/* Buffer for coverage collection: */
void *kcov_area;
/* KCOV descriptor wired with this task or NULL: */
struct kcov *kcov;
/* KCOV common handle for remote coverage collection: */
u64 kcov_handle;
/* KCOV sequence number: */
int kcov_sequence;
/* Collect coverage from softirq context: */
unsigned int kcov_softirq;
#endif
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg_in_oom;
gfp_t memcg_oom_gfp_mask;
int memcg_oom_order;
/* Number of pages to reclaim on returning to userland: */
unsigned int memcg_nr_pages_over_high;
/* Used by memcontrol for targeted memcg charge: */
struct mem_cgroup *active_memcg;
#endif
#ifdef CONFIG_BLK_CGROUP
struct request_queue *throttle_queue;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACK
struct vm_struct *stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* A live task holds one reference: */
refcount_t stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
int patch_state;
#endif
#ifdef CONFIG_SECURITY
/* Used by LSM modules for access restriction: */
void *security;
#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;
unsigned long prev_lowest_stack;
#endif
#ifdef CONFIG_X86_MCE
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
__mce_reserved : 62;
struct callback_head mce_kill_me;
#endif
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
*/
randomized_struct_fields_end
/* CPU-specific state of this task: */
struct thread_struct thread;
/*
* WARNING: on x86, 'thread_struct' contains a variable-sized
* structure. It *MUST* be at the end of 'task_struct'.
*
* Do not put anything below here!
*/
};
🧩 一、任务基础信息区
| 字段 | 含义 |
|---|---|
volatile long state | 当前任务的状态:TASK_RUNNING、TASK_INTERRUPTIBLE、TASK_UNINTERRUPTIBLE、__TASK_STOPPED、__TASK_TRACED 等;EXIT_ZOMBIE、EXIT_DEAD 等退出状态则记录在 exit_state 字段中。 |
void *stack | 指向该任务内核栈所在内存区域的起始地址(最低地址)。每个任务在内核态都有独立的内核栈,大小为 THREAD_SIZE(x86-32 上为 8KB,x86-64 上通常为 16KB)。 |
refcount_t usage | 引用计数,用于控制该 task_struct 的生命周期。 |
unsigned int flags | 标志位(PF_* 系列),标记任务特性,例如 PF_KTHREAD(内核线程)、PF_NOFREEZE 等。 |
unsigned int ptrace | 用于 ptrace 调试机制,标识被调试状态。 |
pid_t pid | 进程号(唯一标识)。 |
pid_t tgid | 线程组 ID,组内线程共享资源(如地址空间)。主线程的 pid == tgid。 |
char comm[TASK_COMM_LEN] | 当前任务的可读名称(通常是不含路径的程序名,包含结尾的 '\0' 在内最多 16 字节)。 |
⚙️ 二、调度器相关字段
这些字段由内核调度器(kernel/sched/)使用,决定任务何时、在哪个 CPU 上执行。调度器根据优先级决定任务的调度顺序:普通任务由 CFS(完全公平调度器)调度,其静态优先级由 nice 值决定,nice 值的范围是 [-20, 19],数字越小优先级越高,可以通过 top 命令的 NI 列查看;实时任务的优先级范围是 [1, 99],数字越大优先级越高;Deadline 类型任务的优先级固定为 -1。
| 字段 | 含义 |
|---|---|
int prio | 动态优先级,调度时实际使用的优先级。 |
int static_prio | 静态优先级(由用户设置的 nice 值决定)。nice 值的范围是 [-20, 19],而实时优先级的有效范围是 [1, 99],二者的数值区间存在重叠。为了区分它们,内核将 nice 值加上 120 得到静态优先级(static_prio = nice + 120),因此普通进程的静态优先级落在 [100, 139],不会与实时任务的优先级区间冲突。 |
int normal_prio | 归一化优先级(综合 static_prio 与调度策略得到)。rt_priority 数字越大,优先级越高;nice 值则相反;而 deadline 进程始终要保持最高优先级。为了便于管理,内核设计了一种归一化算法,将所有优先级统一到 [-1, 139] 这个区间上,并且数字越小优先级越高,该优先级就叫做归一化优先级。 |
unsigned int rt_priority | 实时任务优先级(0~99),数字越大优先级越高,当数值是0时表示该进程是普通进程。 |
unsigned int policy | 调度策略:SCHED_NORMAL、SCHED_FIFO、SCHED_RR、SCHED_DEADLINE 等。 |
const struct sched_class *sched_class | 指向调度类对象(如 fair_sched_class, rt_sched_class),定义任务的调度行为。 |
struct sched_entity se | 普通任务的调度实体(CFS调度器使用)。 |
struct sched_rt_entity rt | 实时任务的调度实体。 |
struct sched_dl_entity dl | deadline 调度任务的调度实体。 |
int on_cpu | 当前任务是否正在某个 CPU 上运行。 |
int on_rq | 是否在运行队列中。 |
unsigned int cpu | 当前任务所在的 CPU。 |
int nr_cpus_allowed / cpumask_t cpus_mask | 该任务允许运行的 CPU 集合(CPU亲和性)。 |
优先级归一化的实现:
/*
 * Normalized priority of a task that is neither a deadline nor a
 * real-time task: its static priority, unchanged.
 */
static inline int __normal_prio(struct task_struct *p)
{
	const int static_level = p->static_prio;

	return static_level;
}
/*
 * Map the task's policy-specific priority onto the single normalized
 * scale, on which a smaller number means a higher priority.
 */
static inline int normal_prio(struct task_struct *p)
{
	/* MAX_DL_PRIO is 0, so a deadline task always normalizes to -1. */
	if (task_has_dl_policy(p))
		return MAX_DL_PRIO - 1;

	/*
	 * MAX_RT_PRIO is 100 and rt_priority lies in [1, 99], where a larger
	 * number means a higher priority. The subtraction flips that scale so
	 * that a higher real-time priority yields a smaller normalized value.
	 */
	if (task_has_rt_policy(p))
		return MAX_RT_PRIO - 1 - p->rt_priority;

	/* Ordinary task: the static priority is used as-is. */
	return __normal_prio(p);
}
🧠 三、内存管理相关
| 字段 | 含义 |
|---|---|
struct mm_struct *mm | 用户空间内存描述符,包含页表、VMA等。内核线程中此项为 NULL。 |
struct mm_struct *active_mm | 当前活跃的 mm。即使是内核线程,在切换时也会借用上一个用户进程的 mm。 |
struct vmacache vmacache | 最近访问的虚拟内存区域缓存,加速 find_vma() 查找。 |
struct task_rss_stat rss_stat | 内存页统计信息。 |
🧾 四、父子关系与进程树
| 字段 | 含义 |
|---|---|
struct task_struct __rcu *real_parent | 实际的父进程。 |
struct task_struct __rcu *parent | 接收 SIGCHLD 的进程。 |
struct list_head children | 子进程链表头。 |
struct list_head sibling | 兄弟进程链表节点。 |
struct task_struct *group_leader | 线程组的主线程。 |
🧩 五、文件系统与文件描述符表
| 字段 | 含义 |
|---|---|
struct fs_struct *fs | 文件系统上下文(当前工作目录、根目录等)。 |
struct files_struct *files | 打开文件表(文件描述符表)。 |
struct nsproxy *nsproxy | 命名空间(UTS、IPC、PID、Mount、NET 等)信息。 |
🔒 六、安全与权限
| 字段 | 含义 |
|---|---|
const struct cred __rcu *cred | 有效凭证(权限、UID、GID、能力等)。 |
const struct cred __rcu *real_cred | 实际凭证。 |
struct seccomp seccomp | seccomp 安全策略(系统调用过滤)。 |
void *security | LSM 安全模块(SELinux/AppArmor)使用。 |
🔄 七、信号与异常处理
| 字段 | 含义 |
|---|---|
struct signal_struct *signal | 信号共享信息(线程组共享)。 |
struct sighand_struct *sighand | 信号处理函数表。 |
sigset_t blocked | 屏蔽的信号集合。 |
struct sigpending pending | 待处理的信号队列。 |
int exit_state, exit_code, exit_signal | 退出状态和信号。 |
📊 八、时间与统计
| 字段 | 含义 |
|---|---|
u64 utime, stime | 用户态时间与内核态时间。 |
u64 start_time, start_boottime | 任务启动时间。 |
unsigned long nvcsw, nivcsw | 上下文切换次数(自愿与非自愿)。 |
unsigned long min_flt, maj_flt | 次缺页和主缺页次数。 |
💡 九、同步与锁机制
| 字段 | 含义 |
|---|---|
spinlock_t alloc_lock | 分配保护锁。 |
raw_spinlock_t pi_lock | 优先级继承(PI)锁。 |
struct wake_q_node wake_q | 唤醒队列节点。 |
struct mutex_waiter *blocked_on | 当前阻塞的互斥量。 |
🧰 十、性能、追踪、调试
| 字段 | 含义 |
|---|---|
struct perf_event_context *perf_event_ctxp[] | perf 性能计数器上下文。 |
struct io_context *io_context | I/O 调度信息。 |
struct audit_context *audit_context | 审计系统信息。 |
struct task_delay_info *delays | 延迟统计信息。 |
struct kcov *kcov | 内核代码覆盖率工具使用。 |
⚡ 十一、NUMA(非一致内存访问)与调度平衡
| 字段 | 含义 |
|---|---|
struct mempolicy *mempolicy | NUMA 内存策略。 |
int numa_preferred_nid | 首选 NUMA 节点。 |
struct numa_group *numa_group | 关联的 NUMA 组。 |
🧱 十二、内核线程与特殊任务支持
| 字段 | 含义 |
|---|---|
struct thread_struct thread | CPU 特定的任务状态(任务被切换出去时,其寄存器等硬件上下文保存在这里,切换回来时据此恢复)。 |
struct thread_info thread_info | 每线程的低层信息(栈、标志等)。 |
struct reclaim_state *reclaim_state | 内存回收状态。 |
struct page_frag task_frag | 页面片段缓存。 |
struct bio_list *bio_list | I/O 操作链表。 |
🚀 十三、cgroup(控制组)
| 字段 | 含义 |
|---|---|
struct css_set *cgroups | 当前所属的控制组集合。 |
struct list_head cg_list | 所在控制组链表节点。 |
struct task_group *sched_task_group | 调度器的 cgroup 控制信息。 |
🧮 十四、其它辅助信息
| 字段 | 含义 |
|---|---|
struct completion *vfork_done | 用于 vfork() 同步。 |
int pdeath_signal | 父进程退出时发送给子进程的信号。 |
struct io_uring_task *io_uring | io_uring 上下文。 |
struct bio_list *bio_list | 块 I/O 任务队列。 |
void *journal_info | 日志文件系统(journalling filesystem)使用的信息。 |
🧩 十五、最后的 thread_struct
这一部分保存了CPU特定上下文信息,如:
- 通用寄存器(RIP、RSP、RBP等)
- FPU寄存器
- 调试寄存器
- 段寄存器等
在进程上下文切换时(context_switch()),内核会把当前任务的寄存器状态保存到它的 thread_struct 中,再从下一个任务的 thread_struct 中载入寄存器值,从而实现任务切换。
🧭 总结
| 模块 | 功能 |
|---|---|
| 调度器字段 | 决定任务运行位置与时间 |
| 内存管理字段 | 指向地址空间、页表等 |
| 信号处理字段 | 响应用户信号与中断 |
| 文件系统字段 | 打开文件、目录、命名空间 |
| 父子关系字段 | 构建进程树 |
| 安全字段 | 权限控制、LSM 支持 |
| NUMA/性能字段 | 多核调度与性能计数 |
| thread_struct | CPU 寄存器上下文 |
参考文献
Linux核心概念详解:s3.shizhz.me/linux-sched/concepts/priority