对于一个人,大脑支配着他的一举一动;对于一支军队,指挥中心控制着它的所有活动;同样,对于内核中的RAID5,也需要一个像大脑一样的东西来支配它的正确运转,那就是RAID5的守护进程raid5d。今天,我们就好好来看看raid5d究竟是怎么一回事~
进程的注册
前面的博文中贴出的源码经常会出现这样一条语句:md_wakeup_thread(mddev->thread);,这条语句是干嘛的呢?就是唤醒守护进程的。那么,怎么让内核感知这个进程的存在呢?这就需要对进程进行注册。
在raid5.c的setup_conf()中(这是配置RAID5全局信息的函数),有这样一句conf->thread=md_register_thread(raid5d,mddev,pers_name);,这就是将RAID5的守护进程raid5d注册到内核中,让内核识别这个进程。追踪md_register_thread():
struct md_thread *md_register_thread(void (*run) (struct md_thread *),struct mddev *mddev, const char *name)
{
struct md_thread *thread;
thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);/* allocate a zeroed thread descriptor */
if (!thread)
return NULL;
init_waitqueue_head(&thread->wqueue);// init the wait queue the thread sleeps on until woken
thread->run = run;// handler run on each wakeup (raid5d for RAID5)
thread->mddev = mddev;
thread->timeout = MAX_SCHEDULE_TIMEOUT;// default: sleep indefinitely until explicitly woken
thread->tsk = kthread_run(md_thread, thread,
"%s_%s",
mdname(thread->mddev),
name);// create and start the kernel thread, named "mdX_<name>"
if (IS_ERR(thread->tsk)) {// thread creation failed: free descriptor and report failure
kfree(thread);
return NULL;
}
return thread;
}
EXPORT_SYMBOL(md_register_thread);
结合上述注释,可以清楚地发现:当唤醒这个进程时,执行的是raid5d这个函数,真正的主体在raid5d里。好吧,现在是时候揭开它神秘的面纱了,gogogo!!!
进程执行函数—raid5d
在raid5.c中搜索raid5d的代码:
/*
* This is our raid5 kernel thread.
*
* We scan the hash table for stripes which can be handled now.
* During the scan, completed stripes are saved for us by the interrupt
* handler, so that they will not have to wait for our next wakeup.
*/
static void raid5d(struct md_thread *thread)
{
struct mddev *mddev = thread->mddev;
struct r5conf *conf = mddev->private;
int handled;
struct blk_plug plug;
pr_debug("+++ raid5d activen");
md_check_recovery(mddev);//检查RAID5同步
blk_start_plug(&plug);
handled = 0;
spin_lock_irq(&conf->device_lock);
while (1) {//^_^死循环哦
struct bio *bio;
int batch_size, released;
released = release_stripe_list(conf, conf->temp_inactive_list);
if (
!list_empty(&conf->bitmap_list)) {//激活bitmap处理
/* Now is a good time to flush some bitmap updates */
conf->seq_flush++;
spin_unlock_irq(&conf->device_lock);
bitmap_unplug(mddev->bitmap);
spin_lock_irq(&conf->device_lock);
conf->seq_write = conf->seq_flush;
activate_bit_delay(conf, conf->temp_inactive_list);
}
raid5_activate_delayed(conf);//激活延迟处理装置
while ((bio = remove_bio_from_retry(conf))) {//有关重试读的操作
int ok;
spin_unlock_irq(&conf->device_lock);
ok = retry_aligned_read(conf, bio);
spin_lock_irq(&conf->device_lock);
if (!ok)
break;
handled++;
}
batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,conf->temp_inactive_list);//处理stripe_head的主战场,返回处理的个数
if (!batch_size && !released)
break;
handled += batch_size;
if (mddev->flags & ~(1<device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
}
}
pr_debug("%d stripes handledn", handled);
spin_unlock_irq(&conf->device_lock);
async_tx_issue_pending_all();
blk_finish_plug(&plug);
pr_debug("--- raid5d inactiven");
}
这里我们只介绍大致流程,并不对每个操作具体讲解,因为像同步或者处理条带这些操作很复杂,后面会详细介绍。今天,我们只做一个流程梳理。
其实raid5d只做了这几件事:检测同步、处理temp_inactive_list、激活bitmap处理、激活延迟处理、重试读和处理条带。下面一一介绍这些功能:
static void raid5_activate_delayed(struct r5conf *conf)
{
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
while (!list_empty(&conf->delayed_list)) {
struct list_head *l = conf->delayed_list.next;// head of the delayed list
struct stripe_head *sh;
sh = list_entry(l, struct stripe_head, lru);
list_del_init(l);// detach the stripe from delayed_list
clear_bit(STRIPE_DELAYED, &sh->state);// clear the delayed-processing flag
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
list_add_tail(&sh->lru, &conf->hold_list);// move the stripe onto hold_list
raid5_wakeup_stripe_thread(sh);
}
}
}
还记得Linux内核中RAID5源码解读之stripe_head的管理中提及的delayed_list和hold_list之间的转化吗?就是在这儿实现的。
/*
 * Grab a batch of active stripes and run the stripe state machine on each.
 * Called with conf->device_lock held; drops and retakes it around the
 * actual stripe handling. Returns the number of stripes handled.
 */
static int handle_active_stripes(struct r5conf *conf, int group,
				 struct r5worker *worker,
				 struct list_head *temp_inactive_list)
{
	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
	int i, batch_size = 0, hash;
	bool release_inactive = false;

	/* take at most MAX_STRIPE_BATCH (default 8) stripes per pass */
	while (batch_size < MAX_STRIPE_BATCH &&
			(sh = __get_priority_stripe(conf, group)) != NULL)
		batch[batch_size++] = sh;

	if (batch_size == 0) {
		/* nothing to handle; still flush temp_inactive_list if non-empty */
		for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
			if (!list_empty(temp_inactive_list + i))
				break;
		if (i == NR_STRIPE_HASH_LOCKS)
			return batch_size;
		release_inactive = true;
	}
	spin_unlock_irq(&conf->device_lock);

	release_inactive_stripe_list(conf, temp_inactive_list,
				     NR_STRIPE_HASH_LOCKS);

	if (release_inactive) {
		spin_lock_irq(&conf->device_lock);
		return 0;
	}

	for (i = 0; i < batch_size; i++)
		handle_stripe(batch[i]);	/* main battlefield: stripe state machine */

	cond_resched();

	spin_lock_irq(&conf->device_lock);
	for (i = 0; i < batch_size; i++) {
		hash = batch[i]->hash_lock_index;
		__release_stripe(conf, batch[i], &temp_inactive_list[hash]);	/* recycle stripe */
	}
	return batch_size;
}
首先,RAID5是批量处理条带的,每次最多处理MAX_STRIPE_BATCH个条带,默认值为8。在取条带时也有规则,跟进__get_priority_stripe():
/* __get_priority_stripe - get the next stripe to process
 *
 * Full stripe writes are allowed to pass preread active stripes up until
 * the bypass_threshold is exceeded. In general the bypass_count
 * increments when the handle_list is handled before the hold_list; however, it
 * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a
 * stripe with in flight i/o. The bypass_count will be reset when the
 * head of the hold_list has changed, i.e. the head was promoted to the
 * handle_list.
 */
static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
{
	struct stripe_head *sh = NULL, *tmp;
	struct list_head *handle_list = NULL;
	struct r5worker_group *wg = NULL;

	/* pick which handle_list to use: global, a specific group, or scan all groups */
	if (conf->worker_cnt_per_group == 0) {
		handle_list = &conf->handle_list;
	} else if (group != ANY_GROUP) {
		handle_list = &conf->worker_groups[group].handle_list;
		wg = &conf->worker_groups[group];
	} else {
		int i;
		for (i = 0; i < conf->group_cnt; i++) {
			handle_list = &conf->worker_groups[i].handle_list;
			wg = &conf->worker_groups[i];
			if (!list_empty(handle_list))
				break;
		}
	}

	pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
		__func__,
		list_empty(handle_list) ? "empty" : "busy",
		list_empty(&conf->hold_list) ? "empty" : "busy",
		atomic_read(&conf->pending_full_writes), conf->bypass_count);

	if (!list_empty(handle_list)) {
		/* handle_list has priority: take its head stripe */
		sh = list_entry(handle_list->next, typeof(*sh), lru);

		if (list_empty(&conf->hold_list))
			conf->bypass_count = 0;
		else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
			if (conf->hold_list.next == conf->last_hold)
				conf->bypass_count++;
			else {
				conf->last_hold = conf->hold_list.next;
				conf->bypass_count -= conf->bypass_threshold;
				if (conf->bypass_count < 0)
					conf->bypass_count = 0;
			}
		}
	} else if (!list_empty(&conf->hold_list) &&
		   ((conf->bypass_threshold &&
		     conf->bypass_count > conf->bypass_threshold) ||
		    atomic_read(&conf->pending_full_writes) == 0)) {
		/* otherwise fall back to hold_list (preread-active stripes) */

		list_for_each_entry(tmp, &conf->hold_list,  lru) {
			if (conf->worker_cnt_per_group == 0 ||
			    group == ANY_GROUP ||
			    !cpu_online(tmp->cpu) ||
			    cpu_to_group(tmp->cpu) == group) {
				sh = tmp;
				break;
			}
		}

		if (sh) {
			conf->bypass_count -= conf->bypass_threshold;
			if (conf->bypass_count < 0)
				conf->bypass_count = 0;
		}
		wg = NULL;
	}

	if (!sh)
		return NULL;

	if (wg) {
		wg->stripes_cnt--;
		sh->group = NULL;
	}
	list_del_init(&sh->lru);
	BUG_ON(atomic_inc_return(&sh->count) != 1);
	return sh;
}
根据代码我们可以看出,handle_list中条带的优先级高于hold_list中的条带(handle_list不为空时优先从中取),并且函数的注释中已经明确说明了bypass_count的变化规则。
回到handle_active_stripes()中,取到条带后,调用handle_stripe()进行处理。这个函数非常重要,而且情况也非常复杂,我们这里不做讨论,后面会专门讲解这个函数。处理完成后,调用__release_stripe()对条带进行回收,我的前一篇博文里面有提到这个函数,具体点击这里,在此就不赘述了。
至此,raid5d所干的事已经昭然于天下了,也不是很复杂哦。其实它只是个指挥官,真正提枪上阵的还得是真正的士兵。后面会对各个操作进行具体讲解,别急哦~
进程的注销
RAID5的守护进程raid5d的注销是通过调用md_unregister_thread()函数来实现的,跟进md_unregister_thread():
/*
 * Unregister an MD thread: clear the caller's pointer under pers_lock so
 * nobody can wake a dying thread, then stop the kthread and free the
 * descriptor. Safe to call with *threadp == NULL.
 */
void md_unregister_thread(struct md_thread **threadp)
{
	struct md_thread *thread = *threadp;
	if (!thread)
		return;
	pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
	spin_lock(&pers_lock);
	*threadp = NULL;
	spin_unlock(&pers_lock);

	kthread_stop(thread->tsk);	/* stop the kernel thread and wait for it to exit */
	kfree(thread);
}
EXPORT_SYMBOL(md_unregister_thread);
很简单,只是调用一下kthread_stop()来停止下raid5d。
有关RAID5守护进程raid5d的一些基本功能讲得差不多了,从注册到注销,正应了那句话:善始善终,goodluck~