1 SMP cpu map和操作函数集初始化
1.1 初步初始化cpu map
start_kernel
---------->smp_setup_processor_id
linux 从第一个cpu上面启动以后,先利用smp_setup_processor_id设置cpu index
/*
 * Early CPU-map setup: record the booting CPU's hardware ID as logical
 * CPU 0, so per-cpu code works before the device tree has been parsed.
 */
void __init smp_setup_processor_id(void)
{
int i;
/* On SMP, read the hardware CPU id from the MPIDR coprocessor register; on UP it is 0 */
u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
/* Extract affinity level 0 of the MPIDR, i.e. cpu = mpidr & 0xff */
u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
/* Logical CPU 0 is always the booting CPU */
cpu_logical_map(0) = cpu;
/* nr_cpu_ids is the total number of CPUs the kernel may manage */
for (i = 1; i < nr_cpu_ids; ++i)
cpu_logical_map(i) = i == cpu ? 0 : i; /* map the remaining CPUs; the slot that collided with the boot CPU gets id 0 */
printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
}
is_smp的判断比较简单:
/* Report whether the kernel is running on a multi-core (SMP) system. */
static inline bool is_smp(void)
{
#if !defined(CONFIG_SMP)
	/* Uniprocessor build: never SMP. */
	return false;
#elif defined(CONFIG_SMP_ON_UP)
	/* SMP kernel that may run on UP hardware: decided at boot time. */
	extern unsigned int smp_on_up;
	return smp_on_up != 0;
#else
	/* Pure SMP build. */
	return true;
#endif
}
read_cpuid_mpidr读取CPUID_MPIDR寄存器获取cpu的值,比较简单,这边不展开。
1.2 利用device tree再次初始化cpu map,根据不同的machine 设置smp 函数集
先看一下device tree一般是如何描述cpu节点的:
/*
 * Example /cpus node: two Cortex-A7 cores; enable-method selects the
 * platform's SMP bring-up code.
 */
cpus {
enable-method = "allwinner,sun8i-a23";
#address-cells = <1>;
#size-cells = <0>;
cpu0: cpu@0 {
compatible = "arm,cortex-a7";
device_type = "cpu";
reg = <0>; /* hardware CPU id (MPIDR[23:0]) of this core */
};
cpu@1 {
compatible = "arm,cortex-a7";
device_type = "cpu";
reg = <1>;
};
};
接着再看一下内核如何利用device tree初始化相关信息
start_kernel
------------>setup_arch
-------------->arm_dt_init_cpu_maps
/*
 * Rebuild cpu_logical_map[] from the device tree /cpus node: logical
 * CPU 0 must be the booting CPU, the others get sequential indexes.
 * On any inconsistency the early map from smp_setup_processor_id()
 * is kept unchanged.
 */
void __init arm_dt_init_cpu_maps(void)
{
/*
 * Temp logical map is initialized with UINT_MAX values that are
 * considered invalid logical map entries since the logical map must
 * contain a list of MPIDR[23:0] values where MPIDR[31:24] must
 * read as 0.
 */
struct device_node *cpu, *cpus;
u32 i, j, cpuidx = 1;
u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };
bool bootcpu_valid = false;
/* Look up the /cpus container node; nothing to do without one */
cpus = of_find_node_by_path("/cpus");
if (!cpus)
return;
/* Walk every child node of /cpus */
for_each_child_of_node(cpus, cpu) {
u32 hwid;
if (of_node_cmp(cpu->type, "cpu"))
continue;
pr_debug(" * %s...\n", cpu->full_name);
/*
 * A device tree containing CPU nodes with missing "reg"
 * properties is considered invalid to build the
 * cpu_logical_map.
 */
/* The "reg" property holds the core's hardware id, MPIDR[23:0] */
if (of_property_read_u32(cpu, "reg", &hwid)) {
pr_debug(" * %s missing reg property\n",
cpu->full_name);
return;
}
/*
 * 8 MSBs must be set to 0 in the DT since the reg property
 * defines the MPIDR[23:0].
 */
/* Bits [31:24] of "reg" must be zero per the ARM CPU binding */
if (hwid & ~MPIDR_HWID_BITMASK)
return;
/*
 * Duplicate MPIDRs are a recipe for disaster.
 * Scan all initialized entries and check for
 * duplicates. If any is found just bail out.
 * temp values were initialized to UINT_MAX
 * to avoid matching valid MPIDR[23:0] values.
 */
for (j = 0; j < cpuidx; j++)
if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg "
"properties in the DT\n"))
return;
/*
 * Build a stashed array of MPIDR values. Numbering scheme
 * requires that if detected the boot CPU must be assigned
 * logical id 0. Other CPUs get sequential indexes starting
 * from 1. If a CPU node with a reg property matching the
 * boot CPU MPIDR is detected, this is recorded so that the
 * logical map built from DT is validated and can be used
 * to override the map created in smp_setup_processor_id().
 */
if (hwid == mpidr) {
i = 0; /* this node describes the boot CPU */
bootcpu_valid = true;
} else {
i = cpuidx++;
}
if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than "
"max cores %u, capping them\n",
cpuidx, nr_cpu_ids)) {
cpuidx = nr_cpu_ids;
break;
}
/*
 * tmp_map[] holds the MPIDR (hardware CPU id) of every CPU
 * found: slot 0 is reserved for the booting CPU, every other
 * CPU is stored at indexes 1..NR_CPUS-1 in discovery order.
 */
tmp_map[i] = hwid;
}
if (!bootcpu_valid) {
pr_warn("DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map\n");
return;
}
/*
 * Since the boot CPU node contains proper data, and all nodes have
 * a reg property, the DT CPU list can be considered valid and the
 * logical map created in smp_setup_processor_id() can be overridden
 */
for (i = 0; i < cpuidx; i++) {
set_cpu_possible(i, true); /* mark this logical CPU as usable */
cpu_logical_map(i) = tmp_map[i]; /* override the early logical map */
pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i));
}
}
执行完上面函数,接着在setup_arch中往下执行,初始化smp 操作集函数:
#ifdef CONFIG_SMP
if (is_smp()) {
smp_set_ops(mdesc->smp);
smp_init_cpus();
}
#endif
smp_set_ops设置操作函数集:
/* File-scope copy of the machine-specific SMP operation table. */
static struct smp_operations smp_ops;

/*
 * Install the machine's SMP operations. A NULL argument leaves the
 * current (zero-initialized) table untouched.
 */
void __init smp_set_ops(struct smp_operations *ops)
{
	if (!ops)
		return;
	smp_ops = *ops;
}
/* Let the machine-specific code populate the possible-CPU map. */
void __init smp_init_cpus(void)
{
	if (!smp_ops.smp_init_cpus)
		return;
	smp_ops.smp_init_cpus();
}
传入的参数由mdesc->smp给出,先看一下系统是如何定义smp_operations的:
/* Machine-specific SMP callbacks, installed via smp_set_ops(). */
struct smp_operations {
#ifdef CONFIG_SMP
/*
 * Setup the set of possible CPUs (via set_cpu_possible)
 */
/* Populate the map of CPUs the system can run */
void (*smp_init_cpus)(void);
/*
 * Initialize cpu_possible map, and enable coherency
 */
void (*smp_prepare_cpus)(unsigned int max_cpus);
/*
 * Perform platform specific initialisation of the specified CPU.
 */
/* Per-CPU init run on a secondary (slave) CPU after it comes up */
void (*smp_secondary_init)(unsigned int cpu);
/*
 * Boot a secondary CPU, and assign it the specified idle task.
 * This also gives us the initial stack to use for this CPU.
 */
/* Start a secondary (slave) CPU */
int (*smp_boot_secondary)(unsigned int cpu, struct task_struct *idle);
#ifdef CONFIG_HOTPLUG_CPU
int (*cpu_kill)(unsigned int cpu);
void (*cpu_die)(unsigned int cpu);
int (*cpu_disable)(unsigned int cpu);
#endif
#endif
};
在定义某个machine的时候,如果是smp处理器,一般我们如下来描述某个machine:
/*
 * Typical SMP machine description: the .smp field installs the
 * machine's smp_operations table.
 */
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
.l2c_aux_val = 0x00400000,
.l2c_aux_mask = 0xfe0fffff,
.smp = vexpress_smp_dt_ops, /* machine-specific SMP operation table */
MACHINE_END
/* Versatile Express implementation of struct smp_operations. */
const struct smp_operations vexpress_smp_dt_ops __initconst = {
.smp_prepare_cpus = vexpress_smp_dt_prepare_cpus,
.smp_secondary_init = versatile_secondary_init,
.smp_boot_secondary = versatile_boot_secondary,
.smp_init_cpus = vexpress_smp_init_ops,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = vexpress_cpu_die,
#endif
};
2 smp多核引导过程
start_kernel
----------->rest_init
------------>kernel_init
---------------->kernel_init_freeable
------------------>smp_prepare_cpus
/*
 * Boot-CPU-side SMP preparation: record topology, clamp max_cpus to
 * the number of possible cores, then hand over to the machine's
 * smp_prepare_cpus() callback so the secondaries can be woken later.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int ncores = num_possible_cpus(); /* number of CPUs the system can use */
init_cpu_topology();
smp_store_cpu_info(smp_processor_id());
/*
 * are we trying to boot more cores than exist?
 */
if (max_cpus > ncores)
max_cpus = ncores;
if (ncores > 1 && max_cpus) {
/*
 * Enable the local timer or broadcast device for the
 * boot CPU, but only if we have more than one CPU.
 */
percpu_timer_setup();
/*
 * Initialise the present map, which describes the set of CPUs
 * actually populated at the present time. A platform should
 * re-initialize the map in the platforms smp_prepare_cpus()
 * if present != possible (e.g. physical hotplug).
 */
init_cpu_present(cpu_possible_mask);
/*
 * Initialise the SCU if there are more than one CPU
 * and let them know where to start.
 */
/*
 * Invoke the machine's smp_prepare_cpus hook to prepare the
 * secondary CPUs for wake-up (machine specific; not analysed
 * here).
 */
if (smp_ops.smp_prepare_cpus)
smp_ops.smp_prepare_cpus(max_cpus);
}
}
kernel_init_freeable
------------------>smp_init
/*
 * Generic SMP bring-up: create an idle thread for every CPU, then
 * online each present-but-offline CPU via cpu_up().
 */
void __init smp_init(void)
{
unsigned int cpu;
idle_threads_init(); /* every CPU gets its own idle thread; create them all here */
/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
if (num_online_cpus() >= setup_max_cpus)
break;
if (!cpu_online(cpu))
cpu_up(cpu); /* wake up every CPU that is not yet online */
}
/* Any cleanup work */
printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
smp_cpus_done(setup_max_cpus);
}
在cpu_up中,就要开始引导其他cpu核启动了
cpu_up
------------->_cpu_up
------------------>__cpu_up
/*
 * Arch hook called from _cpu_up(): publish the secondary CPU's stack
 * and page-table pointers in secondary_data, start the core, then
 * wait (up to 1 s) for it to report itself online.
 */
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
/*
 * We need to tell the secondary core where to find
 * its stack and the page tables.
 */
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; /* secondary's stack shares the allocation of its idle task descriptor */
secondary_data.pgdir = virt_to_phys(idmap_pgd); /* identity-map page directory (a copy, see init_static_idmap) */
secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); /* boot CPU's kernel page directory; NOTE(review): looks unused afterwards - confirm */
__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
/*
 * Now bring the CPU into our world.
 */
ret = boot_secondary(cpu, idle); /* machine-specific code actually starts the core */
if (ret == 0) {
/*
 * CPU was successfully started, wait for it
 * to come online or time out.
 */
wait_for_completion_timeout(&cpu_running,
msecs_to_jiffies(1000)); /* boot CPU blocks here until the secondary signals completion, then continues its own work */
if (!cpu_online(cpu)) {
pr_crit("CPU%u: failed to come online\n", cpu);
ret = -EIO;
}
} else {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
secondary_data.stack = NULL;
secondary_data.pgdir = 0;
return ret;
}
先看一下idmap_pgd这个页目录是如何拷贝出来的:
/*
 * Build the static identity map used while secondary CPUs switch on
 * their MMU: clone the init_mm page directory and identity-map the
 * .idmap.text section into it.
 */
static int __init init_static_idmap(void)
{
idmap_pgd = pgd_alloc(&init_mm); /* clone the kernel page directory of the boot CPU's init task */
if (!idmap_pgd)
return -ENOMEM;
pr_info("Setting up static identity map for 0x%p - 0x%p\n",
__idmap_text_start, __idmap_text_end);
identity_mapping_add(idmap_pgd, __idmap_text_start,
__idmap_text_end, 0);
/* Flush L1 for the hardware to see this page table content */
flush_cache_louis();
return 0;
}
early_initcall(init_static_idmap);
可以看到主要是利用了pgd_alloc函数完成页目录的复制,具体的复制分析,可以参考这篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/102828714
再看一下boot_secondary函数:
/*
 * Delegate secondary-CPU start-up to the machine's smp_boot_secondary
 * hook; report -ENOSYS when the machine provides none.
 */
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	if (!smp_ops.smp_boot_secondary)
		return -ENOSYS;
	return smp_ops.smp_boot_secondary(cpu, idle);
}
smp_boot_secondary是具体machine相关的函数,为了更容易说明问题,举一个厂商的machine进行分析:
/*
 * Allwinner sun8i implementation of smp_boot_secondary: program the
 * secondary's entry point, then release the core from reset and
 * power gating so it starts executing at secondary_startup.
 */
static int sun8i_smp_boot_secondary(unsigned int cpu,
struct task_struct *idle)
{
u32 reg;
if (!(prcm_membase && cpucfg_membase))
return -EFAULT;
spin_lock(&cpu_lock);
/* Set CPU boot address */
/* Physical address the woken core will jump to: secondary_startup */
writel(__pa_symbol(secondary_startup),
cpucfg_membase + CPUCFG_PRIVATE0_REG);
/* Assert the CPU core in reset */
writel(0, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));
/* Assert the L1 cache in reset */
reg = readl(cpucfg_membase + CPUCFG_GEN_CTRL_REG);
writel(reg & ~BIT(cpu), cpucfg_membase + CPUCFG_GEN_CTRL_REG);
/* Clear CPU power-off gating */
reg = readl(prcm_membase + PRCM_CPU_PWROFF_REG);
writel(reg & ~BIT(cpu), prcm_membase + PRCM_CPU_PWROFF_REG);
mdelay(1);
/* Deassert the CPU core reset */
writel(3, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));
spin_unlock(&cpu_lock);
return 0;
}
通过上面设置,被引导的cpu就从secondary_startup地址开始执行
3 smp 多核处理器启动过程
除了boot cpu以外,其他cpu均从secondary_startup代码开始执行,注意下面所说的编译地址,即linux内核正常启动以后运行的虚拟地址,该地址由程序链接时决定,和物理地址无关
#if defined(CONFIG_SMP)
__CPUINIT
ENTRY(secondary_startup)
/*
 * Common entry point for secondary CPUs.
 *
 * Ensure that we're in SVC mode, and IRQs are disabled. Lookup
 * the processor type - there is no need to check the machine type
 * as it has already been validated by the primary processor.
 */
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install_secondary
#endif
safe_svcmode_maskall r9
-------------------------------------------------------(1)
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type
movs r10, r5 @ invalid processor?
moveq r0, #'p' @ yes, error 'p'
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p
/*
 * Use the page tables supplied from __cpu_up.
 */
adr r4, __secondary_data // link address of __secondary_data is virtual, but the MMU is still off, so PC-relative addressing yields its physical address
// load the three words stored there: the link addresses of __secondary_data, secondary_data and __secondary_switched
ldmia r4, {r5, r7, r12} @ address to jump to after
// r4 (physical) minus r5 (link address) gives the phys/link offset, used below to turn link addresses into physical ones
sub lr, r4, r5 @ mmu has been enabled
// physical address of secondary_data, i.e. of secondary_data.pgdir
ldr r4, [r7, lr] @ get secondary_data.pgdir
add r7, r7, #4 // advance 4 bytes to the next member
// physical address of secondary_data.swapper_pg_dir
ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir
// use __enable_mmu as the return address of the proc init function
adr lr, BSYM(__enable_mmu) @ return address
// stash the link (virtual) address of __secondary_switched in r13; by the time it is used the MMU is on, so a virtual address is fine
mov r13, r12 @ __secondary_switched address
-------------------------------------------------------------(2)
// jump into the architecture-specific processor initialisation function
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
@ (return control reg)
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
ENDPROC(secondary_startup)
/*
 * r6 = &secondary_data
 */
ENTRY(__secondary_switched)
ldr sp, [r7, #4] @ get secondary_data.stack
mov fp, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
.align
.type __secondary_data, %object
__secondary_data:
.long .
.long secondary_data
.long __secondary_switched
#endif /* defined(CONFIG_SMP) */
(1)
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type //根据该cpu id,查找process info list
movs r10, r5 @ invalid processor? //并把找到的process info 起始地址放入r10
更详细的说明,参考这篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/99654760
(2)前面__lookup_processor_type 函数找到对应的处理器process info,该结构偏移PROCINFO_INITFUNC 以后,存储的是其对应的体系架构相关的处理函数,在上面那篇文章中也可以找到详细说明。从初始化函数返回一般是如下代码:
mov pc, lr
从设置的lr地址处接着执行,即跳转到__enable_mmu代码处,执行mmu的enable工作,分别是__enable_mmu和__turn_mmu_on函数,具体实现在上面那篇文章中也有详细说明,不过这边需要注意的是,开启mmu的那段代码需要做恒等映射,使得开启mmu前后,代码不会出错,在boot cpu起来的时候,我们已经看到创建页表的时候,进行了恒等映射相关的工作,感兴趣的可以参照这篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/96029177
bootcpu跳转到c代码处执行以后,会首先对user空间的页表进行处理,我们使用的物理地址起始地址为0x30000000,开启mmu以后该空间处在用户空间,所以会被清理掉,该恒等映射页表不再存在,那么这边引导多核,是在哪完成恒等映射的呢?
其实在上面的init_static_idmap函数中,为多核处理器复制内核页表的时候,就为其做了恒等映射:
static int __init init_static_idmap(void)
{
。。。。。。。。。。。。。
//恒等映射相关的代码
identity_mapping_add(idmap_pgd, __idmap_text_start,
__idmap_text_end, 0);
。。。。。。。。。。。。。。。。。
}
对__idmap_text_start和__idmap_text_end之间的代码做了恒等映射,那么这里面的代码具体是在哪边,可以看一下链接脚本是如何处理的,在arch/arm/kernel/vmlinux.lds.S中:
VMLINUX_SYMBOL(__idmap_text_start) = .; \
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .; \
该段代码就是放在.idmap.text段中的代码,再看一下开启mmu的那个函数__turn_mmu_on是如何定义的:
.align 5
/* Placed in .idmap.text so this code is identity-mapped: the PC stays
 * valid both before and after the MMU is switched on. */
.pushsection .idmap.text, "ax"
ENTRY(__turn_mmu_on)
mov r0, r0
instr_sync
mcr p15, 0, r0, c1, c0, 0 @ write control reg (enables the MMU)
mrc p15, 0, r3, c0, c0, 0 @ read id reg
instr_sync
mov r3, r3
mov r3, r13 @ r13 holds the virtual return address (__secondary_switched)
mov pc, r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
可以看到该代码就在__turn_mmu_on中,至此真相大白。开启mmu以后,从r13寄存器取返回地址。上面r13寄存器存放的返回地址为__secondary_switched:
ENTRY(__secondary_switched)
ldr sp, [r7, #4] @ get secondary_data.stack // set up the stack first so C code can run
mov fp, #0
b secondary_start_kernel // enter the C entry point for secondary CPUs
ENDPROC(__secondary_switched)
最终跳转到c语言的入口函数secondary_start_kernel:
/*
 * C entry point for a freshly booted secondary CPU: switch to the
 * shared kernel page tables, run per-CPU init, mark the CPU online,
 * release the waiting boot CPU, then fall into the idle loop.
 */
asmlinkage void __cpuinit secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
unsigned int cpu;
/*
 * The identity mapping is uncached (strongly ordered), so
 * switch away from it before attempting any exclusive accesses.
 */
---------------------------------------------------------(1)
cpu_switch_mm(mm->pgd, mm);
local_flush_bp_all();
enter_lazy_tlb(mm, current);
local_flush_tlb_all();
/*
 * All kernel threads share the same mm context; grab a
 * reference and switch to it.
 */
cpu = smp_processor_id();
atomic_inc(&mm->mm_count);
current->active_mm = mm;
cpumask_set_cpu(cpu, mm_cpumask(mm));
cpu_init(); /* set up the exception-mode stacks for this CPU */
printk("CPU%u: Booted secondary processor\n", cpu);
preempt_disable();
trace_hardirqs_off();
/*
 * Give the platform a chance to do its own initialisation.
 */
if (smp_ops.smp_secondary_init)
smp_ops.smp_secondary_init(cpu);
notify_cpu_starting(cpu);
calibrate_delay(); /* CPUs may run at different frequencies, so recompute the delay loop here */
smp_store_cpu_info(cpu);
/*
 * OK, now it's safe to let the boot CPU continue. Wait for
 * the CPU migration code to notice that the CPU is online
 * before we continue - which happens after __cpu_up returns.
 */
set_cpu_online(cpu, true);
complete(&cpu_running); /* tell the boot CPU this core came up successfully */
/*
 * Setup the percpu timer for this CPU.
 */
percpu_timer_setup();
local_irq_enable();
local_fiq_enable();
/*
 * OK, it's off to the idle thread for us
 */
-----------------------------------------------------------(2)
cpu_startup_entry(CPUHP_ONLINE);
}
(1)切换页表,和boot cpu使用相同的页目录,从这边可以看出,最终所有的smp cpu都使用同一份内核页表,具体切换过程的分析,可以参考这篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/102758457
(2)处理idle相关的工作,更详细的说明可以参考这篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/102876424
至此,多核处理器引导结束。
文中有些内容参考自该篇文章:
来源:CSDN
作者:oqqYuJi12345678
链接:https://blog.csdn.net/oqqYuJi12345678/article/details/104068333