欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

驱动篇:Linux 电源管理的系统架构和驱动(三)

程序员文章站 2024-02-24 09:28:31
...

驱动篇:Linux 电源管理的系统架构和驱动(三)

PowerTop
PowerTop 是一款开源的用于进行电量消耗分析和电源管理诊断的工具,其主页位于 Intel 开源技术中心的 https://01.org/powertop/,维护者是 Arjan van de Ven 和 Kristen Accardi 。 PowerTop 可分析系统中软件的功耗,以便找到功耗大户,也可显示系统中不同的 C 状态(与 CPUIdle 驱动对应)和 P 状态(与 CPUFreq 驱动对应)的时间比例,并采用了基于 TAB 的界面风格

驱动篇:Linux 电源管理的系统架构和驱动(三)

Regulator 驱动
Regulator 是 Linux 系统中电源管理的基础设施之一,用于稳压电源的管理,是各种驱动子系统中设置电压的标准接口。前面介绍的CPUFreq 驱动就经常使用它来设定电压

ret = regulator_set_voltage(vddarm, dvfs->vddarm_min, dvfs->vddarm_max);

而 Regulator 则可以管理系统中的供电单元,即稳压器( Low Dropout Regulator , LDO ,即低压差线性稳压器),并提供获取和设置这些供电单元电压的接口。一般在 ARM 电路板上,各个稳压器和设备会形成一个 Regulator 树形结构
驱动篇:Linux 电源管理的系统架构和驱动(三)

Linux 的 Regulator 子系统提供如下 API 以用于注册 / 注销一个稳压器:

/*
 * Register a regulator with the Regulator core.  Both arguments are
 * pointers: the regulator's descriptor and its configuration.  The
 * returned handle is what regulator_unregister() later takes.
 */
struct regulator_dev *regulator_register(const struct regulator_desc *regulator_desc,
					 const struct regulator_config *config);
/* Unregister a regulator previously registered with regulator_register(). */
void regulator_unregister(struct regulator_dev *rdev);

regulator_register ()函数的两个参数分别是 regulator_desc 结构体和 regulator_config 结构体的指针。

regulator_desc 结构体是对这个稳压器属性和操作的封装:

/*
 * Encapsulates the properties and operations of one regulator; a pointer
 * to this descriptor is the first argument of regulator_register().
 */
struct regulator_desc {
	const char *name;  /* name of the regulator */
	const char *supply_name; /* name of the regulator's supply */
	const char *of_match;
	const char *regulators_node;
	int (*of_parse_cb)(struct device_node *,
			    const struct regulator_desc *,
			    struct regulator_config *);
	int id;
	unsigned int continuous_voltage_range:1;
	unsigned n_voltages;
	const struct regulator_ops *ops; /* hardware operations of this regulator */
	int irq;
	enum regulator_type type;/* whether this is a voltage or a current regulator */
	struct module *owner;

	unsigned int min_uV;/* voltage of the lowest selector (linear mapping) */
	unsigned int uV_step;/* voltage added/removed per step (linear mapping) */
	unsigned int linear_min_sel;
	int fixed_uV;
	unsigned int ramp_delay;/* time needed to settle after a voltage change */
	int min_dropout_uV;

	const struct regulator_linear_range *linear_ranges;
	int n_linear_ranges;

	const unsigned int *volt_table;/* voltage mapping table (table-based mapping) */

	unsigned int vsel_reg;
	unsigned int vsel_mask;
	unsigned int csel_reg;
	unsigned int csel_mask;
	unsigned int apply_reg;
	unsigned int apply_bit;
	unsigned int enable_reg;
	unsigned int enable_mask;
	unsigned int enable_val;
	unsigned int disable_val;
	bool enable_is_inverted;
	unsigned int bypass_reg;
	unsigned int bypass_mask;
	unsigned int bypass_val_on;
	unsigned int bypass_val_off;
	unsigned int active_discharge_on;
	unsigned int active_discharge_off;
	unsigned int active_discharge_mask;
	unsigned int active_discharge_reg;
	unsigned int soft_start_reg;
	unsigned int soft_start_mask;
	unsigned int soft_start_val_on;
	unsigned int pull_down_reg;
	unsigned int pull_down_mask;
	unsigned int pull_down_val_on;

	unsigned int enable_time;

	unsigned int off_on_delay;

	unsigned int (*of_map_mode)(unsigned int mode);
};

上述结构体中的 regulator_ops 指针 ops 是对这个稳压器硬件操作的封装,其中包含获取、设置电压等的成员函数:

/*
 * Callbacks wrapping the hardware operations of a regulator (getting and
 * setting voltage, current, mode and so on).  A regulator_desc refers to
 * one of these tables through its ops pointer.
 */
struct regulator_ops {

	/* enumerate supported voltages */
	int (*list_voltage) (struct regulator_dev *, unsigned selector);

	/* get/set regulator voltage */
	int (*set_voltage) (struct regulator_dev *, int min_uV, int max_uV,
			    unsigned *selector);
	int (*map_voltage)(struct regulator_dev *, int min_uV, int max_uV);
	int (*set_voltage_sel) (struct regulator_dev *, unsigned selector);
	int (*get_voltage) (struct regulator_dev *);
	int (*get_voltage_sel) (struct regulator_dev *);

	/* get/set regulator current */
	int (*set_current_limit) (struct regulator_dev *,
				 int min_uA, int max_uA);
	int (*get_current_limit) (struct regulator_dev *);

	int (*set_input_current_limit) (struct regulator_dev *, int lim_uA);
	int (*set_over_current_protection) (struct regulator_dev *);
	int (*set_active_discharge) (struct regulator_dev *, bool enable);

	/* enable/disable regulator */
	int (*enable) (struct regulator_dev *);
	int (*disable) (struct regulator_dev *);
	int (*is_enabled) (struct regulator_dev *);

	/* get/set regulator operating mode (defined in consumer.h) */
	int (*set_mode) (struct regulator_dev *, unsigned int mode);
	unsigned int (*get_mode) (struct regulator_dev *);

	/* retrieve current error flags on the regulator */
	int (*get_error_flags)(struct regulator_dev *, unsigned int *flags);

	/* Time taken to enable or set voltage on the regulator */
	int (*enable_time) (struct regulator_dev *);
	int (*set_ramp_delay) (struct regulator_dev *, int ramp_delay);
	int (*set_voltage_time) (struct regulator_dev *, int old_uV,
				 int new_uV);
	int (*set_voltage_time_sel) (struct regulator_dev *,
				     unsigned int old_selector,
				     unsigned int new_selector);

	int (*set_soft_start) (struct regulator_dev *);

	/* report regulator status ... most other accessors report
	 * control inputs, this reports results of combining inputs
	 * from Linux (and other sources) with the actual load.
	 * returns REGULATOR_STATUS_* or negative errno.
	 */
	int (*get_status)(struct regulator_dev *);

	/* get most efficient regulator operating mode for load */
	unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV,
					  int output_uV, int load_uA);
	/* set the load on the regulator */
	int (*set_load)(struct regulator_dev *, int load_uA);

	/* control and report on bypass mode */
	int (*set_bypass)(struct regulator_dev *dev, bool enable);
	int (*get_bypass)(struct regulator_dev *dev, bool *enable);

	/* the operations below are for configuration of regulator state when
	 * its parent PMIC enters a global STANDBY/HIBERNATE state */

	/* set regulator suspend voltage */
	int (*set_suspend_voltage) (struct regulator_dev *, int uV);

	/* enable/disable regulator in suspend state */
	int (*set_suspend_enable) (struct regulator_dev *);
	int (*set_suspend_disable) (struct regulator_dev *);

	/* set regulator suspend operating mode (defined in consumer.h) */
	int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode);

	int (*set_pull_down) (struct regulator_dev *);
};

在 drivers/regulator 目录下,包含大量的与电源芯片对应的 Regulator 驱动,如 Dialog 的 DA9052 、 Intersil 的 ISL6271A 、 TI 的 TPS61050/61052 、 Wolfson 的 WM831x 系列等,它同时提供了一个虚拟的 Regulator 驱动作为参考:
虚拟的 Regulator 驱动

/* Handle of the registered dummy regulator; assigned in dummy_regulator_probe(). */
struct regulator_dev *dummy_regulator_rdev;
/* Init data and operations of the dummy regulator; neither is given an initializer here. */
static struct regulator_init_data dummy_initdata;
static struct regulator_ops dummy_ops;
/* Descriptor of the dummy regulator: a voltage regulator named "regulator-dummy". */
static struct regulator_desc dummy_desc = {
 .name = "regulator-dummy",
 .id = -1,
 .type = REGULATOR_VOLTAGE,
 .owner = THIS_MODULE,
 .ops = &dummy_ops,
};

/*
 * Platform probe callback of the dummy regulator driver.
 *
 * Registers dummy_desc with the Regulator core, using the probing
 * platform device and dummy_initdata as configuration, and stores the
 * resulting handle in dummy_regulator_rdev.
 *
 * Returns 0 on success, or the error code carried by the handle that
 * regulator_register() returned (after logging it).
 */
static int __devinit dummy_regulator_probe(struct platform_device *pdev)
{
	struct regulator_config cfg = {
		.dev = &pdev->dev,
		.init_data = &dummy_initdata,
	};
	int err;

	dummy_regulator_rdev = regulator_register(&dummy_desc, &cfg);
	if (!IS_ERR(dummy_regulator_rdev))
		return 0;

	err = PTR_ERR(dummy_regulator_rdev);
	pr_err("Failed to register regulator: %d\n", err);
	return err;
}

Linux 的 Regulator 子系统提供消费者( Consumer ) API 以便让其他的驱动获取、设置、关闭和使能稳压器:

/* Obtain a regulator for a consumer device; id names the supply. */
struct regulator *regulator_get(struct device *dev, const char *id);
struct regulator *devm_regulator_get(struct device *dev, const char *id);
struct regulator *regulator_get_exclusive(struct device *dev, const char *id);
/* Release a regulator obtained with one of the getters above. */
void regulator_put(struct regulator *regulator);
void devm_regulator_put(struct regulator *regulator);
/* Enable / disable the regulator. */
int regulator_enable(struct regulator *regulator);
int regulator_disable(struct regulator *regulator);
/* Set the regulator voltage within [min_uV, max_uV] / read the voltage back. */
int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV);
int regulator_get_voltage(struct regulator *regulator);

这些消费者 API 的地位大致与 GPIO 子系统的 gpio_request ()、时钟子系统的 clk_get ()、 dmaengine 子系统的 dmaengine_submit ()等相当,属于基础设施。

OPP
现今的 SoC 一般包含很多集成组件,在系统运行过程中,并不需要所有的模块都运行于最高频率和最高性能。在SoC 内,某些 domain 可以运行在较低的频率和电压下,而其他 domain 可以运行在较高的频率和电压下,某个domain 所支持的 < 频率,电压 > 对的集合被称为 Operating Performance Point ,缩写为 OPP 。

int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt);

目前, TI OMAP CPUFreq 驱动的底层就使用了 OPP 这种机制来获取 CPU 所支持的频率和电压列表。在开机的过程中, TI OMAP4 芯片会注册针对 CPU 设备的 OPP 表(代码位于 arch/arm/mach-omap2/ 中):
TI OMAP4 CPU 的 OPP 表

/*
 * OPP definitions for the OMAP4 "mpu" hwmod; omap4_opp_init() hands this
 * table to omap_init_opp_table().  Each entry pairs a frequency with a
 * voltage constant.
 * NOTE(review): the frequencies look like Hz and the *_UV constants like
 * microvolts -- confirm against the OPP_INITIALIZER definition.
 */
static struct omap_opp_def __initdata omap44xx_opp_def_list[] = {
 /* MPU OPP1 - OPP50 */
 OPP_INITIALIZER("mpu", true, 300000000, OMAP4430_VDD_MPU_OPP50_UV),
 /* MPU OPP2 - OPP100 */
 OPP_INITIALIZER("mpu", true, 600000000, OMAP4430_VDD_MPU_OPP100_UV),
 /* MPU OPP3 - OPP-Turbo */
 OPP_INITIALIZER("mpu", true, 800000000, OMAP4430_VDD_MPU_OPPTURBO_UV),
 /* MPU OPP4 - OPP-SB */
 OPP_INITIALIZER("mpu", true, 1008000000, OMAP4430_VDD_MPU_OPPNITRO_UV),
 ...
};
/**
 * omap4_opp_init() - initialize omap4 opp table
 *
 * Passes omap44xx_opp_def_list and its element count to
 * omap_init_opp_table() and returns that call's result.  Hooked in
 * through device_initcall().
 */
int __init omap4_opp_init(void)
{
 ...
 r = omap_init_opp_table(omap44xx_opp_def_list,

ARRAY_SIZE(omap44xx_opp_def_list));

return r;
}
device_initcall(omap4_opp_init);
/*
 * Walks the opp_def_size entries starting at opp_def and registers each
 * one with the OPP library through opp_add(), using the entry's freq and
 * u_volt.  Entries whose hwmod_name starts with "mpu" are registered
 * against the CPU0 device.  Parts of the function are elided ("...") in
 * this excerpt; the visible path returns 0.
 */
int __init omap_init_opp_table(struct omap_opp_def *opp_def,
u32 opp_def_size)
{
 ...
 /* Lets now register with OPP library */
 for (i = 0; i < opp_def_size; i++, opp_def++) {
 ...
 if (!strncmp(opp_def->hwmod_name, "mpu", 3)) {

/*
 * All current OMAPs share voltage rail and
 * clock source, so CPU0 is used to represent
 * the MPU-SS.
 */

dev = get_cpu_device(0);
 } ...
 /* add this <frequency, voltage> pair to the device's OPP set */
 r = opp_add(dev, opp_def->freq, opp_def->u_volt);
 ...
 }
 return 0;
}

opp_add ()用于向 device 结构体指针 dev 所对应的 domain 中增加一个新的 OPP ,参数 freq 和 u_volt 即为该 OPP 对应的频率和电压。

int opp_enable(struct device *dev, unsigned long freq);
int opp_disable(struct device *dev, unsigned long freq);

上述 API 用于使能和禁止某个 OPP ,一旦被禁止,其 available 将成为 false ,之后有设备驱动想设置为这个 OPP 就不再可能了。譬如,当温度超过某个范围后,系统不允许 1GHz 的工作频率,可采用类似下面的代码实现:

/* Above the high-temperature threshold, forbid the 1 GHz OPP. */
if (cur_temp > temp_high_thresh) {
/* Disable 1GHz if it was enabled */
rcu_read_lock();
/* look for an OPP at exactly 1 GHz whose available flag is true */
opp = opp_find_freq_exact(dev, 1000000000, true);
rcu_read_unlock();
/* just error check */
if (!IS_ERR(opp))
ret = opp_disable(dev, 1000000000);
else
goto try_something_else;
}

上述代码中调用的 opp_find_freq_exact ()用于寻找与一个确定频率和 available 匹配的 OPP ,其原型为:

struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
bool available);

另外, Linux 还提供两个变体, opp_find_freq_floor ()用于寻找 1 个 OPP ,它的频率小于或等于指定的频率,且最接近该频率(向下取整);opp_find_freq_ceil ()用于寻找 1 个 OPP ,它的频率大于或等于指定的频率,且最接近该频率(向上取整),这两个函数的原型为:

struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq);
struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq);

我们可用下面的代码分别寻找 1 个设备的最大和最小工作频率:

/*
 * Maximum frequency: start from ULONG_MAX and search downwards.  The call
 * receives &freq and its return value is ignored here, so the frequency
 * that was found is presumably written back through freq.
 */
freq = ULONG_MAX;
rcu_read_lock();
opp_find_freq_floor(dev, &freq);
rcu_read_unlock();
/* Minimum frequency: start from 0 and search upwards, same convention. */
freq = 0;
rcu_read_lock();
opp_find_freq_ceil(dev, &freq);
rcu_read_unlock();

在频率降低的同时,支撑该频率运行所需的电压也往往可以动态调低;反之,则可能需要调高,下面这两个 API分别用于获取与某 OPP 对应的电压和频率:

unsigned long opp_get_voltage(struct opp *opp);
unsigned long opp_get_freq(struct opp *opp);

举个例子,当某 CPUFreq 驱动想将 CPU 设置为某一频率的时候,它可能会同时设置电压,其代码流程为:

/*
 * Pseudo-code sketch: a CPUFreq driver switching the CPU to freq and
 * setting the matching supply voltage at the same time.
 */
soc_switch_to_freq_voltage(freq){
/* do things */
rcu_read_lock();
/* look up the OPP for the requested frequency, then read its voltage */
opp = opp_find_freq_ceil(dev, &freq);
v = opp_get_voltage(opp);
rcu_read_unlock();
/* only program the regulator when a non-zero voltage was obtained */
if (v)
regulator_set_voltage(.., v);
/* do other things */
}

如下简单的 API 可用于获取某设备所支持的 OPP 的个数:

int opp_get_opp_count(struct device *dev);

前面提到, TI OMAP CPUFreq 驱动的底层就使用了 OPP 这种机制来获取 CPU 所支持的频率和电压列表。它在omap_init_opp_table ()函数中添加了相应的 OPP ,在 TI OMAP 芯片的 CPUFreq 驱动 drivers/cpufreq/omap-cpufreq.c 中,则借助了快捷函数 opp_init_cpufreq_table ()来根据前面注册的 OPP 建立 CPUFreq 的频率表:

/*
 * CPUFreq init callback (excerpt).  While freq_table is still NULL, the
 * CPUFreq frequency table is built for mpu_dev by
 * opp_init_cpufreq_table().
 */
static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
{
...
if (!freq_table)
result = opp_init_cpufreq_table(mpu_dev, &freq_table);
...
}

而在 CPUFreq 驱动的目标成员函数 omap_target ()中,则使用与 OPP 相关的 API 来获取频率和电压:

/*
 * CPUFreq target callback (excerpt).  When mpu_reg is set, the OPP for
 * freq is looked up on mpu_dev and its voltage is read into volt.
 */
static int omap_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
...
if (mpu_reg) {
/* look up the OPP for the requested frequency */
opp = opp_find_freq_ceil(mpu_dev, &freq);
...
/* voltage that goes with that OPP */
volt = opp_get_voltage(opp);
...
}
...
}

drivers/cpufreq/omap-cpufreq.c 相对来说较为规范,它在 < 频率,电压 > 表方面,在底层使用了 OPP ,在设置电压的时候又使用了规范的 Regulator API

比较新的驱动一般不太喜欢直接在代码里面固化 OPP 表,而是喜欢在相应的节点处添加 operating-points 属性,如imx27.dtsi 中的:

cpus {
	#size-cells = <0>;
	#address-cells = <1>;

	cpu: cpu@0 {
		device_type = "cpu";
		compatible = "arm,arm926ej-s";
		/* OPP table: one <frequency, voltage> pair per line */
		operating-points = <
			/* kHz uV */
			266000 1300000
			399000 1450000
		>;
		clock-latency = <62500>;
		clocks = <&clks IMX27_CLK_CPU_DIV>;
		voltage-tolerance = <5>;
	};
};

如果 CPUFreq 的变化可以使用非常标准的 regulator 、 clk API ,我们甚至可以直接使用 drivers/cpufreq/cpufreq-dt.c 这个驱动。这样只需要在 CPU 节点上填充好频率电压表,然后在平台代码里面注册 cpufreq-dt 设备就可以了,在arch/arm/mach-imx/imx27-dt.c 、 arch/arm/mach-imx/mach-imx51.c 中可以找到类似的例子:

/*
 * Board init for the i.MX27 devicetree machine: populate the platform
 * devices from the devicetree, then register a "cpufreq-dt" platform
 * device.
 */
static void __init imx27_dt_init(void)
{
	struct platform_device_info cpufreq_dt_info = {
		.name = "cpufreq-dt",
	};

	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
	platform_device_register_full(&cpufreq_dt_info);
}

PM QoS
Linux 内核的 PM QoS 系统针对内核和应用程序提供了一套接口,通过这个接口,用户可以设定自身对性能的期望。一类是系统级的需求,通过 cpu_dma_latency 、 network_latency 和network_throughput 这些参数来设定;另一类是单个设备可以根据自身的性能需求发起 per-device 的 PM QoS 请求。在内核空间,通过 pm_qos_add_request ()函数可以注册 PM QoS 请求:

void pm_qos_add_request(struct pm_qos_request *req,
int pm_qos_class, s32 value);

通过 pm_qos_update_request ()函数可以更新已注册的 PM QoS 请求:

void pm_qos_update_request(struct pm_qos_request *req,
s32 new_value);
void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
unsigned long timeout_us);

通过 pm_qos_remove_request ()函数可以删除已注册的 PM QoS 请求:

void pm_qos_remove_request(struct pm_qos_request *req);

譬如在 drivers/media/platform/via-camera.c 这个摄像头驱动中,当摄像头开启后,通过如下语句可以阻止 CPU 进入C3 级别的深度 Idle :

/*
 * Stream-on handler (excerpt): once the camera is started, a
 * PM_QOS_CPU_DMA_LATENCY request with value 50 is registered so that the
 * CPU is kept out of the deep C3 idle state.
 */
static int viacam_streamon(struct file *filp, void *priv, enum v4l2_buf_type t)
{
...
pm_qos_add_request(&cam->qos_request, PM_QOS_CPU_DMA_LATENCY, 50);
...
}

这是因为,在 CPUIdle 子系统中,会根据 PM_QOS_CPU_DMA_LATENCY 请求的情况选择合适的 C 状态,如drivers/cpuidle/governors/ladder.c 中的 ladder_select_state ()就会判断目标 C 状态的exit_latency 与 QoS 要求的关系,如代码清单所示:
CPUIdle LADDER governor 对 QoS 的判断

/*
 * LADDER governor state selection (excerpt).  Shows the promotion path:
 * the next deeper state (last_idx + 1) is chosen only if it exists, is
 * not disabled, the last residency exceeded the promotion time, and its
 * exit_latency does not exceed the current PM_QOS_CPU_DMA_LATENCY
 * request.
 */
static int ladder_select_state(struct cpuidle_driver *drv,

struct cpuidle_device *dev)
{
...
/* latency bound currently requested through PM QoS */
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
...
/* consider promotion */
if (last_idx < drv->state_count - 1 &&
 !drv->states[last_idx + 1].disabled &&
 !dev->states_usage[last_idx + 1].disable &&
 last_residency > last_state->threshold.promotion_time &&
 drv->states[last_idx + 1].exit_latency <= latency_req) {
 last_state->stats.promotion_count++;
 last_state->stats.demotion_count = 0;
 /* promote only after enough consecutive qualifying rounds */
 if(last_state->stats.promotion_count>=
last_state->threshold.promotion_count) {
 ladder_do_selection(ldev, last_idx, last_idx + 1);
 return last_idx + 1;

}
 }
 ...
}

LADDER 在选择是否进入更深层次的 C 状态时,会要求目标 C 状态的 exit_latency 不大于通过 pm_qos_request ( PM_QOS_CPU_DMA_LATENCY )得到的 PM QoS 请求的延迟,即 drv->states[last_idx + 1].exit_latency <= latency_req 。
同样的逻辑也出现于 drivers/cpuidle/governors/menu.c 中,如下面代码清单中对 s->exit_latency > latency_req 的判断。

/*
 * MENU governor state selection (excerpt).  Scans the driver's idle
 * states from CPUIDLE_DRIVER_STATE_START upwards, skips every state that
 * violates one of the constraints below, and remembers the remaining
 * state with the lowest power_usage.  Returns data->last_state_idx.
 */
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
 struct menu_device *data = &__get_cpu_var(menu_devices);
 /* latency bound currently requested through PM QoS */
 int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 ...
 /*
 * Find the idle state with the lowest power while satisfying
 * our constraints.
*/

for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
 struct cpuidle_state *s = &drv->states[i];
 struct cpuidle_state_usage *su = &dev->states_usage[i];

/* state switched off by the driver or for this device */
if (s->disabled || su->disable)

continue;

/* target residency exceeds the predicted idle time */
if (s->target_residency > data->predicted_us)

continue;

/* exit latency would break the PM QoS latency request */
if ( s->exit_latency > latency_req)

continue;

/* scaled exit latency exceeds the predicted idle time */
if (s->exit_latency * multiplier > data->predicted_us)

continue;

/* keep the candidate with the lowest power usage seen so far */
if (s->power_usage < power_usage) {
 power_usage = s->power_usage;
 data->last_state_idx = i;
 data->exit_us = s->exit_latency;

}

}

return data->last_state_idx;
}

还是回到 drivers/media/platform/via-camera.c 中,当摄像头关闭后,它会通过如下语句告知上述代码对PM_QOS_CPU_DMA_LATENCY 的性能要求取消:

/*
 * Stream-off handler (excerpt): once the camera is stopped, the
 * PM_QOS_CPU_DMA_LATENCY request held in cam->qos_request is removed.
 */
static int viacam_streamoff(struct file *filp, void *priv, enum v4l2_buf_type t)
{
...
pm_qos_remove_request(&cam->qos_request);
...
}

类似的在设备驱动中申请 QoS 特性的例子还包括 drivers/net/wireless/ipw2x00/ipw2100.c 、 drivers/tty/serial/omap-serial.c 、 drivers/net/ethernet/intel/e1000e/netdev.c 等。
应用程序则可以通过向 /dev/cpu_dma_latency 和 /dev/network_latency 这样的设备节点写入值来发起 QoS 的性能请求。