sparse irq_desc[] array: core kernel and x86 changes

Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to much
larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc,
this also makes the IRQ descriptors NUMA-local (to the site that calls
request_irq()).

This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now
uses desc->chip_data for x86 to store irq_cfg.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Yinghai Lu
2008-12-05 18:58:31 -08:00
committed by Ingo Molnar
parent 218d11a8b0
commit 0b8f1efad3
23 changed files with 659 additions and 173 deletions

View File

@@ -18,6 +18,8 @@
#include <asm/ptrace.h>
#include <asm/system.h>
extern int nr_irqs;
/*
* These correspond to the IORESOURCE_IRQ_* defines in
* linux/ioport.h to select the interrupt line behaviour. When

View File

@@ -129,6 +129,8 @@ struct irq_chip {
const char *typename;
};
struct timer_rand_state;
struct irq_2_iommu;
/**
* struct irq_desc - interrupt descriptor
* @irq: interrupt number for this descriptor
@@ -154,6 +156,13 @@ struct irq_chip {
*/
struct irq_desc {
unsigned int irq;
#ifdef CONFIG_SPARSE_IRQ
struct timer_rand_state *timer_rand_state;
unsigned int *kstat_irqs;
# ifdef CONFIG_INTR_REMAP
struct irq_2_iommu *irq_2_iommu;
# endif
#endif
irq_flow_handler_t handle_irq;
struct irq_chip *chip;
struct msi_desc *msi_desc;
@@ -181,13 +190,51 @@ struct irq_desc {
const char *name;
} ____cacheline_internodealigned_in_smp;
extern void early_irq_init(void);
extern void arch_early_irq_init(void);
extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
struct irq_desc *desc, int cpu);
extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
#ifndef CONFIG_SPARSE_IRQ
extern struct irq_desc irq_desc[NR_IRQS];
static inline struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < nr_irqs) ? irq_desc + irq : NULL;
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
{
return irq_to_desc(irq);
}
#ifdef CONFIG_GENERIC_HARDIRQS
# define for_each_irq_desc(irq, desc) \
for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
# define for_each_irq_desc_reverse(irq, desc) \
for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \
irq >= 0; irq--, desc--)
#endif
#else
extern struct irq_desc *irq_to_desc(unsigned int irq);
extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
# define for_each_irq_desc(irq, desc) \
for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
# define for_each_irq_desc_reverse(irq, desc) \
for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
#define kstat_irqs_this_cpu(DESC) \
((DESC)->kstat_irqs[smp_processor_id()])
#define kstat_incr_irqs_this_cpu(irqno, DESC) \
((DESC)->kstat_irqs[smp_processor_id()]++)
#endif
/*
* Migration helpers for obsolete names, they will go away:
@@ -380,6 +427,11 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
#define get_irq_data(irq) (irq_to_desc(irq)->handler_data)
#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc)
#define get_irq_desc_chip(desc) ((desc)->chip)
#define get_irq_desc_chip_data(desc) ((desc)->chip_data)
#define get_irq_desc_data(desc) ((desc)->handler_data)
#define get_irq_desc_msi(desc) ((desc)->msi_desc)
#endif /* CONFIG_GENERIC_HARDIRQS */
#endif /* !CONFIG_S390 */

View File

@@ -7,18 +7,10 @@
# define for_each_irq_desc(irq, desc) \
for (irq = 0; irq < nr_irqs; irq++)
#else
extern int nr_irqs;
# define for_each_irq_desc(irq, desc) \
for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
# define for_each_irq_desc_reverse(irq, desc) \
for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \
irq >= 0; irq--, desc--)
static inline early_sparse_irq_init(void)
{
}
#endif
#define for_each_irq_nr(irq) \
for (irq = 0; irq < nr_irqs; irq++)
#endif

View File

@@ -28,7 +28,9 @@ struct cpu_usage_stat {
struct kernel_stat {
struct cpu_usage_stat cpustat;
unsigned int irqs[NR_IRQS];
#ifndef CONFIG_SPARSE_IRQ
unsigned int irqs[NR_IRQS];
#endif
};
DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -39,6 +41,10 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
extern unsigned long long nr_context_switches(void);
#ifndef CONFIG_SPARSE_IRQ
#define kstat_irqs_this_cpu(irq) \
(kstat_this_cpu.irqs[irq])
struct irq_desc;
static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
@@ -46,11 +52,17 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
{
kstat_this_cpu.irqs[irq]++;
}
#endif
#ifndef CONFIG_SPARSE_IRQ
static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
return kstat_cpu(cpu).irqs[irq];
}
#else
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
#endif
/*
* Number of interrupts per specific IRQ source, since bootup

View File

@@ -44,6 +44,57 @@ struct rand_pool_info {
extern void rand_initialize_irq(int irq);
struct timer_rand_state;
#ifndef CONFIG_SPARSE_IRQ
extern struct timer_rand_state *irq_timer_state[];
extern int nr_irqs;
static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
{
if (irq >= nr_irqs)
return NULL;
return irq_timer_state[irq];
}
static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
{
if (irq >= nr_irqs)
return;
irq_timer_state[irq] = state;
}
#else
#include <linux/irq.h>
static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
{
struct irq_desc *desc;
desc = irq_to_desc(irq);
if (!desc)
return NULL;
return desc->timer_rand_state;
}
static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
{
struct irq_desc *desc;
desc = irq_to_desc(irq);
if (!desc)
return;
desc->timer_rand_state = state;
}
#endif
extern void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value);
extern void add_interrupt_randomness(int irq);