MALI: rockchip: upgrade midgard DDK to r9p0-05rel0

Conflicts:

	drivers/gpu/arm/midgard/Kconfig

Change-Id: Ib7975ebe959624bedd92f126768987f2e2f0f84b
Signed-off-by: chenzhen <chenzhen@rock-chips.com>
Author:       chenzhen <chenzhen@rock-chips.com>
Date:         2016-02-19 16:58:15 +08:00
Committed by: Huang, Tao
Parent:       c9a873e3ed
Commit:       206f372ede
34 changed files with 494 additions and 271 deletions

View File

@@ -15,7 +15,7 @@
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= "r8p0-02rel0"
MALI_RELEASE_NAME ?= "r9p0-05rel0"
# Paths required for build
KBASE_PATH = $(src)

View File

@@ -63,14 +63,6 @@ config MALI_MIDGARD_DVFS
help
Choose this option to enable legacy DVFS in the Mali Midgard DDK.
config MALI_MIDGARD_RT_PM
bool "Enable Runtime power management"
depends on MALI_MIDGARD
depends on PM
default y
help
Choose this option to enable runtime power management in the Mali Midgard DDK.
config MALI_MIDGARD_ENABLE_TRACE
bool "Enable kbase tracing"
depends on MALI_MIDGARD
@@ -79,13 +71,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_MIDGARD_DEBUG_SYS
bool "Enable sysfs for the Mali Midgard DDK "
depends on MALI_MIDGARD && SYSFS
default n
help
Enables sysfs for the Mali Midgard DDK. Set/Monitor the Mali Midgard DDK
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD && PM_DEVFREQ
@@ -109,7 +94,7 @@ menuconfig MALI_EXPERT
config MALI_DEBUG_SHADER_SPLIT_FS
bool "Allow mapping of shader cores via sysfs"
depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option to provide a sysfs entry for runtime configuration of shader

View File

@@ -63,6 +63,9 @@ struct slot_rb {
* @scheduling_timer: The timer tick used for rescheduling jobs
* @timer_running: Is the timer running? The runpool_mutex must be
* held whilst modifying this.
* @suspend_timer: Is the timer suspended? Set when a suspend
* occurs and cleared on resume. The runpool_mutex
* must be held whilst modifying this.
* @reset_gpu: Set to a KBASE_RESET_xxx value (see comments)
* @reset_workq: Work queue for performing the reset
* @reset_work: Work item for performing the reset
@@ -80,6 +83,7 @@ struct kbase_backend_data {
struct hrtimer scheduling_timer;
bool timer_running;
bool suspend_timer;
atomic_t reset_gpu;

View File

@@ -144,14 +144,14 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
"ctx_nr,atom_nr");
#endif
#ifdef CONFIG_GPU_TRACEPOINTS
if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) {
if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
/* If this is the only job on the slot, trace it as starting */
char js_string[16];
trace_gpu_sched_switch(
kbasep_make_job_slot_string(js, js_string),
ktime_to_ns(katom->start_timestamp),
(u32)katom->kctx, 0, katom->work_id);
(u32)katom->kctx->id, 0, katom->work_id);
kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
}
#endif
@@ -413,6 +413,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
failed = done >> 16;
finished = (done & 0xFFFF) | failed;
if (done)
end_timestamp = ktime_get();
} while (finished & (1 << i));
kbasep_job_slot_update_head_start_timestamp(kbdev, i,
@@ -819,6 +821,14 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
mutex_unlock(&kctx->jctx.lock);
}
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
{
u32 flush_id = 0;
return flush_id;
}
int kbase_job_slot_init(struct kbase_device *kbdev)
{
#if KBASE_GPU_RESET_EN

View File

@@ -1060,7 +1060,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
js_string),
ktime_to_ns(*end_timestamp),
(u32)next_katom->kctx, 0,
(u32)next_katom->kctx->id, 0,
next_katom->work_id);
kbdev->hwaccess.backend.slot_rb[js].last_context =
next_katom->kctx;

View File

@@ -38,10 +38,15 @@
*/
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
s8 nr_running_ctxs;
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
/* Timer must stop if we are suspending */
if (backend->suspend_timer)
return false;
/* nr_contexts_pullable is updated with the runpool_mutex. However, the
* locking in the caller gives us a barrier that ensures
* nr_contexts_pullable is up-to-date for reading */
@@ -270,7 +275,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
backend->timer_running = false;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
/* From now on, return value of timer_callback_should_run() will
* also cause the timer to not requeue itself. Its return value
* cannot change, because it depends on variables updated with
@@ -284,7 +288,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
backend->timer_running = true;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
@@ -314,3 +317,21 @@ void kbase_backend_timer_term(struct kbase_device *kbdev)
hrtimer_cancel(&backend->scheduling_timer);
}
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = true;
kbase_backend_ctx_count_changed(kbdev);
}
void kbase_backend_timer_resume(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = false;
kbase_backend_ctx_count_changed(kbdev);
}

View File

@@ -41,4 +41,29 @@ int kbase_backend_timer_init(struct kbase_device *kbdev);
*/
void kbase_backend_timer_term(struct kbase_device *kbdev);
/**
* kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
* timer
* @kbdev: Device pointer
*
* This function should be called on suspend, after the active count has reached
* zero. This is required as the timer may have been started on job submission
* to the job scheduler, but before jobs are submitted to the GPU.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_suspend(struct kbase_device *kbdev);
/**
* kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
* scheduling timer
* @kbdev: Device pointer
*
* This function should be called on resume. Note that it is not guaranteed to
* re-start the timer, only evaluate whether it should be re-started.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_resume(struct kbase_device *kbdev);
#endif /* _KBASE_JS_BACKEND_H_ */
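
As a usage note (not part of the patch): both calls above assume runpool_mutex is held and that suspend happens only after the PM active count has reached zero; resume merely re-evaluates whether the scheduling tick should run. A minimal caller sketch under those assumptions; the real call sites are in the PM backend hunk further down:

	/* Illustrative only; mirrors the documented contract above. */
	mutex_lock(&kbdev->js_data.runpool_mutex);
	kbase_backend_timer_suspend(kbdev);	/* PM active count already zero */
	mutex_unlock(&kbdev->js_data.runpool_mutex);

	/* ... later, on resume ... */
	mutex_lock(&kbdev->js_data.runpool_mutex);
	kbase_backend_timer_resume(kbdev);	/* may or may not restart the tick */
	mutex_unlock(&kbdev->js_data.runpool_mutex);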

View File

@@ -29,6 +29,7 @@
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
@@ -363,6 +364,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
}
kbase_backend_timer_suspend(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}
@@ -373,8 +376,12 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbdev->pm.suspending = false;
kbase_pm_do_poweron(kbdev, true);
kbase_backend_timer_resume(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}

View File

@@ -1066,6 +1066,13 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
}
kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_CONFIG), NULL);
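
As an illustrative aside (not from the patch), the product-ID test above splits GPUs into two shader-core quirk groups; a self-contained sketch of that classification, with the bit values taken from the SHADER_CONFIG register hunk later in this commit:

#include <linux/types.h>

#define SC_LS_ALLOW_ATTR_TYPES    (1ul << 16)
#define SC_LS_ATTR_CHECK_DISABLE  (1ul << 18)

/* Same ranges as the hunk above; the GPUCORE_1619 gating is omitted. */
static u32 sc_quirks_for_prod_id(u32 prod_id)
{
	u32 sc = 0;

	if (prod_id < 0x760 || prod_id == 0x6956)	/* T60x, T62x, T72x */
		sc |= SC_LS_ATTR_CHECK_DISABLE;
	else if (prod_id >= 0x760 && prod_id <= 0x880)	/* T76x, T8xx */
		sc |= SC_LS_ALLOW_ATTR_TYPES;

	return sc;	/* e.g. 0x0620 sets bit 18, 0x0760 sets bit 16 */
}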
@@ -1230,10 +1237,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
RESET_COMPLETED) {
/* The interrupt is set in the RAWSTAT; this suggests that the
* interrupts are not getting to the CPU */
dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
/* If interrupts aren't working we can't continue. */
destroy_hrtimer_on_stack(&rtdata.timer);
goto out;
return -EINVAL;
}
/* The GPU doesn't seem to be responding to the reset so try a hard

View File

@@ -57,7 +57,6 @@ enum base_hw_issue {
BASE_HW_ISSUE_8986,
BASE_HW_ISSUE_8987,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -102,6 +101,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3966,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -141,7 +141,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
BASE_HW_ISSUE_8986,
BASE_HW_ISSUE_8987,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -168,6 +167,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
BASE_HW_ISSUE_11035,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -182,7 +182,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_8975,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -219,7 +218,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_8975,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_9510,
BASE_HW_ISSUE_10410,
@@ -494,7 +492,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
static const enum base_hw_issue base_hw_issues_model_t72x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_6402,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10471,
BASE_HW_ISSUE_10797,
@@ -502,12 +499,12 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
static const enum base_hw_issue base_hw_issues_model_t76x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11020,
BASE_HW_ISSUE_11024,
@@ -518,6 +515,7 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -525,7 +523,6 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_6402,
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10472,
BASE_HW_ISSUE_10931,
@@ -535,6 +532,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -551,6 +549,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -619,13 +618,13 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -677,13 +676,13 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
static const enum base_hw_issue base_hw_issues_model_t86x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -720,7 +719,6 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
static const enum base_hw_issue base_hw_issues_model_t83x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
@@ -728,6 +726,7 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -780,7 +779,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
static const enum base_hw_issue base_hw_issues_model_t82x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
@@ -788,6 +786,7 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};

View File

@@ -772,11 +772,12 @@ static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, str
/**
* @brief External resource info initialization.
*
* Sets up a external resource object to reference
* Sets up an external resource object to reference
* a memory allocation and the type of access requested.
*
* @param[in] res The resource object to initialize
* @param handle The handle to the imported memory object
* @param handle The handle to the imported memory object, must be
* obtained by calling @ref base_mem_as_import_handle().
* @param access The type of access requested
*/
static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)

View File

@@ -196,7 +196,8 @@ bool kbase_replay_process(struct kbase_jd_atom *katom);
/* api used internally for register access. Contains validation and tracing */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size);
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
/* api to be ported per OS, only need to do the raw register access */

View File

@@ -66,7 +66,9 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/compat.h> /* is_compat_task */
#include <linux/mman.h>
#include <linux/version.h>
#include <linux/security.h>
#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
#include <linux/pm_runtime.h>
#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
@@ -1483,99 +1485,154 @@ static int kbase_check_flags(int flags)
return 0;
}
#ifdef CONFIG_64BIT
/* The following function is taken from the kernel and just
* renamed. As it's not exported to modules we must copy-paste it here.
*/
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
/* Adjust search length to account for worst case alignment overhead */
length = info->length + info->align_mask;
if (length < info->length)
return -ENOMEM;
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
if (gap_end < length)
return -ENOMEM;
high_limit = gap_end - length;
if (info->low_limit > high_limit)
return -ENOMEM;
low_limit = info->low_limit + length;
/* Check highest gap, which does not precede any rbtree node */
gap_start = mm->highest_vm_end;
if (gap_start <= high_limit)
goto found_highest;
/* Check if rbtree root looks promising */
if (RB_EMPTY_ROOT(&mm->mm_rb))
return -ENOMEM;
vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
if (vma->rb_subtree_gap < length)
return -ENOMEM;
while (true) {
/* Visit right subtree if it looks promising */
gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
struct vm_area_struct, vm_rb);
if (right->rb_subtree_gap >= length) {
vma = right;
continue;
}
}
check_current:
/* Check if current node has a suitable gap */
gap_end = vma->vm_start;
if (gap_end < low_limit)
return -ENOMEM;
if (gap_start <= high_limit && gap_end - gap_start >= length)
goto found;
/* Visit left subtree if it looks promising */
if (vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb);
if (left->rb_subtree_gap >= length) {
vma = left;
continue;
}
}
/* Go back up the rbtree to find next candidate node */
while (true) {
struct rb_node *prev = &vma->vm_rb;
if (!rb_parent(prev))
return -ENOMEM;
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
vma->vm_prev->vm_end : 0;
goto check_current;
}
}
}
found:
/* We found a suitable gap. Clip it with the original high_limit. */
if (gap_end > info->high_limit)
gap_end = info->high_limit;
found_highest:
/* Compute highest gap address at the desired alignment */
gap_end -= info->length;
gap_end -= (gap_end - info->align_offset) & info->align_mask;
VM_BUG_ON(gap_end < info->low_limit);
VM_BUG_ON(gap_end < gap_start);
return gap_end;
}
static unsigned long kbase_get_unmapped_area(struct file *filp,
const unsigned long addr, const unsigned long len,
const unsigned long pgoff, const unsigned long flags)
{
#ifdef CONFIG_64BIT
/* based on get_unmapped_area, but simplified slightly due to that some
* values are known in advance */
struct kbase_context *kctx = filp->private_data;
struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
if (!kctx->is_compat && !addr &&
kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long low_limit, high_limit, gap_start, gap_end;
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
return -EINVAL;
/* Hardware has smaller VA than userspace, ensure the page
* comes from a VA which can be used on the GPU */
/* too big? */
if (len > TASK_SIZE - SZ_2M)
return -ENOMEM;
gap_end = (1UL<<33);
if (gap_end < len)
return -ENOMEM;
high_limit = gap_end - len;
low_limit = PAGE_SIZE + len;
if (kctx->is_compat)
return current->mm->get_unmapped_area(filp, addr, len, pgoff,
flags);
gap_start = mm->highest_vm_end;
if (gap_start <= high_limit)
goto found_highest;
if (RB_EMPTY_ROOT(&mm->mm_rb))
return -ENOMEM;
vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
if (vma->rb_subtree_gap < len)
return -ENOMEM;
while (true) {
gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
struct vm_area_struct, vm_rb);
if (right->rb_subtree_gap >= len) {
vma = right;
continue;
}
}
check_current:
gap_end = vma->vm_start;
if (gap_end < low_limit)
return -ENOMEM;
if (gap_start <= high_limit &&
gap_end - gap_start >= len)
goto found;
if (vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb);
if (left->rb_subtree_gap >= len) {
vma = left;
continue;
}
}
while (true) {
struct rb_node *prev = &vma->vm_rb;
if (!rb_parent(prev))
return -ENOMEM;
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
vma->vm_prev->vm_end : 0;
goto check_current;
}
}
if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
info.high_limit = 1ul << 33;
info.align_mask = 0;
info.align_offset = 0;
} else {
info.high_limit = mm->mmap_base;
if (len >= SZ_2M) {
info.align_offset = SZ_2M;
info.align_mask = SZ_2M - 1;
} else {
info.align_mask = 0;
info.align_offset = 0;
}
found:
if (gap_end > (1UL<<33))
gap_end = (1UL<<33);
found_highest:
gap_end -= len;
VM_BUG_ON(gap_end < PAGE_SIZE);
VM_BUG_ON(gap_end < gap_start);
return gap_end;
}
#endif
/* No special requirements - fallback to the default version */
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
info.flags = 0;
info.length = len;
info.low_limit = SZ_2M;
return kbase_unmapped_area_topdown(&info);
}
#endif
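
For the non-33-bit-VA case, mappings of 2 MiB or more request 2 MiB alignment through align_offset/align_mask. A small standalone sketch (illustrative values, not from the patch) of how the final adjustment in kbase_unmapped_area_topdown then lands the start address on a 2 MiB boundary:

#include <stdio.h>

#define SZ_2M (2UL * 1024 * 1024)

int main(void)
{
	unsigned long gap_end = 0x7f5a3c9000UL;	/* hypothetical top of a free gap */
	unsigned long len = 4 * SZ_2M;		/* hypothetical mapping length */
	unsigned long align_offset = SZ_2M;	/* as set for len >= SZ_2M above */
	unsigned long align_mask = SZ_2M - 1;

	gap_end -= len;					/* highest possible start */
	gap_end -= (gap_end - align_offset) & align_mask;	/* round down to 2 MiB */

	printf("start = 0x%lx\n", gap_end);	/* prints a 2 MiB aligned address */
	return 0;
}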
static const struct file_operations kbase_fops = {
.owner = THIS_MODULE,
@@ -1587,7 +1644,9 @@ static const struct file_operations kbase_fops = {
.compat_ioctl = kbase_ioctl,
.mmap = kbase_mmap,
.check_flags = kbase_check_flags,
#ifdef CONFIG_64BIT
.get_unmapped_area = kbase_get_unmapped_area,
#endif
};
#ifndef CONFIG_MALI_NO_MALI
@@ -3876,7 +3935,7 @@ static int kbase_device_runtime_suspend(struct device *dev)
*/
#ifdef KBASE_PM_RUNTIME
int kbase_device_runtime_resume(struct device *dev)
static int kbase_device_runtime_resume(struct device *dev)
{
int ret = 0;
struct kbase_device *kbdev = to_kbase_device(dev);

View File

@@ -433,12 +433,12 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
/* We need allocate double size register range
* Because this memory will keep the register address and value
*/
kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL);
kctx->reg_dump = vmalloc(0x4000 * 2);
if (kctx->reg_dump == NULL)
return;
if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
kfree(kctx->reg_dump);
vfree(kctx->reg_dump);
kctx->reg_dump = NULL;
}
INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
@@ -451,7 +451,7 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
*/
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
kfree(kctx->reg_dump);
vfree(kctx->reg_dump);
}
#else /* CONFIG_DEBUG_FS */

View File

@@ -144,6 +144,8 @@
#define MIDGARD_MMU_TOPLEVEL 1
#endif
#define MIDGARD_MMU_BOTTOMLEVEL 3
#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
/** setting in kbase_context::as_nr that indicates it's invalid */
@@ -386,6 +388,9 @@ struct kbase_jd_atom {
/* Pointer to atom that has cross-slot dependency on this atom */
struct kbase_jd_atom *x_post_dep;
/* The GPU's flush count recorded at the time of submission, used for
* the cache flush optimisation */
u32 flush_id;
struct kbase_jd_atom_backend backend;
#ifdef CONFIG_DEBUG_FS
@@ -934,10 +939,6 @@ struct kbase_device {
struct list_head kctx_list;
struct mutex kctx_list_lock;
#ifdef CONFIG_MALI_MIDGARD_RT_PM
struct delayed_work runtime_pm_workqueue;
#endif
#ifdef CONFIG_PM_DEVFREQ
struct devfreq_dev_profile devfreq_profile;
struct devfreq *devfreq;
@@ -1216,6 +1217,9 @@ struct kbase_context {
struct list_head completed_jobs;
/* Number of work items currently pending on job_done_wq */
atomic_t work_count;
/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
bool ctx_runnable_ref;
};
enum kbase_reg_access_type {

View File

@@ -257,13 +257,20 @@ void kbase_device_free(struct kbase_device *kbdev)
kfree(kbdev);
}
void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size)
{
unsigned long flags;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(tb);
/* Interface uses 16-bit value to track last accessed entry. Each entry
* is composed of two 32-bit words.
* This limits the size that can be handled without an overflow. */
if (0xFFFF * (2 * sizeof(u32)) < size)
return -EINVAL;
/* set up the header */
/* magic number in the first 4 bytes */
tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
@@ -278,6 +285,8 @@ void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size
kctx->jctx.tb_wrap_offset = size / 8;
kctx->jctx.tb = tb;
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
return 0;
}
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
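
The new bound follows directly from the comment above: a 16-bit index over entries of two 32-bit words caps the buffer at 0xFFFF * 8 bytes, just under 512 KiB, and larger buffers are rejected with -EINVAL. A hypothetical caller sketch (not from the patch; the mmap path later in this commit does the equivalent, freeing with vfree on failure):

#include <linux/vmalloc.h>
#include <mali_kbase.h>

/* Hypothetical helper, illustrative only: allocate a trace buffer and
 * honour the new int return value of kbase_device_trace_buffer_install(). */
static int example_install_trace_buffer(struct kbase_context *kctx, size_t size)
{
	u32 *tb = vmalloc_user(size);
	int err;

	if (!tb)
		return -ENOMEM;

	/* -EINVAL once size > 0xFFFF * 2 * sizeof(u32) */
	err = kbase_device_trace_buffer_install(kctx, tb, size);
	if (err)
		vfree(tb);

	return err;
}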

View File

@@ -221,7 +221,7 @@ static const char * const hardware_counters_mali_t60x[] = {
"T60x_LSC_DIRTY_LINE",
"T60x_LSC_SNOOPS",
"T60x_AXI_TLB_STALL",
"T60x_AXI_TLB_MIESS",
"T60x_AXI_TLB_MISS",
"T60x_AXI_TLB_TRANSACTION",
"T60x_LS_TLB_MISS",
"T60x_LS_TLB_HIT",
@@ -486,7 +486,7 @@ static const char * const hardware_counters_mali_t62x[] = {
"T62x_LSC_DIRTY_LINE",
"T62x_LSC_SNOOPS",
"T62x_AXI_TLB_STALL",
"T62x_AXI_TLB_MIESS",
"T62x_AXI_TLB_MISS",
"T62x_AXI_TLB_TRANSACTION",
"T62x_LS_TLB_MISS",
"T62x_LS_TLB_HIT",
@@ -1018,7 +1018,7 @@ static const char * const hardware_counters_mali_t76x[] = {
"T76x_LSC_DIRTY_LINE",
"T76x_LSC_SNOOPS",
"T76x_AXI_TLB_STALL",
"T76x_AXI_TLB_MIESS",
"T76x_AXI_TLB_MISS",
"T76x_AXI_TLB_TRANSACTION",
"T76x_LS_TLB_MISS",
"T76x_LS_TLB_HIT",
@@ -1284,7 +1284,7 @@ static const char * const hardware_counters_mali_t82x[] = {
"T82x_LSC_DIRTY_LINE",
"T82x_LSC_SNOOPS",
"T82x_AXI_TLB_STALL",
"T82x_AXI_TLB_MIESS",
"T82x_AXI_TLB_MISS",
"T82x_AXI_TLB_TRANSACTION",
"T82x_LS_TLB_MISS",
"T82x_LS_TLB_HIT",
@@ -1550,7 +1550,7 @@ static const char * const hardware_counters_mali_t83x[] = {
"T83x_LSC_DIRTY_LINE",
"T83x_LSC_SNOOPS",
"T83x_AXI_TLB_STALL",
"T83x_AXI_TLB_MIESS",
"T83x_AXI_TLB_MISS",
"T83x_AXI_TLB_TRANSACTION",
"T83x_LS_TLB_MISS",
"T83x_LS_TLB_HIT",
@@ -1816,7 +1816,7 @@ static const char * const hardware_counters_mali_t86x[] = {
"T86x_LSC_DIRTY_LINE",
"T86x_LSC_SNOOPS",
"T86x_AXI_TLB_STALL",
"T86x_AXI_TLB_MIESS",
"T86x_AXI_TLB_MISS",
"T86x_AXI_TLB_TRANSACTION",
"T86x_LS_TLB_MISS",
"T86x_LS_TLB_HIT",
@@ -2082,7 +2082,7 @@ static const char * const hardware_counters_mali_t88x[] = {
"T88x_LSC_DIRTY_LINE",
"T88x_LSC_SNOOPS",
"T88x_AXI_TLB_STALL",
"T88x_AXI_TLB_MIESS",
"T88x_AXI_TLB_MISS",
"T88x_AXI_TLB_TRANSACTION",
"T88x_LS_TLB_MISS",
"T88x_LS_TLB_HIT",

View File

@@ -32,7 +32,6 @@
static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
{
ssize_t ret = 0;
struct list_head *entry;
const struct list_head *kbdev_list;
@@ -58,7 +57,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
mutex_unlock(&kbdev->kctx_list_lock);
}
kbase_dev_list_put(kbdev_list);
return ret;
return 0;
}
/*

View File

@@ -254,6 +254,15 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
*/
void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
/**
* kbase_backend_get_current_flush_id - Return the current flush ID
*
* @kbdev: Device pointer
*
* Return: the current flush ID to be recorded for each job chain
*/
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
#if KBASE_GPU_RESET_EN
/**
* kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.

View File

@@ -248,11 +248,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
dma_addr_t dma_addr;
unsigned long min;
/* if page already is private, we can't store our
* private data. */
if (PagePrivate(pages[i]))
goto unwind;
min = MIN(PAGE_SIZE - offset, local_size);
dma_addr = dma_map_page(dev, pages[i],
offset, min,
@@ -260,7 +255,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
if (dma_mapping_error(dev, dma_addr))
goto unwind;
kbase_set_dma_addr(pages[i], dma_addr);
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = page_to_phys(pages[i]);
local_size -= min;
@@ -279,7 +274,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
while (i--) {
dma_unmap_page(kctx->kbdev->dev, kbase_dma_addr(pages[i]),
dma_unmap_page(kctx->kbdev->dev,
alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL);
put_page(pages[i]);
pages[i] = NULL;
@@ -299,12 +295,11 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
pages = alloc->imported.user_buf.pages;
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
unsigned long local_size;
dma_addr_t dma_addr = kbase_dma_addr(pages[i]);
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
DMA_BIDIRECTIONAL);
ClearPagePrivate(pages[i]);
if (writeable)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
@@ -1258,7 +1253,8 @@ bool jd_submit_atom(struct kbase_context *kctx,
#ifdef CONFIG_GPU_TRACEPOINTS
katom->work_id = atomic_inc_return(&jctx->work_id);
trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
kbasep_map_core_reqs_to_string(katom->core_req));
#endif
if (queued && !IS_GPU_ATOM(katom)) {
@@ -1320,6 +1316,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
bool need_to_try_schedule_context = false;
struct kbase_device *kbdev;
void __user *user_addr;
u32 latest_flush;
/*
* kbase_jd_submit isn't expected to fail and so all errors with the jobs
@@ -1349,6 +1346,9 @@ int kbase_jd_submit(struct kbase_context *kctx,
KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
/* All atoms submitted in this call have the same flush ID */
latest_flush = kbase_backend_get_current_flush_id(kbdev);
for (i = 0; i < submit_data->nr_atoms; i++) {
struct base_jd_atom_v2 user_atom;
struct kbase_jd_atom *katom;
@@ -1424,6 +1424,9 @@ while (false)
#endif
katom = &jctx->atoms[user_atom.atom_number];
/* Record the flush ID for the cache flush optimisation */
katom->flush_id = latest_flush;
while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
/* Atom number is already in use, wait for the atom to
* complete

View File

@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,8 @@
#include <linux/seq_file.h>
#include <mali_kbase.h>
#include <mali_kbase_jd_debugfs.h>
#ifdef CONFIG_DEBUG_FS
@@ -41,6 +43,13 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
KBASE_DEBUG_ASSERT(kctx != NULL);
/* Print version */
seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
/* Print U/K API version */
seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
BASE_UK_VERSION_MINOR);
/* Print table heading */
seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");

View File

@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,8 @@
#include <mali_kbase.h>
#define MALI_JD_DEBUGFS_VERSION 1
/**
* kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
*

View File

@@ -921,6 +921,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
struct kbasep_js_kctx_info *js_kctx_info;
union kbasep_js_policy *js_policy;
int js;
bool update_ctx_count = false;
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -937,14 +938,31 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
}
mutex_lock(&kbdev->js_data.queue_mutex);
mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
if (kctx->ctx_runnable_ref) {
WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
update_ctx_count = true;
kctx->ctx_runnable_ref = false;
}
mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
mutex_unlock(&kbdev->js_data.queue_mutex);
if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
kbasep_js_policy_term_ctx(js_policy, kctx);
js_kctx_info->init_status = JS_KCTX_INIT_NONE;
if (update_ctx_count) {
mutex_lock(&kbdev->js_data.runpool_mutex);
kbase_backend_ctx_count_changed(kbdev);
mutex_unlock(&kbdev->js_data.runpool_mutex);
}
}
/**
@@ -982,8 +1000,11 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable |= (1 << js);
@@ -1025,8 +1046,11 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable |= (1 << js);
@@ -1065,8 +1089,11 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable &= ~(1 << js);
@@ -1105,8 +1132,11 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable &= ~(1 << js);
@@ -1368,6 +1398,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
/* Dependencies could not be represented */
--(js_kctx_info->ctx.nr_jobs);
/* Setting atom status back to queued as it still has unresolved
* dependencies */
atom->status = KBASE_JD_ATOM_STATE_QUEUED;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
@@ -2442,8 +2476,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
kctx->pulled = true;
pulled = atomic_inc_return(&kctx->atoms_pulled);
if (pulled == 1 && !kctx->slots_pullable)
if (pulled == 1 && !kctx->slots_pullable) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
}
atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
jsctx_rb_pull(kctx, katom);
@@ -2495,8 +2532,11 @@ static void js_return_worker(struct work_struct *data)
timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
if (!atomic_read(&kctx->atoms_pulled)) {
if (!kctx->slots_pullable)
if (!kctx->slots_pullable) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
!js_kctx_info->ctx.is_dying) {
@@ -2698,8 +2738,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
context_idle = !atomic_dec_return(&kctx->atoms_pulled);
atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable)
if (!atomic_read(&kctx->atoms_pulled) &&
!kctx->slots_pullable) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
if (katom->event_code != BASE_JD_EVENT_DONE)
kbase_js_compact(kctx);
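
Across all of the sites above, the new ctx_runnable_ref flag makes the nr_contexts_runnable accounting idempotent per context: no matter how many slots a context becomes pullable on, it contributes exactly one reference, and the added WARN_ONs catch double increments or decrements. A condensed sketch of that discipline (hypothetical helpers; the patch open-codes this at each site):

/* Illustrative only, assuming the same fields used above. */
static void example_ctx_mark_runnable(struct kbase_device *kbdev,
				      struct kbase_context *kctx)
{
	WARN_ON(kctx->ctx_runnable_ref);		/* already counted */
	kctx->ctx_runnable_ref = true;
	atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}

static void example_ctx_mark_not_runnable(struct kbase_device *kbdev,
					  struct kbase_context *kctx)
{
	WARN_ON(!kctx->ctx_runnable_ref);		/* not currently counted */
	kctx->ctx_runnable_ref = false;
	atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}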

View File

@@ -708,7 +708,7 @@ void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct k
bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
/**
* @brief Requeue a Job back into the the Job Scheduler Policy Run Pool
* @brief Requeue a Job back into the Job Scheduler Policy Run Pool
*
* This will be used to enqueue a job after its creation and also to requeue
* a job into the Run Pool that was previously dequeued (running). It notifies

View File

@@ -1124,6 +1124,8 @@ int kbase_alloc_phy_pages_helper(
struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_requested)
{
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(alloc);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1131,7 +1133,8 @@ int kbase_alloc_phy_pages_helper(
if (nr_pages_requested == 0)
goto done; /*nothing to do*/
kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
new_page_count = kbase_atomic_add_pages(
nr_pages_requested, &alloc->imported.kctx->used_pages);
kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
/* Increase mm counters before we allocate pages so that this
@@ -1143,7 +1146,9 @@ int kbase_alloc_phy_pages_helper(
goto no_alloc;
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested);
kbase_tlstream_aux_pagesalloc(
(u32)alloc->imported.kctx->id,
(u64)new_page_count);
#endif
alloc->nents += nr_pages_requested;
@@ -1164,6 +1169,7 @@ int kbase_free_phy_pages_helper(
{
bool syncback;
phys_addr_t *start_free;
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(alloc);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
@@ -1185,11 +1191,14 @@ int kbase_free_phy_pages_helper(
alloc->nents -= nr_pages_to_free;
kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
new_page_count = kbase_atomic_sub_pages(
nr_pages_to_free, &alloc->imported.kctx->used_pages);
kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free);
kbase_tlstream_aux_pagesalloc(
(u32)alloc->imported.kctx->id,
(u64)new_page_count);
#endif
return 0;

View File

@@ -144,6 +144,7 @@ struct kbase_mem_phy_alloc {
struct page **pages;
unsigned int current_mapping_usage_count;
struct task_struct *owner;
dma_addr_t *dma_addrs;
} user_buf;
} imported;
};
@@ -317,12 +318,22 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
{
struct kbase_mem_phy_alloc *alloc;
const size_t alloc_size =
sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
size_t per_page_size = sizeof(*alloc->pages);
/* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
/* Imported pages may have page private data already in use */
if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
alloc_size += nr_pages *
sizeof(*alloc->imported.user_buf.dma_addrs);
per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
}
/*
* Prevent nr_pages*per_page_size + sizeof(*alloc) from
* wrapping around.
*/
if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
/ sizeof(*alloc->pages)))
/ per_page_size))
return ERR_PTR(-ENOMEM);
/* Allocate based on the size to reduce internal fragmentation of vmem */
@@ -345,6 +356,10 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en
INIT_LIST_HEAD(&alloc->mappings);
alloc->type = type;
if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
alloc->imported.user_buf.dma_addrs =
(void *) (alloc->pages + nr_pages);
return alloc;
}
@@ -757,7 +772,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
SetPagePrivate(p);
if (sizeof(dma_addr_t) > sizeof(p->private)) {
/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
* private filed stays the same. So we have to be clever and
* private field stays the same. So we have to be clever and
* use the fact that we only store DMA addresses of whole pages,
* so the low bits should be zero */
KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
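
Worth spelling out for the kbase_alloc_create() change above: for KBASE_MEM_TYPE_IMPORTED_USER_BUF the per-page dma_addr_t array lives in the same allocation as pages[], immediately after it, so the wrap-around check must account for both per-page costs. An illustrative sizing sketch (hypothetical helper mirroring that logic):

#include <linux/mm_types.h>
#include <linux/types.h>

/* Illustrative only: combined size for the header plus pages[] plus, for
 * USER_BUF imports, the parallel dma_addrs[]. Returns 0 if nr_pages would
 * make the multiplication wrap. */
static size_t example_phy_alloc_size(size_t header_size, size_t nr_pages,
				     bool is_user_buf)
{
	size_t per_page = sizeof(struct page *);

	if (is_user_buf)
		per_page += sizeof(dma_addr_t);

	if (nr_pages > (((size_t)-1) - header_size) / per_page)
		return 0;

	return header_size + nr_pages * per_page;
}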

View File

@@ -145,7 +145,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
unsigned long prot = PROT_NONE;
unsigned long va_size = va_pages << PAGE_SHIFT;
unsigned long va_map = va_size;
unsigned long cookie;
unsigned long cookie, cookie_nr;
unsigned long cpu_addr;
/* Bind to a cookie */
@@ -155,15 +155,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
goto no_cookie;
}
/* return a cookie */
cookie = __ffs(kctx->cookies);
kctx->cookies &= ~(1UL << cookie);
BUG_ON(kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = reg;
cookie_nr = __ffs(kctx->cookies);
kctx->cookies &= ~(1UL << cookie_nr);
BUG_ON(kctx->pending_regions[cookie_nr]);
kctx->pending_regions[cookie_nr] = reg;
kbase_gpu_vm_unlock(kctx);
/* relocate to correct base */
cookie += PFN_DOWN(BASE_MEM_COOKIE_BASE);
cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
cookie <<= PAGE_SHIFT;
/* See if we must align memory due to GPU PC bits vs CPU VA */
@@ -197,8 +197,11 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED,
cookie);
if (IS_ERR_VALUE(cpu_addr))
if (IS_ERR_VALUE(cpu_addr)) {
kctx->pending_regions[cookie_nr] = NULL;
kctx->cookies |= (1UL << cookie_nr);
goto no_mmap;
}
/*
* If we had to allocate extra VA space to force the
@@ -1440,7 +1443,11 @@ static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_st
goto out;
}
kbase_device_trace_buffer_install(kctx, tb, size);
err = kbase_device_trace_buffer_install(kctx, tb, size);
if (err) {
vfree(tb);
goto out;
}
} else {
err = -EINVAL;
goto out;

View File

@@ -32,18 +32,16 @@
static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
{
struct kbase_context *kctx = sfile->private;
int err = 0;
mutex_lock(&kctx->mem_profile_lock);
err = seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
if (!err)
seq_putc(sfile, '\n');
seq_putc(sfile, '\n');
mutex_unlock(&kctx->mem_profile_lock);
return err;
return 0;
}
/*

View File

@@ -46,18 +46,18 @@
/**
* kbase_mmu_sync_pgd - sync page directory to memory
* @dev: Device pointer.
* @kbdev: Device pointer.
* @handle: Address of DMA region.
* @size: Size of the region to sync.
*
* This should be called after each page directory update.
*/
static void kbase_mmu_sync_pgd(struct device *dev,
static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
dma_addr_t handle, size_t size)
{
dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE);
}
/*
@@ -260,9 +260,7 @@ void page_fault_worker(struct work_struct *data)
kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
#endif
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagefault(
kctx->id,
atomic_read(&kctx->used_pages));
kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
#endif
/* flush L2 and unlock the VA (resumes the MMU) */
@@ -316,15 +314,22 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
u64 *page;
int i;
struct page *p;
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(NULL != kctx);
kbase_atomic_add_pages(1, &kctx->used_pages);
new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
p = kbase_mem_pool_alloc(&kctx->mem_pool);
if (!p)
goto sub_pages;
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(
(u32)kctx->id,
(u64)new_page_count);
#endif
page = kmap(p);
if (NULL == page)
goto alloc_free;
@@ -334,7 +339,7 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap(p);
return page_to_phys(p);
@@ -388,8 +393,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd,
kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p), PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
/* Rely on the caller to update the address space flags. */
}
@@ -404,7 +408,7 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
pgd = kctx->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
/* Handle failure condition */
if (!pgd) {
@@ -451,7 +455,7 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context
pgd = kctx->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
/* Should never fail */
KBASE_DEBUG_ASSERT(0 != pgd);
@@ -500,9 +504,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p),
PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap_atomic(pgd_page);
}
@@ -584,10 +586,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
/* We have started modifying the page table.
@@ -676,10 +677,9 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
/* We have started modifying the page table. If further pages
@@ -824,10 +824,9 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
}
@@ -898,10 +897,9 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(pfn_to_page(PFN_DOWN(pgd)));
}
@@ -952,7 +950,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
if (target_pgd) {
if (level < 2) {
if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
} else {
/*
@@ -1001,6 +999,8 @@ void kbase_mmu_term(struct kbase_context *kctx)
void kbase_mmu_free_pgd(struct kbase_context *kctx)
{
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
@@ -1011,8 +1011,14 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx)
beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
kbase_process_page_usage_dec(kctx, 1);
kbase_atomic_sub_pages(1, &kctx->used_pages);
new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(
(u32)kctx->id,
(u64)new_page_count);
#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
@@ -1052,16 +1058,21 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
*size_left -= size;
}
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i])) {
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
if (level < MIDGARD_MMU_BOTTOMLEVEL) {
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i])) {
target_pgd = mmu_mode->pte_to_phy_addr(
pgd_page[i]);
dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
if (!dump_size) {
kunmap(pfn_to_page(PFN_DOWN(pgd)));
return 0;
dump_size = kbasep_mmu_dump_level(kctx,
target_pgd, level + 1,
buffer, size_left);
if (!dump_size) {
kunmap(pfn_to_page(PFN_DOWN(pgd)));
return 0;
}
size += dump_size;
}
size += dump_size;
}
}

View File

@@ -448,15 +448,15 @@ static const struct tp_desc tp_desc_aux[] = {
KBASE_AUX_PAGEFAULT,
__stringify(KBASE_AUX_PAGEFAULT),
"Page fault",
"@II",
"ctx_nr,page_cnt"
"@IL",
"ctx_nr,page_cnt_change"
},
{
KBASE_AUX_PAGESALLOC,
__stringify(KBASE_AUX_PAGESALLOC),
"Total alloc pages change",
"@l",
"page_cnt_change"
"@IL",
"ctx_nr,page_cnt"
}
};
@@ -1998,9 +1998,34 @@ void kbase_tlstream_aux_job_softstop(u32 js_id)
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count)
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
{
const u32 msg_id = KBASE_AUX_PAGEFAULT;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
sizeof(page_count_change);
unsigned long flags;
char *buffer;
size_t pos = 0;
buffer = kbasep_tlstream_msgbuf_acquire(
TL_STREAM_TYPE_AUX, msg_size, &flags);
KBASE_DEBUG_ASSERT(buffer);
pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_tlstream_write_timestamp(buffer, pos);
pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
pos = kbasep_tlstream_write_bytes(
buffer, pos,
&page_count_change, sizeof(page_count_change));
KBASE_DEBUG_ASSERT(msg_size == pos);
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
{
const u32 msg_id = KBASE_AUX_PAGESALLOC;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
sizeof(page_count);
@@ -2022,26 +2047,3 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count)
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
{
const u32 msg_id = KBASE_AUX_PAGESALLOC;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
unsigned long flags;
char *buffer;
size_t pos = 0;
buffer = kbasep_tlstream_msgbuf_acquire(
TL_STREAM_TYPE_AUX, msg_size, &flags);
KBASE_DEBUG_ASSERT(buffer);
pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_tlstream_write_timestamp(buffer, pos);
pos = kbasep_tlstream_write_bytes(
buffer, pos,
&page_count_change, sizeof(page_count_change));
KBASE_DEBUG_ASSERT(msg_size == pos);
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
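
Net effect on the trace format, as an illustrative note (not from the patch): both AUX messages now put the context number first and a 64-bit count second, matching the "@IL" descriptors above.

/* Serialization order used by both functions above:
 *     u32 msg_id | u64 timestamp | u32 ctx_nr | u64 count
 * so msg_size = 4 + 8 + 4 + 8 = 24 bytes. PAGESALLOC now carries the
 * context's running page total; PAGEFAULT carries the pages just mapped. */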

View File

@@ -375,18 +375,18 @@ void kbase_tlstream_aux_job_softstop(u32 js_id);
/**
* kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
* resulting in new pages being mapped
* @ctx_nr: kernel context number
* @page_count: number of currently used pages
* @ctx_nr: kernel context number
* @page_count_change: number of pages to be added
*/
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count);
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
/**
* kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
* pages is changed
* @page_count_change: number of pages to be added or subtracted (according to
* the sign)
* @ctx_nr: kernel context number
* @page_count: number of pages used by the context
*/
void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
#endif /* _KBASE_TLSTREAM_H */

View File

@@ -493,7 +493,9 @@
#define SC_ALT_COUNTERS (1ul << 3)
#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
/* End SHADER_CONFIG register */

View File

@@ -1,8 +0,0 @@
# This confidential and proprietary software may be used only as
# authorised by a licensing agreement from ARM Limited
# (C) COPYRIGHT 2010-2011, 2013 ARM Limited
# ALL RIGHTS RESERVED
# The entire notice above must be reproduced on all authorised
# copies and copies may only be made to the extent permitted
# by a licensing agreement from ARM Limited.

View File

@@ -1,11 +0,0 @@
# This confidential and proprietary software may be used only as
# authorised by a licensing agreement from ARM Limited
# (C) COPYRIGHT 2010-2011, 2013 ARM Limited
# ALL RIGHTS RESERVED
# The entire notice above must be reproduced on all authorised
# copies and copies may only be made to the extent permitted
# by a licensing agreement from ARM Limited.
SConscript( 'customer/sconscript' )
if Glob('internal/sconscript'):
SConscript( 'internal/sconscript' )