/*
 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <platform/boot_secondary.h>

/*
 * Set to a non-zero value to make both reset handlers in this file
 * (_boot_secondary_cpu and _lp1_resume) spin in a "b ." loop early on.
 */
#define DEBUG_CPU_RESET_HANDLER		0
/*
 * Size, in 32-bit words, of svc_scratch_space; also used to compute the
 * top of the temporary SVC stack in both handlers.
 */
#define SCRATCH_WORDS			8
/*
 * Byte offsets from TEGRA_CLK_RESET_BASE of the registers written by
 * _lp1_resume when it handles the clock switch on resume.
 */
#define CLK_RESET_CCLK_BURST		0x20
#define CLK_RESET_CCLK_DIVIDER		0x24
#define CLK_RESET_SCLK_BURST		0x28
#define CLK_RESET_SCLK_DIVIDER		0x2c

/*
 * invalidate_l1_v7
 *
 * Invalidate (without cleaning) every line of the level-1 data cache by
 * set/way.  The cache geometry is read from CCSIDR after selecting the
 * L1 data cache in CSSELR, then each (way, set) pair is invalidated in
 * turn.
 *
 * In:       lr = return address
 * Out:      none
 * Clobbers: r0-r6 and the condition flags.  No stack is used, so r4-r6
 *           are NOT preserved; callers that follow the procedure call
 *           standard must not keep live values in them across this call.
 */
.globl invalidate_l1_v7
invalidate_l1_v7:
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ CSSELR = 0: select the L1 data cache
	isb				@ make the selection visible to the CCSIDR read
	mrc	p15, 1, r0, c0, c0, 0	@ r0 = CCSIDR for the selected cache

	movw	r1, #0x7fff
	and	r2, r1, r0, lsr #13	@ NumSets - 1

	movw	r1, #0x3ff

	and	r3, r1, r0, lsr #3	@ NumWays - 1
	add	r2, r2, #1		@ NumSets

	and	r0, r0, #0x7
	add	r0, r0, #4		@ SetShift

	clz	r1, r3			@ WayShift
	add	r4, r3, #1		@ NumWays
1:	sub	r2, r2, #1		@ NumSets--
	mov	r3, r4			@ Temp = NumWays
2:	subs	r3, r3, #1		@ Temp--
	mov	r5, r3, lsl r1
	mov	r6, r2, lsl r0
	orr	r5, r5, r6		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr	p15, 0, r5, c7, c6, 2	@ invalidate this line by set/way
	bgt	2b			@ flags still from subs: loop until way 0 is done
	cmp	r2, #0
	bgt	1b			@ loop until set 0 is done
	dsb
	isb
	mov	pc, lr

/*
 * _boot_secondary_cpu
 *
 * CPU entry code whose address is published in boot_secondary_cpu_addr.
 * Takes no arguments and never returns to a caller; it ends by leaving
 * monitor mode with "movs pc, lr".
 *
 * Outline:
 *   1. Switch to SVC mode with interrupts masked and apply the per-core
 *      setup selected at build time (ARM_CPU_CORTEX_A9 / _A15,
 *      ARM_WITH_SCU, ARM_CLUSTER0_INIT_L2).
 *   2. cpu_context_restore: compute the physical address of _cpu_context
 *      and hand it to the cpu_restore_context macro with r1 = 1
 *      ("enable MMU"), then jump to the link-time address of go_virtual.
 *   3. go_virtual: on CPU 0 only, set up a temporary SVC stack and run
 *      the one-time initialisation; on every CPU configure interrupts,
 *      SCR/NSACR and the GIC.
 *   4. Enter monitor mode, load the normal-world entry address, set
 *      SCR.NS and leave to SVC mode at that address.
 *
 * NOTE(review): cpu_id and cpu_restore_context are macros defined outside
 * this file (presumably in platform/boot_secondary.h); which registers
 * they clobber cannot be confirmed from here.
 */
.globl _boot_secondary_cpu
_boot_secondary_cpu:
	clrex

	cpsid	aif, 0x13			@ SVC mode, interrupts disabled
#if ARM_CPU_CORTEX_A9
	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
	and	r5, r0, #0x00f00000		@ variant
	and	r6, r0, #0x0000000f		@ revision
	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
	/*
	 * ARM_ERRATA_743622: the teq/teqne chain leaves Z set if r6 matched
	 * any of the listed revisions; only then is the register updated.
	 */
	teq	r6, #0x20			@ present in r2p0
	teqne	r6, #0x21			@ present in r2p1
	teqne	r6, #0x22			@ present in r2p2
	teqne	r6, #0x27			@ present in r2p7
	teqne	r6, #0x29			@ present in r2p9
	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orreq	r10, r10, #1 << 6		@ set bit #6
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#if ARM_CPU_CORTEX_A15
	mov	r0, #0
	mcr	p15, 0, r0, c1, c0, 1		@ disable SMP, prefetch, broadcast
	isb
#endif

#if DEBUG_CPU_RESET_HANDLER
	b	.				@ debug aid: park the CPU here
#endif

#if ARM_CPU_CORTEX_A15
	mrc	p15, 0x1, r0, c15, c0, 3	@ L2 prefetch control reg
	tst	r0, #0x1000
	orreq	r0, r0, #0x1000			@ disable throttling
	mcreq	p15, 0x1, r0, c15, c0, 3	@ written back only if bit 12 was clear

	mrc	p15, 0, r0, c0, c0, 0		@ main ID register
	ubfx    r1, r0, #20, #4			@ r1 = bits [23:20] of the ID register
	cmp	r1, #0x3			@ Compare to rev 3
	mrcge	p15, 0x1, r0, c15, c0, 4	@ ACTLR2
	orrge	r0, #(1<<31)			@ Enable regional clock gates
	mcrge	p15, 0x1, r0, c15, c0, 4	@ Rev 3 or higher

#if ARM_CLUSTER0_INIT_L2
	/*
	 * This is only needed for cluster 0 with integrated L2 cache.
	 * Skipped when bit 0 of the cluster id field is set, or when the
	 * low three bits of the L2 register already hold 2.
	 */
	mrc	p15, 0, r0, c0, c0, 5
	ubfx	r0, r0, #8, #4			@ r0 = bits [11:8] (cluster id)
	tst	r0, #1
	bne	__enable_i_cache
	mrc	p15, 0x1, r0, c9, c0, 2
	and	r1, r0, #7
	cmp	r1, #2
	beq	__enable_i_cache
	bic	r0, r0, #7
	orr	r0, r0, #2
	mcr	p15, 0x1, r0, c9, c0, 2
__enable_i_cache:
#endif
	mrc	p15, 0, r0, c1, c0, 0
	orr	r0, r0, #0x1000
	mcr	p15, 0, r0, c1, c0, 0		@ enable i-cache
	b	cpu_context_restore		@ skip the A9/SCU blocks below
#endif

#if ARM_CPU_CORTEX_A9
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate BTAC, i-cache
	mcr	p15, 0, r0, c7, c5, 6		@ invalidate branch pred array
	mcr	p15, 0, r0, c8, c5, 0		@ invalidate instruction TLB
	mcr	p15, 0, r0, c8, c6, 0		@ invalidate data TLB
	mcr	p15, 0, r0, c8, c7, 0		@ invalidate unified TLB
	dsb
	isb
#endif

#if ARM_WITH_SCU
	/* CPUs other than 0 write the mask 0xf << (4 * cpu) to SCU offset 0xC */
	cpu_id	r0
	cmp	r0, #0
	ldrne	r1, =TEGRA_ARM_SCU_BASE
	movne	r0, r0, lsl #2
	movne	r2, #0xf
	movne	r2, r2, lsl r0
	strne	r2, [r1, #0xC]		@ invalidate SCU tags for CPU
#endif

#if ARM_CPU_CORTEX_A9
	dsb
	ldr	r0, =0x1000			@ whole register is written, not read-modify-write
	mcr	p15, 0, r0, c1, c0, 0		@ enable i-cache
	isb

	bl	invalidate_l1_v7	@ invalidate D$
#endif

cpu_context_restore:
	/*
	 * r0 = offset of _cpu_context from VMEMBASE; r1 = the word stored at
	 * _boot_secondary_phys_base (found PC-relative with adr).  Their sum
	 * is used as the physical address of the saved context.
	 */
	ldr	r0, =_cpu_context
	ldr	r1, =VMEMBASE
	sub	r0, r0, r1
	adr	r1, _boot_secondary_phys_base
	ldr	r1, [r1]

	/* restore cpu context */
	add	r0, r0, r1			@ phys base of context
	mov	r1, #1				@ enable MMU
	cpu_restore_context r0, r1, r2, r3

	/* go virtual now: jump to the link-time address of go_virtual */
	ldr	r1, =go_virtual
	mov	pc, r1
go_virtual:

	cpu_id	r0
	cmp	r0, #0				@ CPU 0?
	bne	1110f				@ other CPUs skip the one-time init

	ldr	r0, =svc_scratch_space
	add	sp, r0, #(SCRATCH_WORDS * 4)	@ temporary svc_sp
#if ARM_WITH_SCU
	ldr	r0, =cpu_enable_scu		@ enable SCU
	blx	r0

	ldr	r0, =cpu_enable_scu_access	@ r/w scu regs from all cpus
	blx	r0
#endif
	/* allow full access to CP10 and CP11 */
	mrc	p15, 0, r0, c1, c0, 2		@ CPACR
	orr	r0, r0, #0x00F00000
	mcr	p15, 0, r0, c1, c0, 2		@ CPACR
	isb

	ldr	r0, =pm_early_init
	blx	r0

	/* call platform_restore_memory only when the saved mode is CPU_IN_LP0 */
	ldr	r0, =cpu_power_down_mode
	ldr	r0, [r0]
	cmp	r0, #CPU_IN_LP0
	ldreq	r0, =platform_restore_memory
	blxeq	r0

	/*
	 * enable user space perf counter access
	 *
	 * r0 = 5-bit field at bits [15:11] of the register read from
	 * c9, c12, 0; r1 = (1 << r0) - 1 with its upper halfword then
	 * replaced by 0x8000 (movt).  r1 is written to c9, c14, 2 and
	 * the value 1 to c9, c14, 0.
	 */
	mrc	p15, 0, r0, c9, c12, 0
	lsr	r0, #11
	and	r0, r0, #0x1f
	mov	r1, #1
	lsl	r1, r1, r0
	sub	r1, r1, #1
	movt	r1, #0x8000
	mcr	p15, 0, r1, c9, c14, 2
	mov	r0, #1
	mcr	p15, 0, r0, c9, c14, 0

	/* enable user space CNTVCT access: set bit 1 of c14, c1, 0 */
	mrc	p15, 0, r0, c14, c1, 0
	orr	r0, r0, #(1 << 1)
	mcr	p15, 0, r0, c14, c1, 0

1110:
	/* from here on the code runs on every CPU */
	ldr	r0, =platform_config_interrupts
	blx	r0

	/* enable SCR.FW (the whole register is written, clearing other bits) */
	mov	r0, #(1 << 4)		@ SCR.FW
	mcr	p15, 0, r0, c1, c1, 0
	isb

	/* let normal world enable SMP, lock TLB, access CP10/11 */
	mrc	p15, 0, r0, c1, c1, 2		@ NSACR
	orr	r0, r0, #0x00000C00
	orr	r0, r0, #0x00060000
	mcr	p15, 0, r0, c1, c1, 2		@ NSACR

#if ARM_CPU_CORTEX_A9
	/* normal world can write SMP, but not FW */
	mrc	p15, 0, r0, c1, c0, 1
	orr	r0, r0, #(0x1 << 0)		@ enable FW
	mcr	p15, 0, r0, c1, c0, 1		@ ACTLR
#endif

	/* mark banked CPU intrs (31:0) be non-secure */
	ldr	r0, =cpu_gic_setup
	blx	r0

	/*
	 * goto monitor mode
	 *
	 * On CPU 0 the word at mon_stack_top is fetched while still in SVC
	 * mode and installed as sp after the mode switch; the moveq relies
	 * on the flags set by the cmp before the cps.
	 */
	cpu_id	r0
	cmp	r0, #0				@ are we CPU #0?
	ldreq	lr, =mon_stack_top
	ldreq	r0, [lr]
	cps	#0x16				@ monitor mode
	moveq	sp, r0				@ restore mon_sp

	/*
	 * NS world's entry point
	 *
	 * If cpu_power_down_mode is CPU_IN_LP0 the entry address comes from
	 * _ns_resume_addr and the mode word is cleared to 0; otherwise it
	 * comes from _ns_addr_secondary_cpus.
	 */
	ldr	r0, =cpu_power_down_mode
	ldr	r1, [r0]
	cmp	r1, #CPU_IN_LP0
	ldreq	lr, =_ns_resume_addr
	moveq	r1, #0
	streq	r1, [r0]
	ldrne	lr, =_ns_addr_secondary_cpus
	ldr	lr, [lr]			@ lr = entry address
	cmp	lr, #0
	beq	.				@ error: if NULL entry point

	/* NS=1 */
	mrc	p15, 0, r0, c1, c1, 0		@ SCR
	orr	r0, r0, #0x1
	mcr	p15, 0, r0, c1, c1, 0		@ NS=1 mode
	isb

	/*
	 * set shadow spsr_mon, with current reset CPSR state
	 * (e.g. intrs disabled) and changed the mode to SVC
	 */
	mrs	r0, cpsr
	bic	r0, #0x1f
	orr	r0, #0x13
	msr	spsr_cxfs, r0
	movs	pc, lr				@ exception return: CPSR = SPSR, pc = lr

/*
 * Data words kept next to the boot code.
 */

/* Address of the _boot_secondary_cpu entry point. */
	.global	boot_secondary_cpu_addr
boot_secondary_cpu_addr:
	.long	_boot_secondary_cpu

/*
 * Zero-initialised scratch area of SCRATCH_WORDS 32-bit words;
 * _boot_secondary_cpu points the temporary SVC stack at its end.
 */
	.global	svc_scratch_space
svc_scratch_space:
	.fill	SCRATCH_WORDS, 4, 0

/*
 * Normal-world entry address used by _boot_secondary_cpu when the saved
 * power-down mode is CPU_IN_LP0.  A value of zero makes that code spin.
 */
	.global	_ns_resume_addr
_ns_resume_addr:
	.long	0

/*
 * Value added to (_cpu_context - VMEMBASE) by _boot_secondary_cpu to
 * form the physical address of the saved context.  It is zero here and
 * is not written anywhere in this file.
 */
	.global	_boot_secondary_phys_base
_boot_secondary_phys_base:
	.long	0

/*
 * LP1 resume code.  This code is executed out of TZRAM.  Note that DRAM
 * is in self-refresh while this code is executing.
 *
 * Consequently everything between _lp1_resume and _lp1_resume_end is
 * self-contained: state is read from the TZRAM_* locations and hardware
 * registers only, cpu_restore_context is invoked with r1 = 0 (MMU left
 * disabled), and the L1 data cache invalidate is written out inline
 * rather than calling invalidate_l1_v7.
 *
 * Takes no arguments and never returns to a caller; it ends by leaving
 * monitor mode with "movs pc, lr" to the address stored at
 * TZRAM_NS_RESUME_ADDR.
 *
 * NOTE(review): cpu_id and cpu_restore_context are macros defined outside
 * this file; which registers they clobber cannot be confirmed from here.
 */
.globl _lp1_resume
_lp1_resume:
	clrex

	cpsid	aif, 0x13			@ SVC mode, interrupts disabled
#if ARM_CPU_CORTEX_A9
	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
	and	r5, r0, #0x00f00000		@ variant
	and	r6, r0, #0x0000000f		@ revision
	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
	/*
	 * ARM_ERRATA_743622: the teq/teqne chain leaves Z set if r6 matched
	 * any of the listed revisions; only then is the register updated.
	 */
	teq	r6, #0x20			@ present in r2p0
	teqne	r6, #0x21			@ present in r2p1
	teqne	r6, #0x22			@ present in r2p2
	teqne	r6, #0x27			@ present in r2p7
	teqne	r6, #0x29			@ present in r2p9
	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orreq	r10, r10, #1 << 6		@ set bit #6
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#if ARM_CPU_CORTEX_A15
	mov	r0, #0
	mcr	p15, 0, r0, c1, c0, 1		@ disable SMP, prefetch, broadcast
	isb
#endif

#if DEBUG_CPU_RESET_HANDLER
	b	.				@ debug aid: park the CPU here
#endif

#if ARM_CPU_CORTEX_A15
	/*
	 * NOTE(review): this Cortex-A15 setup is repeated after the clock
	 * switch below (there without the ARM_CLUSTER0_INIT_L2 guard).  Both
	 * copies are kept as they were; confirm whether the early one is
	 * required before the clock registers are touched.
	 */
	mrc	p15, 0x1, r0, c15, c0, 3	@ L2 prefetch control reg
	tst	r0, #0x1000
	orreq	r0, r0, #0x1000			@ disable throttling
	mcreq	p15, 0x1, r0, c15, c0, 3

	mrc	p15, 0, r0, c0, c0, 0		@ main ID register
	ubfx	r1, r0, #20, #4
	cmp	r1, #0x3			@ Compare to rev 3
	mrcge	p15, 0x1, r0, c15, c0, 4	@ ACTLR2
	orrge	r0, #(1<<31)			@ Enable regional clock gates
	mcrge	p15, 0x1, r0, c15, c0, 4	@ Rev 3 or higher

#if ARM_CLUSTER0_INIT_L2
	/* set up L2 for cluster0 */
	mrc	p15, 0, r0, c0, c0, 5
	ubfx	r0, r0, #8, #4			@ get cluster id
	cmp	r0, #0				@ C0 = 0, C1 = 1
	bne	not_on_cluster0

	mrc	p15, 0x1, r0, c9, c0, 0x2	@ read L2CTLR
	bic	r0, r0, #0x7			@ clear data ram latency field
	orr	r0, r0, #0x2			@ pipeline depth of 3
	mcr	p15, 0x1, r0, c9, c0, 0x2	@ write L2CTLR

not_on_cluster0:
#endif
#endif

	/* check if we should avoid switching to CLKM */
	ldr	r0, =TZRAM_CPU_AVOID_CLKM_SWITCH
	ldr	r0, [r0]
	cmp	r0, #1
	beq	avoid_clkm_switch

	/* resuming from LP1 so handle 32KHz to CLKM/OSC clock switch */
	ldr	r0, =TEGRA_CLK_RESET_BASE
	mov	r1, #(1<<28)
	str	r1, [r0, #CLK_RESET_SCLK_BURST]
	str	r1, [r0, #CLK_RESET_CCLK_BURST]
	mov	r1, #0
	str	r1, [r0, #CLK_RESET_SCLK_DIVIDER]
	str	r1, [r0, #CLK_RESET_CCLK_DIVIDER]

avoid_clkm_switch:
	mov	r1, #0
	ldr	r0, =TZRAM_CPU_AVOID_CLKM_SWITCH
	str	r1, [r0]		@ clear the clkm switch flag

#if ARM_CPU_CORTEX_A15
	mrc	p15, 0x1, r0, c15, c0, 3	@ L2 prefetch control reg
	tst	r0, #0x1000
	orreq	r0, r0, #0x1000			@ disable throttling
	mcreq	p15, 0x1, r0, c15, c0, 3

	mrc	p15, 0, r0, c0, c0, 0		@ main ID register
	ubfx	r1, r0, #20, #4
	cmp	r1, #0x3			@ Compare to rev 3
	mrcge	p15, 0x1, r0, c15, c0, 4	@ ACTLR2
	orrge	r0, #(1<<31)			@ Enable regional clock gates
	mcrge	p15, 0x1, r0, c15, c0, 4	@ Rev 3 or higher

	/* This is only needed for cluster 0 with integrated L2 cache */
	mrc	p15, 0, r0, c0, c0, 5
	ubfx	r0, r0, #8, #4
	tst	r0, #1
	bne	lp1_enable_i_cache
	mrc	p15, 0x1, r0, c9, c0, 2
	and	r1, r0, #7
	cmp	r1, #2
	beq	lp1_enable_i_cache
	bic	r0, r0, #7
	orr	r0, r0, #2
	mcr	p15, 0x1, r0, c9, c0, 2
lp1_enable_i_cache:
	mrc	p15, 0, r0, c1, c0, 0
	orr	r0, r0, #0x1000
	mcr	p15, 0, r0, c1, c0, 0		@ enable i-cache
	b	lp1_cpu_context_restore		@ skip the A9/SCU blocks below
#endif

#if ARM_CPU_CORTEX_A9
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate BTAC, i-cache
	mcr	p15, 0, r0, c7, c5, 6		@ invalidate branch pred array
	mcr	p15, 0, r0, c8, c5, 0		@ invalidate instruction TLB
	mcr	p15, 0, r0, c8, c6, 0		@ invalidate data TLB
	mcr	p15, 0, r0, c8, c7, 0		@ invalidate unified TLB
	dsb
	isb
#endif

#if ARM_WITH_SCU
	/* CPUs other than 0 write the mask 0xf << (4 * cpu) to SCU offset 0xC */
	cpu_id	r0
	cmp	r0, #0
	ldrne	r1, =TEGRA_ARM_SCU_BASE
	movne	r0, r0, lsl #2
	movne	r2, #0xf
	movne	r2, r2, lsl r0
	strne	r2, [r1, #0xC]			@ invalidate SCU tags for CPU
#endif

#if ARM_CPU_CORTEX_A9
	dsb
	ldr	r0, =0x1000			@ whole register is written, not read-modify-write
	mcr	p15, 0, r0, c1, c0, 0		@ enable i-cache
	isb

	/*
	 * invalidate dcache
	 *
	 * Inline copy of invalidate_l1_v7 (this code must not branch outside
	 * the TZRAM image).  Clobbers r0-r6 and the flags.
	 */
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ CSSELR = 0: select the L1 data cache
	isb				@ make the selection visible to the CCSIDR read
	mrc	p15, 1, r0, c0, c0, 0	@ r0 = CCSIDR for the selected cache

	movw	r1, #0x7fff
	and	r2, r1, r0, lsr #13	@ NumSets - 1

	movw	r1, #0x3ff

	and	r3, r1, r0, lsr #3	@ NumWays - 1
	add	r2, r2, #1		@ NumSets

	and	r0, r0, #0x7
	add	r0, r0, #4		@ SetShift

	clz	r1, r3			@ WayShift
	add	r4, r3, #1		@ NumWays
1:	sub	r2, r2, #1		@ NumSets--
	mov	r3, r4			@ Temp = NumWays
2:	subs	r3, r3, #1		@ Temp--
	mov	r5, r3, lsl r1
	mov	r6, r2, lsl r0
	orr	r5, r5, r6		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr	p15, 0, r5, c7, c6, 2	@ invalidate this line by set/way
	bgt	2b			@ flags still from subs: loop until way 0 is done
	cmp	r2, #0
	bgt	1b			@ loop until set 0 is done
	dsb
	isb
#endif

lp1_cpu_context_restore:
	/* restore cpu context */
	ldr	r0, =TZRAM_CPU_CONTEXT		@ phys base of context
	mov	r1, #0				@ leave MMU disabled
	cpu_restore_context r0, r1, r2, r3
	isb

	ldr	r0, =TZRAM_TEMP_SVC_SP		@ set up temporary svc sp
	add	sp, r0, #(SCRATCH_WORDS * 4)

#if ARM_WITH_SCU
	ldr	r3, =TEGRA_ARM_SCU_BASE		@ enable SCU
	mov	r2, #1
	str	r2, [r3]

	ldr	r2, =0xf			@ r/w scu regs from all cpus
	str	r2, [r3, #0x50]
	ldr	r2, =0xfff
	str	r2, [r3, #0x54]
#endif
	/* allow full access to CP10 and CP11 */
	mrc	p15, 0, r0, c1, c0, 2		@ CPACR
	orr	r0, r0, #0x00F00000
	mcr	p15, 0, r0, c1, c0, 2		@ CPACR
	isb

	/*
	 * configure interrupts
	 *
	 * Write all-ones to the 25 consecutive 32-bit distributor registers
	 * at offsets 0x80 .. 0xE0 (one bit per interrupt, 0x320 interrupts).
	 * The base address is loaded once, outside the loop.
	 */
	ldr	r2, =TEGRA_ARM_INT_DIST_BASE
	add	r2, r2, #0x80			@ r2 = first register to write
	add	r3, r2, #(0x320 >> 3)		@ r3 = one past the last register
	mvn	r1, #0
interrupt_loop:
	str	r1, [r2], #4			@ store, then advance to the next word
	cmp	r2, r3
	bne	interrupt_loop

	/* restore reset vector */
	ldr	r3, =TZRAM_BOOT_SECONDARY_CPU_ADDR
	ldr	r2, [r3]
	ldr	r3, =TEGRA_EXCEPTION_VECTORS_BASE
	str	r2, [r3, #0x100]
	dmb	sy
	ldr	r3, [r3, #0x100]		@ dummy read to ensure write

	/* mark banked CPU intrs (31:0) be non-secure */
	ldr	r3, =TEGRA_ARM_INT_DIST_BASE
	mvn	r2, #0x0
	str	r2, [r3, #0x80]			@ DIST_IRQ_SEC
	ldr	r3, =TEGRA_ARM_INT_CPU_BASE
	ldr	r2, =0xff
	str	r2, [r3, #0x4]			@ GIC_CPU_PRIMASK

	/* restore monitor_vector base */
	ldr	r3, =TZRAM_MVBAR
	ldr	r3, [r3]
	mcr	p15, 0, r3, c12, c0, 1

	/* let normal world enable SMP, lock TLB, access CP10/11 */
	mrc	p15, 0, r0, c1, c1, 2		@ NSACR
	orr	r0, r0, #0x00000C00
	orr	r0, r0, #0x00060000
	mcr	p15, 0, r0, c1, c1, 2		@ NSACR

#if ARM_CPU_CORTEX_A9
	/* normal world can write SMP, but not FW */
	mrc	p15, 0, r0, c1, c0, 1
	orr	r0, r0, #(0x1 << 0)		@ enable FW
	mcr	p15, 0, r0, c1, c0, 1		@ ACTLR
#endif

	/* goto monitor mode */
	ldr	lr, =TZRAM_MON_STACK_TOP
	ldr	r0, [lr]			@ fetched in SVC mode, installed after the switch
	cps	#0x16				@ monitor mode
	mov	sp, r0				@ restore mon_sp

	/* NS world's entry point */
	ldr	lr, =TZRAM_NS_RESUME_ADDR
	ldr	lr, [lr]
	cmp	lr, #0
	beq	.				@ error: if NULL entry point

	/*
	 * enable SCR.FW and NS=1
	 *
	 * The whole register is written with exactly these two bits set,
	 * so it is not read first.
	 */
	mov	r0, #((1 << 4) | 0x1)		@ SCR.FW and NS=1
	mcr	p15, 0, r0, c1, c1, 0		@ SCR
	isb

	/*
	 * set shadow spsr_mon, with current reset CPSR state
	 * (e.g. intrs disabled) and changed the mode to SVC
	 */
	mrs	r0, cpsr
	bic	r0, #0x1f
	orr	r0, #0x13
	msr	spsr_cxfs, r0
	movs	pc, lr				@ exception return: CPSR = SPSR, pc = lr

/*
 * Emit the pending literal pool here, i.e. before _lp1_resume_end, so
 * that the constants loaded with "ldr rX, =..." above sit between
 * _lp1_resume and _lp1_resume_end.
 * NOTE(review): presumably these two labels bound the image that is
 * copied into TZRAM; confirm against the code that performs the copy.
 */
.ltorg
.globl _lp1_resume_end
_lp1_resume_end:
	b	.			/* spin if execution ever reaches here */

/* Address of the _lp1_resume entry point. */
.globl lp1_resume_addr
lp1_resume_addr:	.word	_lp1_resume
