ARM: tegra: clock: Re-factor Tegra3 cpu clocks
Alex Frid [Sun, 13 Mar 2011 08:41:14 +0000 (00:41 -0800)]
Added a second level of virtualization (on top of virtual cpu rate
control) to support the different Tegra3 CPU power modes: low power (LP)
mode and geared performance (G) mode. The virtual cpu complex (cpu_cmplx)
clock is defined as a child with two parents: the virtual cpu_lp and
virtual cpu_g clocks for the respective modes. The mode switch sequence
was integrated into the cpu_cmplx set parent implementation. (Before this
commit the mode switch was triggered outside the clock framework, which
created cpu clock/mode synchronization problems.)

Each mode clock is derived from its own super clock mux (cclk_lp and
cclk_g) to statically match the Tegra3 h/w layout. (Before this commit
the code had to dynamically synchronize the CPU mode and the active mux
selection.) This change also made it possible to support the PLLX output
divider for low power mode as a fixed 1:2 divider with bypass control
embedded into the cclk_lp parent selection.

Updated the auto and sysfs CPU mode switch calls to use the new clock
framework, and removed clock manipulation from the low-level mode switch
implementation.

Original-Change-Id: Ibc3cc495b2ff29e2d3417eff2bfd45535cbd015b
Reviewed-on: http://git-master/r/24734
Reviewed-by: Aleksandr Frid <afrid@nvidia.com>
Tested-by: Aleksandr Frid <afrid@nvidia.com>
Tested-by: Jin Qian <jqian@nvidia.com>
Reviewed-by: Scott Williams <scwilliams@nvidia.com>
Original-Change-Id: I23ae80edbf14fb22727a6fc317cd9e5baf8bd6be

Rebase-Id: Rdcd4a2165ebd92bf4caa35d68ca81d19a3789351

arch/arm/mach-tegra/clock.c
arch/arm/mach-tegra/clock.h
arch/arm/mach-tegra/cpu-tegra3.c
arch/arm/mach-tegra/platsmp.c
arch/arm/mach-tegra/pm-t3.c
arch/arm/mach-tegra/pm.h
arch/arm/mach-tegra/sysfs-cluster.c
arch/arm/mach-tegra/tegra3_clocks.c

index b2085a7..8131c72 100644 (file)
@@ -111,9 +111,6 @@ static unsigned long clk_predict_rate_from_parent(struct clk *c, struct clk *p)
 
        rate = clk_get_rate(p);
 
-       if (c->ops && c->ops->recalculate_rate)
-               c->ops->recalculate_rate(c);
-
        if (c->mul != 0 && c->div != 0) {
                rate *= c->mul;
                rate += c->div - 1; /* round up */
@@ -123,11 +120,8 @@ static unsigned long clk_predict_rate_from_parent(struct clk *c, struct clk *p)
        return rate;
 }
 
-static unsigned long clk_get_max_rate(struct clk *c)
+unsigned long clk_get_max_rate(struct clk *c)
 {
-       if (c->ops && c->ops->get_max_rate)
-               return c->ops->get_max_rate(c);
-       else
                return c->max_rate;
 }
 
@@ -404,8 +398,6 @@ unsigned long clk_get_rate_all_locked(struct clk *c)
 
        while (p) {
                c = p;
-               if (c->ops && c->ops->recalculate_rate)
-                       c->ops->recalculate_rate(c);
                if (c->mul != 0 && c->div != 0) {
                        mul *= c->mul;
                        div *= c->div;
@@ -578,6 +570,27 @@ static int __init tegra_keep_boot_clocks_setup(char *__unused)
 __setup("tegra_keep_boot_clocks", tegra_keep_boot_clocks_setup);
 
 /*
+ * Bootloader may not match kernel restrictions on CPU clock sources.
+ * Make sure CPU clock is sourced from either main or backup parent.
+ */
+static int tegra_sync_cpu_clock(void)
+{
+       int ret;
+       unsigned long rate;
+       struct clk *c = tegra_get_clock_by_name("cpu");
+
+       BUG_ON(!c);
+       rate = clk_get_rate(c);
+       ret = clk_set_rate(c, rate);
+       if (ret)
+               pr_err("%s: Failed to sync CPU at rate %lu\n", __func__, rate);
+       else
+               pr_info("CPU rate: %lu MHz\n", clk_get_rate(c) / 1000000);
+       return ret;
+}
+late_initcall(tegra_sync_cpu_clock);
+
+/*
  * Iterate through all clocks, disabling any for which the refcount is 0
  * but the clock init detected the bootloader left the clock on.
  */
index eb570fa..27d70d0 100644 (file)
@@ -86,12 +86,15 @@ struct clk_ops {
        int             (*set_parent)(struct clk *, struct clk *);
        int             (*set_rate)(struct clk *, unsigned long);
        long            (*round_rate)(struct clk *, unsigned long);
-       unsigned long   (*get_max_rate)(struct clk *);
-       void            (*recalculate_rate)(struct clk *);
        int             (*clk_cfg_ex)(struct clk *, enum tegra_clk_ex_param, u32);
        void            (*reset)(struct clk *, bool);
 };
 
+enum cpu_mode {
+       MODE_G = 0,
+       MODE_LP,
+};
+
 enum clk_state {
        UNINITIALIZED = 0,
        ON,
@@ -152,7 +155,7 @@ struct clk {
                struct {
                        struct clk                      *main;
                        struct clk                      *backup;
-                       unsigned long                   lp_max_rate;
+                       enum cpu_mode                   mode;
                } cpu;
                struct {
                        struct list_head                node;
@@ -193,6 +196,7 @@ unsigned long clk_measure_input_freq(void);
 int clk_reparent(struct clk *c, struct clk *parent);
 void tegra_clk_init_from_table(struct tegra_clk_init_table *table);
 void clk_set_cansleep(struct clk *c);
+unsigned long clk_get_max_rate(struct clk *c);
 unsigned long clk_get_rate_locked(struct clk *c);
 int clk_set_rate_locked(struct clk *c, unsigned long rate);
 void tegra2_sdmmc_tap_delay(struct clk *c, int delay);
index b7c6e97..c805903 100644 (file)
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/cpu.h>
+#include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
 #include "pm.h"
+#include "clock.h"
 
 #define INITIAL_STATE          TEGRA_HP_DISABLED
 #define IDLE_HYSTERESIS                100000
@@ -62,6 +64,9 @@ module_param(idle_bottom_freq, uint, 0644);
 
 static unsigned int lpcpu_max_freq;
 
+static struct clk *cpu_clk;
+static struct clk *cpu_g_clk;
+static struct clk *cpu_lp_clk;
 
 static struct {
        cputime64_t time_up_total;
@@ -189,18 +194,20 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work)
                                hotplug_wq, &hotplug_work, down_delay);
                        hp_stats_update(cpu, false);
                } else if (!is_lp_cluster() && !no_lp) {
-                       tegra_cluster_control(0, TEGRA_POWER_CLUSTER_LP |
-                                                TEGRA_POWER_CLUSTER_IMMEDIATE);
-                       hp_stats_update(CONFIG_NR_CPUS, true);
-                       hp_stats_update(0, false);
+                       if(!clk_set_parent(cpu_clk, cpu_lp_clk)) {
+                               hp_stats_update(CONFIG_NR_CPUS, true);
+                               hp_stats_update(0, false);
+                       } else
+                               queue_delayed_work(
+                                       hotplug_wq, &hotplug_work, down_delay);
                }
                break;
        case TEGRA_HP_UP:
                if (is_lp_cluster() && !no_lp) {
-                       tegra_cluster_control(0, TEGRA_POWER_CLUSTER_G |
-                                                TEGRA_POWER_CLUSTER_IMMEDIATE);
-                       hp_stats_update(CONFIG_NR_CPUS, false);
-                       hp_stats_update(0, true);
+                       if(!clk_set_parent(cpu_clk, cpu_g_clk)) {
+                               hp_stats_update(CONFIG_NR_CPUS, false);
+                               hp_stats_update(0, true);
+                       }
                        queue_delayed_work(
                                hotplug_wq, &hotplug_work, up2gn_delay);
                } else {
@@ -281,7 +288,7 @@ int tegra_auto_hotplug_init(void)
 {
        /*
         * Not bound to the issuer CPU (=> high-priority), has rescue worker
-        * task, single-threaded, frrezeable.
+        * task, single-threaded, freezable.
         */
        hotplug_wq = alloc_workqueue(
                "cpu-tegra3", WQ_UNBOUND | WQ_RESCUER | WQ_FREEZEABLE, 1);
@@ -289,7 +296,13 @@ int tegra_auto_hotplug_init(void)
                return -ENOMEM;
        INIT_DELAYED_WORK(&hotplug_work, tegra_auto_hotplug_work_func);
 
-       lpcpu_max_freq = tegra_get_lpcpu_max_rate() / 1000;
+       cpu_clk = clk_get_sys(NULL, "cpu");
+       cpu_g_clk = clk_get_sys(NULL, "cpu_g");
+       cpu_lp_clk = clk_get_sys(NULL, "cpu_lp");
+       if (IS_ERR(cpu_clk) || IS_ERR(cpu_g_clk) || IS_ERR(cpu_lp_clk))
+               return -ENOENT;
+
+       lpcpu_max_freq = clk_get_max_rate(cpu_lp_clk) / 1000;
        idle_top_freq = lpcpu_max_freq;
        idle_bottom_freq = idle_top_freq - IDLE_HYSTERESIS;
 
index e903da7..74af22d 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/io.h>
 #include <linux/smp.h>
 #include <linux/delay.h>
+#include <linux/clk.h>
 
 #include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
@@ -27,6 +28,7 @@
 #include <mach/powergate.h>
 
 #include "pm.h"
+#include "clock.h"
 
 #define EVP_CPU_RESET_VECTOR \
        (IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
@@ -79,16 +81,25 @@ int boot_secondary(unsigned int cpu, struct task_struct *idle)
        int status;
 
        if (is_lp_cluster()) {
+               struct clk *cpu_clk, *cpu_g_clk;
+
                /* The G CPU may not be available for a
                   variety of reasons. */
                status = is_g_cluster_available(cpu);
                if (status)
                        return status;
 
-               /* Switch to the G CPU before continuing. */
-               status = tegra_cluster_control(0,
-                                              TEGRA_POWER_CLUSTER_G |
-                                              TEGRA_POWER_CLUSTER_IMMEDIATE);
+               cpu_clk = tegra_get_clock_by_name("cpu");
+               cpu_g_clk = tegra_get_clock_by_name("cpu_g");
+
+               /* Switch to G CPU before continuing. */
+               if (!cpu_clk || !cpu_g_clk) {
+                       /* Early boot, clock infrastructure is not initialized
+                          - CPU mode switch is not allowed */
+                       status = -EINVAL;
+               } else
+                       status = clk_set_parent(cpu_clk, cpu_g_clk);
+
                if (status)
                        return status;
        }
index 1c57a01..92209dc 100644 (file)
@@ -94,29 +94,32 @@ static int cluster_switch_prolog_clock(unsigned int flags)
        u32 CclkBurstPolicy;
        u32 SuperCclkDivier;
 
-       /* Read the CPU clock settings for the currently active CPU. */
-       CclkBurstPolicy = readl(CAR_CCLK_BURST_POLICY);
-       SuperCclkDivier = readl(CAR_SUPER_CCLK_DIVIDER);
-
        /* Read the bond out register containing the G and LP CPUs. */
        reg = readl(CAR_BOND_OUT_V);
 
+       /* Sync G-PLLX divider bypass with LP (no effect on G, just to prevent
+          LP settings overwrite by save/restore code) */
+       CclkBurstPolicy = ~PLLX_DIV2_BYPASS_LP & readl(CAR_CCLKG_BURST_POLICY);
+       CclkBurstPolicy |= PLLX_DIV2_BYPASS_LP & readl(CAR_CCLKLP_BURST_POLICY);
+       writel(CclkBurstPolicy, CAR_CCLKG_BURST_POLICY);
+
        /* Switching to G? */
        if (flags & TEGRA_POWER_CLUSTER_G) {
                /* Do the G CPUs exist? */
                if (reg & CAR_BOND_OUT_V_CPU_G)
                        return -ENXIO;
 
+               /* Keep G CPU clock policy set by upper layer, with the
+                  exception of the transition via LP1 */
                if (flags & TEGRA_POWER_SDRAM_SELFREFRESH) {
                        /* In LP1 power mode come up on CLKM (oscillator) */
+                       CclkBurstPolicy = readl(CAR_CCLKG_BURST_POLICY);
                        CclkBurstPolicy |= ~0xF;
                        SuperCclkDivier = 0;
-               }
 
-               /* We will be running on the G CPU after the switch.
-                  Set up the G clock policy. */
-               writel(CclkBurstPolicy, CAR_CCLKG_BURST_POLICY);
-               writel(SuperCclkDivier, CAR_SUPER_CCLKG_DIVIDER);
+                       writel(CclkBurstPolicy, CAR_CCLKG_BURST_POLICY);
+                       writel(SuperCclkDivier, CAR_SUPER_CCLKG_DIVIDER);
+               }
 
                /* Hold G CPUs 1-3 in reset after the switch */
                reg = CPU_RESET(1) | CPU_RESET(2) | CPU_RESET(3);
@@ -144,43 +147,17 @@ static int cluster_switch_prolog_clock(unsigned int flags)
                if (reg & CAR_BOND_OUT_V_CPU_LP)
                        return -ENXIO;
 
+               /* Keep LP CPU clock policy set by upper layer, with the
+                  exception of the transition via LP1 */
                if (flags & TEGRA_POWER_SDRAM_SELFREFRESH) {
                        /* In LP1 power mode come up on CLKM (oscillator) */
+                       CclkBurstPolicy = readl(CAR_CCLKLP_BURST_POLICY);
                        CclkBurstPolicy |= ~0xF;
                        SuperCclkDivier = 0;
-               } else {
-                       /* It is possible that PLLX frequency is too high
-                          for the LP CPU. Reduce the frequency if necessary
-                          to prevent over-clocking when we switch. PLLX
-                          has an implied divide-by-2 when the LP CPU is
-                          active unless PLLX_DIV2_BYPASS_LP is selected. */
-
-                       struct clk *c = tegra_get_clock_by_name("cpu");
-                       unsigned long cur_rate = clk_get_rate(c);
-                       unsigned long max_rate = tegra_get_lpcpu_max_rate();
-                       int err;
-
-                       BUG_ON(max_rate == 0);
-                       if (cur_rate/2 > max_rate) {
-                               /* PLLX is running too fast for the LP CPU.
-                                  Reduce it to LP maximum rate which must
-                                  be multipled by 2 because of the LP CPU's
-                                  implied divied-by-2. */
-
-                               DEBUG_CLUSTER(("%s: G freq %lu\r\n", __func__,
-                                              cur_rate));
-                               err = clk_set_rate(c, max_rate * 2);
-                               BUG_ON(err);
-                               DEBUG_CLUSTER(("%s: G freq %lu\r\n", __func__,
-                                              clk_get_rate(c)));
-                       }
-               }
 
-               /* We will be running on the LP CPU after the switch.
-                  Set up the LP clock policy. */
-               CclkBurstPolicy &= ~PLLX_DIV2_BYPASS_LP;
-               writel(CclkBurstPolicy, CAR_CCLKLP_BURST_POLICY);
-               writel(SuperCclkDivier, CAR_SUPER_CCLKLP_DIVIDER);
+                       writel(CclkBurstPolicy, CAR_CCLKLP_BURST_POLICY);
+                       writel(SuperCclkDivier, CAR_SUPER_CCLKLP_DIVIDER);
+               }
 
                /* Take the LP CPU ut of reset after the switch */
                reg = CPU_RESET(0);
@@ -280,7 +257,9 @@ void tegra_cluster_switch_epilog(unsigned int flags)
 
        #if DEBUG_CLUSTER_SWITCH
        {
-               struct clk *c = tegra_get_clock_by_name("cpu");
+               /* FIXME: clock functions below are taking mutex */
+               struct clk *c = tegra_get_clock_by_name(
+                       is_lp_cluster() ? "cpu_lp" : "cpu_g");
                DEBUG_CLUSTER(("%s: %s freq %lu\r\n", __func__,
                        is_lp_cluster() ? "LP" : "G", clk_get_rate(c)));
        }
index b03a0f4..88a3f8f 100644 (file)
@@ -100,6 +100,7 @@ void tegra_idle_lp2_last(unsigned int flags);
 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
 #define INSTRUMENT_CLUSTER_SWITCH 0    /* Must be zero for ARCH_TEGRA_2x_SOC */
 #define DEBUG_CLUSTER_SWITCH 0         /* Must be zero for ARCH_TEGRA_2x_SOC */
+#define PARAMETERIZE_CLUSTER_SWITCH 0  /* Must be zero for ARCH_TEGRA_2x_SOC */
 static inline int tegra_cluster_control(unsigned int us, unsigned int flags)
 { return -EPERM; }
 #define tegra_cluster_switch_prolog(flags) do {} while(0)
@@ -108,13 +109,12 @@ static inline bool is_g_cluster_present(void)
 { return true; }
 static inline unsigned int is_lp_cluster(void)
 { return 0; }
-static inline unsigned long tegra_get_lpcpu_max_rate(void)
-{ return 0; }
 #define tegra_lp0_suspend_mc() do {} while(0)
 #define tegra_lp0_resume_mc() do {} while(0)
 #else
 #define INSTRUMENT_CLUSTER_SWITCH 1    /* Should be zero for shipping code */
 #define DEBUG_CLUSTER_SWITCH 1         /* Should be zero for shipping code */
+#define PARAMETERIZE_CLUSTER_SWITCH 1  /* Should be zero for shipping code */
 int tegra_cluster_control(unsigned int us, unsigned int flags);
 void tegra_cluster_switch_prolog(unsigned int flags);
 void tegra_cluster_switch_epilog(unsigned int flags);
@@ -131,7 +131,6 @@ static inline unsigned int is_lp_cluster(void)
        reg = readl(FLOW_CTRL_CLUSTER_CONTROL);
        return (reg & 1); /* 0 == G, 1 == LP*/
 }
-unsigned long tegra_get_lpcpu_max_rate(void);
 void tegra_lp0_suspend_mc(void);
 void tegra_lp0_resume_mc(void);
 #endif
@@ -142,6 +141,13 @@ extern unsigned int tegra_cluster_debug;
 #else
 #define DEBUG_CLUSTER(x) do { } while (0)
 #endif
+#if PARAMETERIZE_CLUSTER_SWITCH
+void tegra_cluster_switch_set_parameters(unsigned int us, unsigned int flags);
+#else
+static inline void tegra_cluster_switch_set_parameters(
+       unsigned int us, unsigned int flags)
+{ }
+#endif
 
 static inline void flowctrl_writel(unsigned long val, void __iomem *addr)
 {
index eaded6c..e6c06fc 100644 (file)
 #include <linux/kobject.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 
 #include <mach/iomap.h>
 #include "power.h"
+#include "clock.h"
 
 #define SYSFS_CLUSTER_PRINTS      1    /* Nonzero: enable status prints */
 #define SYSFS_CLUSTER_TRACE_PRINTS 0   /* Nonzero: enable trace prints */
@@ -239,6 +241,15 @@ static ssize_t sysfscluster_store(struct kobject *kobj,
        int e;
        int tmp;
        int cnt;
+       struct clk *cpu_clk = tegra_get_clock_by_name("cpu");
+       struct clk *cpu_g_clk = tegra_get_clock_by_name("cpu_g");
+       struct clk *cpu_lp_clk = tegra_get_clock_by_name("cpu_lp");
+       struct clk *new_parent = NULL;
+
+       if (!cpu_clk || !cpu_g_clk || !cpu_lp_clk) {
+               ret = -ENOSYS;
+               goto fail;
+       }
 
        TRACE_CLUSTER(("+sysfscluster_store: %p, %d\n", buf, count));
 
@@ -282,12 +293,9 @@ static ssize_t sysfscluster_store(struct kobject *kobj,
                        request |= TEGRA_POWER_SDRAM_SELFREFRESH;
                }
 #endif
-               e = tegra_cluster_control(wake_ms * 1000, request);
-               if (e) {
-                       PRINT_CLUSTER(("cluster/active: request failed (%d)\n",
-                                      e));
-                       ret = e;
-               }
+               tegra_cluster_switch_set_parameters(wake_ms * 1000, request);
+               new_parent = (flags & TEGRA_POWER_CLUSTER_LP) ?
+                       cpu_lp_clk : cpu_g_clk;
                break;
 
        case ClusterAttr_Immediate:
@@ -372,6 +380,14 @@ static ssize_t sysfscluster_store(struct kobject *kobj,
 
        spin_unlock(&cluster_lock);
 
+       if (new_parent) {
+               e = clk_set_parent(cpu_clk, new_parent);
+               if (e) {
+                       PRINT_CLUSTER(("cluster/active: request failed (%d)\n",
+                                      e));
+                       ret = e;
+               }
+       }
 fail:
        TRACE_CLUSTER(("-sysfscluster_store: %d\n", count));
        return ret;
index 48cfebb..b6b3a6c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * arch/arm/mach-tegra/tegra3_clocks.c
  *
- * Copyright (C) 2010 NVIDIA Corporation
+ * Copyright (C) 2010-2011 NVIDIA Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #define SUPER_STATE_RUN                        (0x2 << SUPER_STATE_SHIFT)
 #define SUPER_STATE_IRQ                        (0x3 << SUPER_STATE_SHIFT)
 #define SUPER_STATE_FIQ                        (0x4 << SUPER_STATE_SHIFT)
+#define SUPER_LP_DIV2_BYPASS           (0x1 << 16)
 #define SUPER_SOURCE_MASK              0xF
 #define        SUPER_FIQ_SOURCE_SHIFT          12
 #define        SUPER_IRQ_SOURCE_SHIFT          8
 #define        SUPER_IDLE_SOURCE_SHIFT         0
 
 #define SUPER_CLK_DIVIDER              0x04
-#define IS_PLLX_DIV2                   (!(clk_readl(0x370) & (1<<16)) && is_lp_cluster())
 
 #define BUS_CLK_DISABLE                        (1<<3)
 #define BUS_CLK_DIV_MASK               0x3
@@ -530,6 +530,8 @@ static void tegra3_super_clk_init(struct clk *c)
        shift = ((val & SUPER_STATE_MASK) == SUPER_STATE_IDLE) ?
                SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
        source = (val >> shift) & SUPER_SOURCE_MASK;
+       if (c->flags & DIV_2)
+               source |= val & SUPER_LP_DIV2_BYPASS;
        for (sel = c->inputs; sel->input != NULL; sel++) {
                if (sel->value == source)
                        break;
@@ -546,10 +548,9 @@ static int tegra3_super_clk_enable(struct clk *c)
 
 static void tegra3_super_clk_disable(struct clk *c)
 {
-       pr_debug("%s on clock %s\n", __func__, c->name);
-
-       /* oops - don't disable the CPU clock! */
-       BUG();
+       /* since tegra 3 has 2 CPU super clocks - low power lp-mode clock and
+          geared up g-mode super clock - mode switch may request to disable
+          either of them; accept request with no effect on h/w */
 }
 
 static int tegra3_super_clk_set_parent(struct clk *c, struct clk *p)
@@ -565,8 +566,18 @@ static int tegra3_super_clk_set_parent(struct clk *c, struct clk *p)
                SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
        for (sel = c->inputs; sel->input != NULL; sel++) {
                if (sel->input == p) {
+                       /* For LP mode super-clock switch between PLLX direct
+                          and divided-by-2 outputs is allowed only when other
+                          than PLLX clock source is current parent */
+                       if ((c->flags & DIV_2) && (p->flags & PLLX) &&
+                           ((sel->value ^ val) & SUPER_LP_DIV2_BYPASS)) {
+                               if (c->parent->flags & PLLX)
+                                       return -EINVAL;
+                               val ^= SUPER_LP_DIV2_BYPASS;
+                               clk_writel_delay(val, c->reg);
+                       }
                        val &= ~(SUPER_SOURCE_MASK << shift);
-                       val |= sel->value << shift;
+                       val |= (sel->value & SUPER_SOURCE_MASK) << shift;
 
                        if (c->refcnt)
                                clk_enable(p);
@@ -612,7 +623,7 @@ static struct clk_ops tegra_super_ops = {
  */
 static void tegra3_cpu_clk_init(struct clk *c)
 {
-       /* FIXME: max limits for different SKUs */
+       c->state = (!is_lp_cluster() == (c->u.cpu.mode == MODE_G))? ON : OFF;
 }
 
 static int tegra3_cpu_clk_enable(struct clk *c)
@@ -622,10 +633,9 @@ static int tegra3_cpu_clk_enable(struct clk *c)
 
 static void tegra3_cpu_clk_disable(struct clk *c)
 {
-       pr_debug("%s on clock %s\n", __func__, c->name);
-
-       /* oops - don't disable the CPU clock! */
-       BUG();
+       /* since tegra 3 has 2 virtual CPU clocks - low power lp-mode clock
+          and geared up g-mode clock - mode switch may request to disable
+          either of them; accept request with no effect on h/w */
 }
 
 static int tegra3_cpu_clk_set_rate(struct clk *c, unsigned long rate)
@@ -646,10 +656,12 @@ static int tegra3_cpu_clk_set_rate(struct clk *c, unsigned long rate)
        if (rate == clk_get_rate(c->u.cpu.backup))
                goto out;
 
-       ret = clk_set_rate(c->u.cpu.main, rate);
-       if (ret) {
-               pr_err("Failed to change cpu pll to %lu\n", rate);
-               goto out;
+       if (rate != clk_get_rate(c->u.cpu.main)) {
+               ret = clk_set_rate(c->u.cpu.main, rate);
+               if (ret) {
+                       pr_err("Failed to change cpu pll to %lu\n", rate);
+                       goto out;
+               }
        }
 
        ret = clk_set_parent(c->parent, c->u.cpu.main);
@@ -663,20 +675,149 @@ out:
        return ret;
 }
 
-static unsigned long tegra3_cpu_get_max_rate(struct clk *c)
-{
-       if (is_lp_cluster())
-               return c->u.cpu.lp_max_rate;
-       else
-               return c->max_rate;
-}
-
 static struct clk_ops tegra_cpu_ops = {
        .init     = tegra3_cpu_clk_init,
        .enable   = tegra3_cpu_clk_enable,
        .disable  = tegra3_cpu_clk_disable,
        .set_rate = tegra3_cpu_clk_set_rate,
-       .get_max_rate = tegra3_cpu_get_max_rate,
+};
+
+
+static void tegra3_cpu_cmplx_clk_init(struct clk *c)
+{
+       int i = !!is_lp_cluster();
+
+       BUG_ON(c->inputs[0].input->u.cpu.mode != MODE_G);
+       BUG_ON(c->inputs[1].input->u.cpu.mode != MODE_LP);
+       c->parent = c->inputs[i].input;
+}
+
+/* cpu complex clock provides second level virtualization (on top of
+   virtual cpu rate control) in order to hide the CPU mode switch
+   sequence */
+#if PARAMETERIZE_CLUSTER_SWITCH
+static unsigned int switch_delay;
+static unsigned int switch_flags;
+static DEFINE_SPINLOCK(parameters_lock);
+
+void tegra_cluster_switch_set_parameters(unsigned int us, unsigned int flags)
+{
+       spin_lock(&parameters_lock);
+       switch_delay = us;
+       switch_flags = flags;
+       spin_unlock(&parameters_lock);
+}
+#endif
+
+static int tegra3_cpu_cmplx_clk_enable(struct clk *c)
+{
+       return 0;
+}
+
+static void tegra3_cpu_cmplx_clk_disable(struct clk *c)
+{
+       pr_debug("%s on clock %s\n", __func__, c->name);
+
+       /* oops - don't disable the CPU complex clock! */
+       BUG();
+}
+
+static int tegra3_cpu_cmplx_clk_set_rate(struct clk *c, unsigned long rate)
+{
+       return clk_set_rate(c->parent, rate);
+}
+
+static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p)
+{
+       int ret;
+       unsigned int flags, delay;
+       const struct clk_mux_sel *sel;
+       unsigned long rate = clk_get_rate(c->parent);
+
+       pr_debug("%s: %s %s\n", __func__, c->name, p->name);
+       BUG_ON(c->parent->u.cpu.mode != (is_lp_cluster() ? MODE_LP : MODE_G));
+
+       for (sel = c->inputs; sel->input != NULL; sel++) {
+               if (sel->input == p)
+                       break;
+       }
+       if (!sel->input)
+               return -EINVAL;
+
+#if PARAMETERIZE_CLUSTER_SWITCH
+       spin_lock(&parameters_lock);
+       flags = switch_flags;
+       delay = switch_delay;
+       switch_flags = 0;
+       spin_unlock(&parameters_lock);
+
+       if (flags) {
+               /* over-clocking after the switch - allow, but lower rate */
+               if (rate > p->max_rate) {
+                       rate = p->max_rate;
+                       ret = clk_set_rate(c->parent, rate);
+                       if (ret) {
+                               pr_err("%s: Failed to set rate %lu for %s\n",
+                                       __func__, rate, p->name);
+                               return ret;
+                       }
+               }
+       } else
+#endif
+       {
+               if (p == c->parent)             /* already switched - exit*/
+                       return 0;
+
+               if (rate > p->max_rate) {       /* over-clocking - no switch */
+                       pr_warn("%s: No %s mode switch to %s at rate %lu\n",
+                                __func__, c->name, p->name, rate);
+                       return -ECANCELED;
+               }
+               flags = TEGRA_POWER_CLUSTER_IMMEDIATE;
+               delay = 0;
+       }
+       flags |= (p->u.cpu.mode == MODE_LP) ? TEGRA_POWER_CLUSTER_LP :
+               TEGRA_POWER_CLUSTER_G;
+
+       /* Since in both LP and G mode CPU main and backup sources are the
+          same, set rate on the new parent just synchronizes super-clock
+          muxes before mode switch with no PLL re-locking */
+       ret = clk_set_rate(p, rate);
+       if (ret) {
+               pr_err("%s: Failed to set rate %lu for %s\n",
+                      __func__, rate, p->name);
+               return ret;
+       }
+
+       /* Enabling new parent scales new mode voltage rail in advance
+          before the switch happens */
+       if (c->refcnt)
+               clk_enable(p);
+
+       /* switch CPU mode */
+       ret = tegra_cluster_control(delay, flags);
+       if (ret) {
+               if (c->refcnt)
+                       clk_disable(p);
+               pr_err("%s: Failed to switch %s mode to %s\n",
+                      __func__, c->name, p->name);
+               return ret;
+       }
+
+       /* Disabling old parent scales old mode voltage rail */
+       if (c->refcnt && c->parent)
+               clk_disable(c->parent);
+
+       clk_reparent(c, p);
+       return 0;
+}
+
+static struct clk_ops tegra_cpu_cmplx_ops = {
+       .init     = tegra3_cpu_cmplx_clk_init,
+       .enable   = tegra3_cpu_cmplx_clk_enable,
+       .disable  = tegra3_cpu_cmplx_clk_disable,
+       .set_rate = tegra3_cpu_cmplx_clk_set_rate,
+       .set_parent = tegra3_cpu_cmplx_clk_set_parent,
 };
 
 /* virtual cop clock functions. Used to acquire the fake 'cop' clock to
@@ -910,7 +1051,7 @@ static void tegra3_pll_clk_init(struct clk *c)
 
        if (c->flags & PLL_FIXED && !(val & PLL_BASE_OVERRIDE)) {
                const struct clk_pll_freq_table *sel;
-               unsigned long input_rate = clk_get_rate(c->parent);
+               unsigned long input_rate = clk_get_rate_locked(c->parent);
                for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
                        if (sel->input_rate == input_rate &&
                                sel->output_rate == c->u.pll.fixed_rate) {
@@ -1286,14 +1427,10 @@ static void tegra3_pll_div_clk_init(struct clk *c)
                c->mul = 2;
        } else if (c->flags & DIV_2) {
                c->state = ON;
-               if (c->flags & PLLD) {
+               if (c->flags & (PLLD | PLLX)) {
                        c->div = 2;
                        c->mul = 1;
                }
-               else if (c->flags & PLLX) {
-                       c->div = (IS_PLLX_DIV2) ? 2 : 1;
-                       c->mul = 1;
-               }
                else
                        BUG();
        } else {
@@ -1371,16 +1508,9 @@ static int tegra3_pll_div_clk_set_rate(struct clk *c, unsigned long rate)
                        c->mul = 2;
                        return 0;
                }
-       } else if (c->flags & DIV_2) {
-               if (c->flags & PLLD) {
-                       return clk_set_rate(c->parent, rate * 2);
-               }
-               else if (c->flags & PLLX) {
-                       if (IS_PLLX_DIV2)
-                               rate *= 2;
-                       return clk_set_rate(c->parent, rate);
-               }
-       }
+       } else if (c->flags & DIV_2)
+               return clk_set_rate(c->parent, rate * 2);
+
        return -EINVAL;
 }
 
@@ -1395,13 +1525,11 @@ static long tegra3_pll_div_clk_round_rate(struct clk *c, unsigned long rate)
                if (divider < 0)
                        return divider;
                return DIV_ROUND_UP(parent_rate * 2, divider + 2);
-       }
-       return -EINVAL;
-}
+       } else if (c->flags & DIV_2)
+               /* no rounding - fixed DIV_2 dividers pass rate to parent PLL */
+               return rate;
 
-static void tegra3_pllx_div_clk_recalculate_rate(struct clk *c)
-{
-       c->div = (IS_PLLX_DIV2) ? 2 : 1;
+       return -EINVAL;
 }
 
 static struct clk_ops tegra_pll_div_ops = {
@@ -1412,21 +1540,6 @@ static struct clk_ops tegra_pll_div_ops = {
        .round_rate             = tegra3_pll_div_clk_round_rate,
 };
 
-static struct clk_ops tegra_plld_div_ops = {
-       .init                   = tegra3_pll_div_clk_init,
-       .enable                 = tegra3_pll_div_clk_enable,
-       .disable                = tegra3_pll_div_clk_disable,
-       .set_rate               = tegra3_pll_div_clk_set_rate,
-};
-
-static struct clk_ops tegra_pllx_div_ops = {
-       .init                   = tegra3_pll_div_clk_init,
-       .enable                 = tegra3_pll_div_clk_enable,
-       .disable                = tegra3_pll_div_clk_disable,
-       .set_rate               = tegra3_pll_div_clk_set_rate,
-       .recalculate_rate = tegra3_pllx_div_clk_recalculate_rate,
-};
-
 /* Periph clk ops */
 static inline u32 periph_clk_source_mask(struct clk *c)
 {
@@ -2421,7 +2534,7 @@ static struct clk tegra_pll_d = {
 
 static struct clk tegra_pll_d_out0 = {
        .name      = "pll_d_out0",
-       .ops       = &tegra_plld_div_ops,
+       .ops       = &tegra_pll_div_ops,
        .flags     = DIV_2 | PLLD,
        .parent    = &tegra_pll_d,
        .max_rate  = 500000000,
@@ -2448,7 +2561,7 @@ static struct clk tegra_pll_d2 = {
 
 static struct clk tegra_pll_d2_out0 = {
        .name      = "pll_d2_out0",
-       .ops       = &tegra_plld_div_ops,
+       .ops       = &tegra_pll_div_ops,
        .flags     = DIV_2 | PLLD,
        .parent    = &tegra_pll_d2,
        .max_rate  = 500000000,
@@ -2563,7 +2676,7 @@ static struct clk tegra_pll_x = {
 
 static struct clk tegra_pll_x_out0 = {
        .name      = "pll_x_out0",
-       .ops       = &tegra_pllx_div_ops,
+       .ops       = &tegra_pll_div_ops,
        .flags     = DIV_2 | PLLX,
        .parent    = &tegra_pll_x,
        .max_rate  = 1000000000,
@@ -2776,7 +2889,20 @@ static void init_clk_out_mux(void)
 }
 
 /* Peripheral muxes */
-static struct clk_mux_sel mux_cclk[] = {
+static struct clk_mux_sel mux_cclk_g[] = {
+       { .input = &tegra_clk_m,        .value = 0},
+       { .input = &tegra_pll_c,        .value = 1},
+       { .input = &tegra_clk_32k,      .value = 2},
+       { .input = &tegra_pll_m,        .value = 3},
+       { .input = &tegra_pll_p,        .value = 4},
+       { .input = &tegra_pll_p_out4,   .value = 5},
+       { .input = &tegra_pll_p_out3,   .value = 6},
+       /* { .input = &tegra_clk_d,     .value = 7}, - no use on tegra3 */
+       { .input = &tegra_pll_x,        .value = 8},
+       { 0, 0},
+};
+
+static struct clk_mux_sel mux_cclk_lp[] = {
        { .input = &tegra_clk_m,        .value = 0},
        { .input = &tegra_pll_c,        .value = 1},
        { .input = &tegra_clk_32k,      .value = 2},
@@ -2786,6 +2912,7 @@ static struct clk_mux_sel mux_cclk[] = {
        { .input = &tegra_pll_p_out3,   .value = 6},
        /* { .input = &tegra_clk_d,     .value = 7}, - no use on tegra3 */
        { .input = &tegra_pll_x_out0,   .value = 8},
+       { .input = &tegra_pll_x,        .value = 8 | SUPER_LP_DIV2_BYPASS},
        { 0, 0},
 };
 
@@ -2801,10 +2928,19 @@ static struct clk_mux_sel mux_sclk[] = {
        { 0, 0},
 };
 
-static struct clk tegra_clk_cclk = {
-       .name   = "cclk",
-       .inputs = mux_cclk,
-       .reg    = 0x20,
+static struct clk tegra_clk_cclk_g = {
+       .name   = "cclk_g",
+       .inputs = mux_cclk_g,
+       .reg    = 0x368,
+       .ops    = &tegra_super_ops,
+       .max_rate = 1000000000,
+};
+
+static struct clk tegra_clk_cclk_lp = {
+       .name   = "cclk_lp",
+       .flags  = DIV_2,
+       .inputs = mux_cclk_lp,
+       .reg    = 0x370,
        .ops    = &tegra_super_ops,
        .max_rate = 1000000000,
 };
@@ -2818,18 +2954,43 @@ static struct clk tegra_clk_sclk = {
        .min_rate = 120000000,
 };
 
-static struct clk tegra_clk_virtual_cpu = {
-       .name      = "cpu",
-       .parent    = &tegra_clk_cclk,
+static struct clk tegra_clk_virtual_cpu_g = {
+       .name      = "cpu_g",
+       .parent    = &tegra_clk_cclk_g,
        .ops       = &tegra_cpu_ops,
        .max_rate  = 1000000000,
        .u.cpu = {
-               .main      = &tegra_pll_x_out0,
+               .main      = &tegra_pll_x,
                .backup    = &tegra_pll_p,
-               .lp_max_rate = 456000000,
+               .mode      = MODE_G,
        },
 };
 
+static struct clk tegra_clk_virtual_cpu_lp = {
+       .name      = "cpu_lp",
+       .parent    = &tegra_clk_cclk_lp,
+       .ops       = &tegra_cpu_ops,
+       .max_rate  = 456000000,
+       .u.cpu = {
+               .main      = &tegra_pll_x,
+               .backup    = &tegra_pll_p,
+               .mode      = MODE_LP,
+       },
+};
+
+static struct clk_mux_sel mux_cpu_cmplx[] = {
+       { .input = &tegra_clk_virtual_cpu_g,    .value = 0},
+       { .input = &tegra_clk_virtual_cpu_lp,   .value = 1},
+       { 0, 0},
+};
+
+static struct clk tegra_clk_cpu_cmplx = {
+       .name      = "cpu",
+       .inputs    = mux_cpu_cmplx,
+       .ops       = &tegra_cpu_cmplx_ops,
+       .max_rate  = 1000000000,
+};
+
 static struct clk tegra_clk_twd = {
        .name     = "twd",
        .parent   = &tegra_clk_cclk,
@@ -2843,7 +3004,7 @@ static struct clk tegra_clk_cop = {
        .name      = "cop",
        .parent    = &tegra_clk_sclk,
        .ops       = &tegra_cop_ops,
-       .max_rate  = 335000000,
+       .max_rate  = 333500000,
 };
 
 static struct clk tegra_clk_hclk = {
@@ -3220,11 +3381,14 @@ struct clk *tegra_ptr_clks[] = {
        &tegra_cml0_clk,
        &tegra_cml1_clk,
        &tegra_pciex_clk,
-       &tegra_clk_cclk,
+       &tegra_clk_cclk_g,
+       &tegra_clk_cclk_lp,
        &tegra_clk_sclk,
        &tegra_clk_hclk,
        &tegra_clk_pclk,
-       &tegra_clk_virtual_cpu,
+       &tegra_clk_virtual_cpu_g,
+       &tegra_clk_virtual_cpu_lp,
+       &tegra_clk_cpu_cmplx,
        &tegra_clk_blink,
        &tegra_clk_cop,
        &tegra_clk_emc,
@@ -3276,11 +3440,6 @@ void __init tegra_soc_init_clocks(void)
                tegra3_init_one_clock(&tegra_clk_out_list[i]);
 }
 
-unsigned long tegra_get_lpcpu_max_rate(void)
-{
-       return tegra_clk_virtual_cpu.u.cpu.lp_max_rate;
-}
-
 #ifdef CONFIG_CPU_FREQ
 
 /*
@@ -3345,7 +3504,7 @@ unsigned long tegra_emc_to_cpu_ratio(unsigned long cpu_rate)
 
 #ifdef CONFIG_PM
 static u32 clk_rst_suspend[RST_DEVICES_NUM + CLK_OUT_ENB_NUM +
-                          PERIPH_CLK_SOURCE_NUM + 16];
+                          PERIPH_CLK_SOURCE_NUM + 18];
 
 void tegra_clk_suspend(void)
 {
@@ -3363,8 +3522,10 @@ void tegra_clk_suspend(void)
        *ctx++ = clk_readl(tegra_pll_a_out0.reg);
        *ctx++ = clk_readl(tegra_pll_c_out1.reg);
 
-       *ctx++ = clk_readl(tegra_clk_cclk.reg);
-       *ctx++ = clk_readl(tegra_clk_cclk.reg + SUPER_CLK_DIVIDER);
+       *ctx++ = clk_readl(tegra_clk_cclk_g.reg);
+       *ctx++ = clk_readl(tegra_clk_cclk_g.reg + SUPER_CLK_DIVIDER);
+       *ctx++ = clk_readl(tegra_clk_cclk_lp.reg);
+       *ctx++ = clk_readl(tegra_clk_cclk_lp.reg + SUPER_CLK_DIVIDER);
 
        *ctx++ = clk_readl(tegra_clk_sclk.reg);
        *ctx++ = clk_readl(tegra_clk_sclk.reg + SUPER_CLK_DIVIDER);
@@ -3419,8 +3580,10 @@ void tegra_clk_resume(void)
        clk_writel(*ctx++, tegra_pll_a_out0.reg);
        clk_writel(*ctx++, tegra_pll_c_out1.reg);
 
-       clk_writel(*ctx++, tegra_clk_cclk.reg);
-       clk_writel(*ctx++, tegra_clk_cclk.reg + SUPER_CLK_DIVIDER);
+       clk_writel(*ctx++, tegra_clk_cclk_g.reg);
+       clk_writel(*ctx++, tegra_clk_cclk_g.reg + SUPER_CLK_DIVIDER);
+       clk_writel(*ctx++, tegra_clk_cclk_lp.reg);
+       clk_writel(*ctx++, tegra_clk_cclk_lp.reg + SUPER_CLK_DIVIDER);
 
        clk_writel(*ctx++, tegra_clk_sclk.reg);
        clk_writel(*ctx++, tegra_clk_sclk.reg + SUPER_CLK_DIVIDER);