gpu: nvgpu: Remove redundant locked variable
linux-3.10.git: drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
1 /*
2  * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "hw_mc_gk20a.h"
31 #include "hw_pwr_gk20a.h"
32 #include "hw_top_gk20a.h"
33
34 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
35
36 #define gk20a_dbg_pmu(fmt, arg...) \
37         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
38
39 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
40 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
41                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
42 static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work);
43 static void pmu_save_zbc(struct gk20a *g, u32 entries);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
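/*
 * The accessors below paper over layout differences between the v0 and
 * v1 PMU firmware interfaces (command-line args, DMEM allocations, the
 * init message and the perfmon commands).  gk20a_init_pmu() picks one
 * set or the other based on the ucode's app_version.
 */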
50 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
51 {
52         return sizeof(struct pmu_cmdline_args_v0);
53 }
54
55 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
56 {
57         return sizeof(struct pmu_cmdline_args_v1);
58 }
59
60 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
61 {
62         pmu->args_v1.cpu_freq_hz = freq;
63 }
64
65 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
66 {
67         pmu->args_v0.cpu_freq_hz = freq;
68 }
69
70 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
71 {
72         return (void *)(&pmu->args_v1);
73 }
74
75 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
76 {
77         return (void *)(&pmu->args_v0);
78 }
79
80 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
81 {
82         return sizeof(struct pmu_allocation_v1);
83 }
84
85 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
86 {
87         return sizeof(struct pmu_allocation_v0);
88 }
89
90 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
91         void **pmu_alloc_ptr, void *assign_ptr)
92 {
93         struct pmu_allocation_v1 **pmu_a_ptr =
94                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
95         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
96 }
97
98 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
99         void **pmu_alloc_ptr, void *assign_ptr)
100 {
101         struct pmu_allocation_v0 **pmu_a_ptr =
102                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
103         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
104 }
105
106 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
107         void *pmu_alloc_ptr, u16 size)
108 {
109         struct pmu_allocation_v1 *pmu_a_ptr =
110                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
111         pmu_a_ptr->alloc.dmem.size = size;
112 }
113
114 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
115         void *pmu_alloc_ptr, u16 size)
116 {
117         struct pmu_allocation_v0 *pmu_a_ptr =
118                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
119         pmu_a_ptr->alloc.dmem.size = size;
120 }
121
122 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
123         void *pmu_alloc_ptr)
124 {
125         struct pmu_allocation_v1 *pmu_a_ptr =
126                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
127         return pmu_a_ptr->alloc.dmem.size;
128 }
129
130 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
131         void *pmu_alloc_ptr)
132 {
133         struct pmu_allocation_v0 *pmu_a_ptr =
134                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
135         return pmu_a_ptr->alloc.dmem.size;
136 }
137
138 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
139         void *pmu_alloc_ptr)
140 {
141         struct pmu_allocation_v1 *pmu_a_ptr =
142                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
143         return pmu_a_ptr->alloc.dmem.offset;
144 }
145
146 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
147         void *pmu_alloc_ptr)
148 {
149         struct pmu_allocation_v0 *pmu_a_ptr =
150                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
151         return pmu_a_ptr->alloc.dmem.offset;
152 }
153
154 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
155         void *pmu_alloc_ptr)
156 {
157         struct pmu_allocation_v1 *pmu_a_ptr =
158                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
159         return &pmu_a_ptr->alloc.dmem.offset;
160 }
161
162 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
163         void *pmu_alloc_ptr)
164 {
165         struct pmu_allocation_v0 *pmu_a_ptr =
166                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
167         return &pmu_a_ptr->alloc.dmem.offset;
168 }
169
170 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
171         void *pmu_alloc_ptr, u32 offset)
172 {
173         struct pmu_allocation_v1 *pmu_a_ptr =
174                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
175         pmu_a_ptr->alloc.dmem.offset = offset;
176 }
177
178 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
179         void *pmu_alloc_ptr, u32 offset)
180 {
181         struct pmu_allocation_v0 *pmu_a_ptr =
182                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
183         pmu_a_ptr->alloc.dmem.offset = offset;
184 }
185
186 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
187 {
188         return (void *)(&(init->pmu_init_v1));
189 }
190
191 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
192 {
193         struct pmu_init_msg_pmu_v1 *init =
194                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
195         return init->sw_managed_area_offset;
196 }
197
198 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
199 {
200         struct pmu_init_msg_pmu_v1 *init =
201                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
202         return init->sw_managed_area_size;
203 }
204
205 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
206 {
207         return (void *)(&(init->pmu_init_v0));
208 }
209
210 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
211 {
212         struct pmu_init_msg_pmu_v0 *init =
213                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
214         return init->sw_managed_area_offset;
215 }
216
217 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
218 {
219         struct pmu_init_msg_pmu_v0 *init =
220                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
221         return init->sw_managed_area_size;
222 }
223
224 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
225 {
226         return sizeof(struct pmu_perfmon_cmd_start_v1);
227 }
228
229 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
230 {
231         return sizeof(struct pmu_perfmon_cmd_start_v0);
232 }
233
234 static int get_perfmon_cmd_start_offsetofvar_v1(
235         enum pmu_perfmon_cmd_start_fields field)
236 {
237         switch (field) {
238         case COUNTER_ALLOC:
239                 return offsetof(struct pmu_perfmon_cmd_start_v1,
240                 counter_alloc);
241         default:
242                 return -EINVAL;
243                 break;
244         }
245         return 0;
246 }
247
248 static int get_perfmon_cmd_start_offsetofvar_v0(
249         enum pmu_perfmon_cmd_start_fields field)
250 {
251         switch (field) {
252         case COUNTER_ALLOC:
253                 return offsetof(struct pmu_perfmon_cmd_start_v0,
254                 counter_alloc);
255         default:
256                 return -EINVAL;
257                 break;
258         }
259         return 0;
260 }
261
262 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
263 {
264         return sizeof(struct pmu_perfmon_cmd_init_v1);
265 }
266
267 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
268 {
269         return sizeof(struct pmu_perfmon_cmd_init_v0);
270 }
271
272 static int get_perfmon_cmd_init_offsetofvar_v1(
273         enum pmu_perfmon_cmd_start_fields field)
274 {
275         switch (field) {
276         case COUNTER_ALLOC:
277                 return offsetof(struct pmu_perfmon_cmd_init_v1,
278                 counter_alloc);
279         default:
280                 return -EINVAL;
281                 break;
282         }
283         return 0;
284 }
285
286 static int get_perfmon_cmd_init_offsetofvar_v0(
287         enum pmu_perfmon_cmd_start_fields field)
288 {
289         switch (field) {
290         case COUNTER_ALLOC:
291                 return offsetof(struct pmu_perfmon_cmd_init_v0,
292                 counter_alloc);
293         default:
294                 return -EINVAL;
295                 break;
296         }
297         return 0;
298 }
299
300 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
301 {
302         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
303         start->cmd_type = value;
304 }
305
306 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
307 {
308         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
309         start->cmd_type = value;
310 }
311
312 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
313 {
314         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
315         start->group_id = value;
316 }
317
318 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
319 {
320         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
321         start->group_id = value;
322 }
323
324 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
325 {
326         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
327         start->state_id = value;
328 }
329
330 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
331 {
332         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
333         start->state_id = value;
334 }
335
336 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
337 {
338         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
339         start->flags = value;
340 }
341
342 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
343 {
344         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
345         start->flags = value;
346 }
347
348 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
349 {
350         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
351         return start->flags;
352 }
353
354 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
355 {
356         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
357         return start->flags;
358 }
359
360 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
361         u16 value)
362 {
363         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
364         init->sample_buffer = value;
365 }
366
367 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
368         u16 value)
369 {
370         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
371         init->sample_buffer = value;
372 }
373
374 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
375         u8 value)
376 {
377         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
378         init->to_decrease_count = value;
379 }
380
381 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
382         u8 value)
383 {
384         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
385         init->to_decrease_count = value;
386 }
387
388 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
389         u8 value)
390 {
391         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
392         init->base_counter_id = value;
393 }
394
395 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
396         u8 value)
397 {
398         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
399         init->base_counter_id = value;
400 }
401
402 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
403         u32 value)
404 {
405         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
406         init->sample_period_us = value;
407 }
408
409 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
410         u32 value)
411 {
412         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
413         init->sample_period_us = value;
414 }
415
416 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
417         u8 value)
418 {
419         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
420         init->num_counters = value;
421 }
422
423 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
424         u8 value)
425 {
426         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
427         init->num_counters = value;
428 }
429
430 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
431         u8 value)
432 {
433         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
434         init->samples_in_moving_avg = value;
435 }
436
437 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
438         u8 value)
439 {
440         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
441         init->samples_in_moving_avg = value;
442 }
443
444 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
445         u32 id, void *pmu_init_msg)
446 {
447         struct pmu_init_msg_pmu_v0 *init =
448                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
449         queue->index    = init->queue_info[id].index;
450         queue->offset   = init->queue_info[id].offset;
451         queue->size = init->queue_info[id].size;
452 }
453
454 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
455         u32 id, void *pmu_init_msg)
456 {
457         struct pmu_init_msg_pmu_v1 *init =
458                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
459         queue->index    = init->queue_info[id].index;
460         queue->offset   = init->queue_info[id].offset;
461         queue->size = init->queue_info[id].size;
462 }
463
464 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
465 {
466         return (void *)(&seq->in_v1);
467 }
468
469 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
470 {
471         return (void *)(&seq->in_v0);
472 }
473
474 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
475 {
476         return (void *)(&seq->out_v1);
477 }
478
479 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
480 {
481         return (void *)(&seq->out_v0);
482 }
483
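/*
 * Wire the version-specific accessors above into g->ops.pmu_ver,
 * selected by the app_version reported in the PMU ucode descriptor.
 */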
484 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
485 {
486         struct gk20a *g = pmu->g;
487         switch (pmu->desc->app_version) {
488         case APP_VERSION_1:
489                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
490                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
491                         pmu_cmdline_size_v1;
492                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
493                         set_pmu_cmdline_args_cpufreq_v1;
494                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
495                         get_pmu_cmdline_args_ptr_v1;
496                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
497                         get_pmu_allocation_size_v1;
498                 g->ops.pmu_ver.set_pmu_allocation_ptr =
499                         set_pmu_allocation_ptr_v1;
500                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
501                         pmu_allocation_set_dmem_size_v1;
502                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
503                         pmu_allocation_get_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
505                         pmu_allocation_get_dmem_offset_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
507                         pmu_allocation_get_dmem_offset_addr_v1;
508                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
509                         pmu_allocation_set_dmem_offset_v1;
510                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
511                         get_pmu_init_msg_pmu_queue_params_v1;
512                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
513                         get_pmu_msg_pmu_init_msg_ptr_v1;
514                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
515                         get_pmu_init_msg_pmu_sw_mg_off_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
517                         get_pmu_init_msg_pmu_sw_mg_size_v1;
518                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
519                         get_pmu_perfmon_cmd_start_size_v1;
520                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
521                         get_perfmon_cmd_start_offsetofvar_v1;
522                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
523                         perfmon_start_set_cmd_type_v1;
524                 g->ops.pmu_ver.perfmon_start_set_group_id =
525                         perfmon_start_set_group_id_v1;
526                 g->ops.pmu_ver.perfmon_start_set_state_id =
527                         perfmon_start_set_state_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_flags =
529                         perfmon_start_set_flags_v1;
530                 g->ops.pmu_ver.perfmon_start_get_flags =
531                         perfmon_start_get_flags_v1;
532                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
533                         get_pmu_perfmon_cmd_init_size_v1;
534                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
535                         get_perfmon_cmd_init_offsetofvar_v1;
536                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
537                         perfmon_cmd_init_set_sample_buffer_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
539                         perfmon_cmd_init_set_dec_cnt_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
541                         perfmon_cmd_init_set_base_cnt_id_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
543                         perfmon_cmd_init_set_samp_period_us_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
545                         perfmon_cmd_init_set_num_cnt_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
547                         perfmon_cmd_init_set_mov_avg_v1;
548                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
549                         get_pmu_sequence_in_alloc_ptr_v1;
550                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
551                         get_pmu_sequence_out_alloc_ptr_v1;
552                 break;
553         case APP_VERSION_0:
554                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
555                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
556                         pmu_cmdline_size_v0;
557                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
558                         set_pmu_cmdline_args_cpufreq_v0;
559                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
560                         get_pmu_cmdline_args_ptr_v0;
561                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
562                         get_pmu_allocation_size_v0;
563                 g->ops.pmu_ver.set_pmu_allocation_ptr =
564                         set_pmu_allocation_ptr_v0;
565                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
566                         pmu_allocation_set_dmem_size_v0;
567                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
568                         pmu_allocation_get_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
570                         pmu_allocation_get_dmem_offset_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
572                         pmu_allocation_get_dmem_offset_addr_v0;
573                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
574                         pmu_allocation_set_dmem_offset_v0;
575                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
576                         get_pmu_init_msg_pmu_queue_params_v0;
577                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
578                         get_pmu_msg_pmu_init_msg_ptr_v0;
579                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
580                         get_pmu_init_msg_pmu_sw_mg_off_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
582                         get_pmu_init_msg_pmu_sw_mg_size_v0;
583                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
584                         get_pmu_perfmon_cmd_start_size_v0;
585                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
586                         get_perfmon_cmd_start_offsetofvar_v0;
587                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
588                         perfmon_start_set_cmd_type_v0;
589                 g->ops.pmu_ver.perfmon_start_set_group_id =
590                         perfmon_start_set_group_id_v0;
591                 g->ops.pmu_ver.perfmon_start_set_state_id =
592                         perfmon_start_set_state_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_flags =
594                         perfmon_start_set_flags_v0;
595                 g->ops.pmu_ver.perfmon_start_get_flags =
596                         perfmon_start_get_flags_v0;
597                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
598                         get_pmu_perfmon_cmd_init_size_v0;
599                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
600                         get_perfmon_cmd_init_offsetofvar_v0;
601                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
602                         perfmon_cmd_init_set_sample_buffer_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
604                         perfmon_cmd_init_set_dec_cnt_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
606                         perfmon_cmd_init_set_base_cnt_id_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
608                         perfmon_cmd_init_set_samp_period_us_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
610                         perfmon_cmd_init_set_num_cnt_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
612                         perfmon_cmd_init_set_mov_avg_v0;
613                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
614                         get_pmu_sequence_in_alloc_ptr_v0;
615                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
616                         get_pmu_sequence_out_alloc_ptr_v0;
617                 break;
618         default:
619                 gk20a_err(dev_from_gk20a(pmu->g),
620                 "PMU code version not supported\n");
621                 return -EINVAL;
622                 break;
623         }
624         return 0;
625 }
626
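/*
 * DMEM is accessed through the falcon's auto-incrementing DMEMC/DMEMD
 * port registers: the offset is programmed once, then every DMEMD
 * access moves one 32-bit word.  pmu_copy_lock serializes users of the
 * port.
 */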
627 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
628                 u32 src, u8 *dst, u32 size, u8 port)
629 {
630         struct gk20a *g = pmu->g;
631         u32 i, words, bytes;
632         u32 data, addr_mask;
633         u32 *dst_u32 = (u32 *)dst;
634
635         if (size == 0) {
636                 gk20a_err(dev_from_gk20a(g),
637                         "size is zero");
638                 return;
639         }
640
641         if (src & 0x3) {
642                 gk20a_err(dev_from_gk20a(g),
643                         "src (0x%08x) not 4-byte aligned", src);
644                 return;
645         }
646
647         mutex_lock(&pmu->pmu_copy_lock);
648
649         words = size >> 2;
650         bytes = size & 0x3;
651
652         addr_mask = pwr_falcon_dmemc_offs_m() |
653                     pwr_falcon_dmemc_blk_m();
654
655         src &= addr_mask;
656
657         gk20a_writel(g, pwr_falcon_dmemc_r(port),
658                 src | pwr_falcon_dmemc_aincr_f(1));
659
660         for (i = 0; i < words; i++)
661                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
662
663         if (bytes > 0) {
664                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
665                 for (i = 0; i < bytes; i++) {
666                         dst[(words << 2) + i] = ((u8 *)&data)[i];
667                         gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x",
668                                         i, dst[(words << 2) + i]);
669                 }
670         }
671         mutex_unlock(&pmu->pmu_copy_lock);
672         return;
673 }
674
675 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
676                 u32 dst, u8 *src, u32 size, u8 port)
677 {
678         struct gk20a *g = pmu->g;
679         u32 i, words, bytes;
680         u32 data, addr_mask;
681         u32 *src_u32 = (u32 *)src;
682
683         if (size == 0) {
684                 gk20a_err(dev_from_gk20a(g),
685                         "size is zero");
686                 return;
687         }
688
689         if (dst & 0x3) {
690                 gk20a_err(dev_from_gk20a(g),
691                         "dst (0x%08x) not 4-byte aligned", dst);
692                 return;
693         }
694
695         mutex_lock(&pmu->pmu_copy_lock);
696
697         words = size >> 2;
698         bytes = size & 0x3;
699
700         addr_mask = pwr_falcon_dmemc_offs_m() |
701                     pwr_falcon_dmemc_blk_m();
702
703         dst &= addr_mask;
704
705         gk20a_writel(g, pwr_falcon_dmemc_r(port),
706                 dst | pwr_falcon_dmemc_aincw_f(1));
707
708         for (i = 0; i < words; i++)
709                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
710
711         if (bytes > 0) {
712                 data = 0;
713                 for (i = 0; i < bytes; i++)
714                         ((u8 *)&data)[i] = src[(words << 2) + i];
715                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
716         }
717
718         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
719         size = ALIGN(size, 4);
720         if (data != dst + size) {
721                 gk20a_err(dev_from_gk20a(g),
722                         "copy failed. bytes written %d, expected %d",
723                         data - dst, size);
724         }
725         mutex_unlock(&pmu->pmu_copy_lock);
726         return;
727 }
728
729 static int pmu_idle(struct pmu_gk20a *pmu)
730 {
731         struct gk20a *g = pmu->g;
732         unsigned long end_jiffies = jiffies +
733                 msecs_to_jiffies(2000);
734         u32 idle_stat;
735
736         /* wait for pmu idle */
737         do {
738                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
739
740                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
741                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
742                         break;
743                 }
744
745                 if (time_after_eq(jiffies, end_jiffies)) {
746                         gk20a_err(dev_from_gk20a(g),
747                                 "timeout waiting pmu idle : 0x%08x",
748                                 idle_stat);
749                         return -EBUSY;
750                 }
751                 usleep_range(100, 200);
752         } while (1);
753
754         gk20a_dbg_fn("done");
755         return 0;
756 }
757
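/*
 * All falcon interrupt sources are masked first.  When enabling, the
 * routing is programmed via IRQDEST, a subset of sources is unmasked
 * via IRQMSET, and the PMU bit is set again in the MC interrupt mask.
 */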
758 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
759 {
760         struct gk20a *g = pmu->g;
761
762         gk20a_dbg_fn("");
763
764         gk20a_writel(g, mc_intr_mask_0_r(),
765                 gk20a_readl(g, mc_intr_mask_0_r()) &
766                 ~mc_intr_mask_0_pmu_enabled_f());
767         gk20a_writel(g, mc_intr_mask_1_r(),
768                 gk20a_readl(g, mc_intr_mask_1_r()) &
769                 ~mc_intr_mask_1_pmu_enabled_f());
770
771         gk20a_writel(g, pwr_falcon_irqmclr_r(),
772                 pwr_falcon_irqmclr_gptmr_f(1)  |
773                 pwr_falcon_irqmclr_wdtmr_f(1)  |
774                 pwr_falcon_irqmclr_mthd_f(1)   |
775                 pwr_falcon_irqmclr_ctxsw_f(1)  |
776                 pwr_falcon_irqmclr_halt_f(1)   |
777                 pwr_falcon_irqmclr_exterr_f(1) |
778                 pwr_falcon_irqmclr_swgen0_f(1) |
779                 pwr_falcon_irqmclr_swgen1_f(1) |
780                 pwr_falcon_irqmclr_ext_f(0xff));
781
782         if (enable) {
783                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
784                 gk20a_writel(g, pwr_falcon_irqdest_r(),
785                         pwr_falcon_irqdest_host_gptmr_f(0)    |
786                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
787                         pwr_falcon_irqdest_host_mthd_f(0)     |
788                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
789                         pwr_falcon_irqdest_host_halt_f(1)     |
790                         pwr_falcon_irqdest_host_exterr_f(0)   |
791                         pwr_falcon_irqdest_host_swgen0_f(1)   |
792                         pwr_falcon_irqdest_host_swgen1_f(0)   |
793                         pwr_falcon_irqdest_host_ext_f(0xff)   |
794                         pwr_falcon_irqdest_target_gptmr_f(1)  |
795                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
796                         pwr_falcon_irqdest_target_mthd_f(0)   |
797                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
798                         pwr_falcon_irqdest_target_halt_f(0)   |
799                         pwr_falcon_irqdest_target_exterr_f(0) |
800                         pwr_falcon_irqdest_target_swgen0_f(0) |
801                         pwr_falcon_irqdest_target_swgen1_f(0) |
802                         pwr_falcon_irqdest_target_ext_f(0xff));
803
804                 /* 0=disable, 1=enable */
805                 gk20a_writel(g, pwr_falcon_irqmset_r(),
806                         pwr_falcon_irqmset_gptmr_f(1)  |
807                         pwr_falcon_irqmset_wdtmr_f(1)  |
808                         pwr_falcon_irqmset_mthd_f(0)   |
809                         pwr_falcon_irqmset_ctxsw_f(0)  |
810                         pwr_falcon_irqmset_halt_f(1)   |
811                         pwr_falcon_irqmset_exterr_f(1) |
812                         pwr_falcon_irqmset_swgen0_f(1) |
813                         pwr_falcon_irqmset_swgen1_f(1));
814
815                 gk20a_writel(g, mc_intr_mask_0_r(),
816                         gk20a_readl(g, mc_intr_mask_0_r()) |
817                         mc_intr_mask_0_pmu_enabled_f());
818         }
819
820         gk20a_dbg_fn("done");
821 }
822
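/*
 * Enabling the PWR engine in MC kicks off IMEM/DMEM scrubbing; the
 * falcon must not be touched until the scrubbing bits in DMACTL clear.
 */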
823 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
824 {
825         struct gk20a *g = pmu->g;
826
827         gk20a_dbg_fn("");
828
829         if (enable) {
830                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
831                 gk20a_enable(g, mc_enable_pwr_enabled_f());
832
833                 do {
834                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
835                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
836                                  pwr_falcon_dmactl_imem_scrubbing_m());
837
838                         if (!w) {
839                                 gk20a_dbg_fn("done");
840                                 return 0;
841                         }
842                         udelay(GR_IDLE_CHECK_DEFAULT);
843                 } while (--retries || !tegra_platform_is_silicon());
844
845                 gk20a_disable(g, mc_enable_pwr_enabled_f());
846                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
847
848                 return -ETIMEDOUT;
849         } else {
850                 gk20a_disable(g, mc_enable_pwr_enabled_f());
851                 return 0;
852         }
853 }
854
855 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
856 {
857         struct gk20a *g = pmu->g;
858         u32 pmc_enable;
859         int err;
860
861         gk20a_dbg_fn("");
862
863         if (!enable) {
864                 pmc_enable = gk20a_readl(g, mc_enable_r());
865                 if (mc_enable_pwr_v(pmc_enable) !=
866                     mc_enable_pwr_disabled_v()) {
867
868                         pmu_enable_irq(pmu, false);
869                         pmu_enable_hw(pmu, false);
870                 }
871         } else {
872                 err = pmu_enable_hw(pmu, true);
873                 if (err)
874                         return err;
875
876                 /* TBD: post reset */
877
878                 err = pmu_idle(pmu);
879                 if (err)
880                         return err;
881
882                 pmu_enable_irq(pmu, true);
883         }
884
885         gk20a_dbg_fn("done");
886         return 0;
887 }
888
889 static int pmu_reset(struct pmu_gk20a *pmu)
890 {
891         int err;
892
893         err = pmu_idle(pmu);
894         if (err)
895                 return err;
896
897         /* TBD: release pmu hw mutex */
898
899         err = pmu_enable(pmu, false);
900         if (err)
901                 return err;
902
903         /* TBD: cancel all sequences */
904         /* TBD: init all sequences and state tables */
905         /* TBD: restore pre-init message handler */
906
907         err = pmu_enable(pmu, true);
908         if (err)
909                 return err;
910
911         return 0;
912 }
913
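/*
 * Low-level boot: point the PMU at its instance block, copy the
 * command-line args to the top of DMEM, push the bootloader argument
 * list through DMEMD, DMA the bootloader into IMEM block by block and
 * finally start the falcon CPU at the bootloader entry point.
 */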
914 static int pmu_bootstrap(struct pmu_gk20a *pmu)
915 {
916         struct gk20a *g = pmu->g;
917         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
918         struct mm_gk20a *mm = &g->mm;
919         struct pmu_ucode_desc *desc = pmu->desc;
920         u64 addr_code, addr_data, addr_load;
921         u32 i, blocks, addr_args;
922
923         gk20a_dbg_fn("");
924
925         gk20a_writel(g, pwr_falcon_itfen_r(),
926                 gk20a_readl(g, pwr_falcon_itfen_r()) |
927                 pwr_falcon_itfen_ctxen_enable_f());
928         gk20a_writel(g, pwr_pmu_new_instblk_r(),
929                 pwr_pmu_new_instblk_ptr_f(
930                         mm->pmu.inst_block.cpu_pa >> 12) |
931                 pwr_pmu_new_instblk_valid_f(1) |
932                 pwr_pmu_new_instblk_target_sys_coh_f());
933
934         /* TBD: load all other surfaces */
935
936         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
937                 clk_get_rate(platform->clk[1]));
938
939         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
940                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
941                         << GK20A_PMU_DMEM_BLKSIZE2) -
942                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
943
944         pmu_copy_to_dmem(pmu, addr_args,
945                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
946                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
947
948         gk20a_writel(g, pwr_falcon_dmemc_r(0),
949                 pwr_falcon_dmemc_offs_f(0) |
950                 pwr_falcon_dmemc_blk_f(0)  |
951                 pwr_falcon_dmemc_aincw_f(1));
952
953         addr_code = u64_lo32((pmu->ucode.pmu_va +
954                         desc->app_start_offset +
955                         desc->app_resident_code_offset) >> 8);
956         addr_data = u64_lo32((pmu->ucode.pmu_va +
957                         desc->app_start_offset +
958                         desc->app_resident_data_offset) >> 8);
959         addr_load = u64_lo32((pmu->ucode.pmu_va +
960                         desc->bootloader_start_offset) >> 8);
961
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
970         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
971         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
972
973         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
974                 addr_load - (desc->bootloader_imem_offset >> 8));
975
976         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
977
978         for (i = 0; i < blocks; i++) {
979                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
982                         desc->bootloader_imem_offset + (i << 8));
983                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
984                         pwr_falcon_dmatrfcmd_imem_f(1)  |
985                         pwr_falcon_dmatrfcmd_write_f(0) |
986                         pwr_falcon_dmatrfcmd_size_f(6)  |
987                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
988         }
989
990         gk20a_writel(g, pwr_falcon_bootvec_r(),
991                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
992
993         gk20a_writel(g, pwr_falcon_cpuctl_r(),
994                 pwr_falcon_cpuctl_startcpu_f(1));
995
996         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
997
998         return 0;
999 }
1000
1001 static void pmu_seq_init(struct pmu_gk20a *pmu)
1002 {
1003         u32 i;
1004
1005         memset(pmu->seq, 0,
1006                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1007         memset(pmu->pmu_seq_tbl, 0,
1008                 sizeof(pmu->pmu_seq_tbl));
1009
1010         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1011                 pmu->seq[i].id = i;
1012 }
1013
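/*
 * Sequences track commands that are in flight to the PMU; a bitmap
 * (pmu_seq_tbl) guards their allocation.
 */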
1014 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1015                         struct pmu_sequence **pseq)
1016 {
1017         struct gk20a *g = pmu->g;
1018         struct pmu_sequence *seq;
1019         u32 index;
1020
1021         mutex_lock(&pmu->pmu_seq_lock);
1022         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1023                                 sizeof(pmu->pmu_seq_tbl));
1024         if (index >= sizeof(pmu->pmu_seq_tbl)) {
1025                 gk20a_err(dev_from_gk20a(g),
1026                         "no free sequence available");
1027                 mutex_unlock(&pmu->pmu_seq_lock);
1028                 return -EAGAIN;
1029         }
1030         set_bit(index, pmu->pmu_seq_tbl);
1031         mutex_unlock(&pmu->pmu_seq_lock);
1032
1033         seq = &pmu->seq[index];
1034         seq->state = PMU_SEQ_STATE_PENDING;
1035
1036         *pseq = seq;
1037         return 0;
1038 }
1039
1040 static void pmu_seq_release(struct pmu_gk20a *pmu,
1041                         struct pmu_sequence *seq)
1042 {
1043         struct gk20a *g = pmu->g;
1044         seq->state      = PMU_SEQ_STATE_FREE;
1045         seq->desc       = PMU_INVALID_SEQ_DESC;
1046         seq->callback   = NULL;
1047         seq->cb_params  = NULL;
1048         seq->msg        = NULL;
1049         seq->out_payload = NULL;
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1052         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1053                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1054
1055         clear_bit(seq->id, pmu->pmu_seq_tbl);
1056 }
1057
1058 static int pmu_queue_init(struct pmu_gk20a *pmu,
1059                 u32 id, union pmu_init_msg_pmu *init)
1060 {
1061         struct gk20a *g = pmu->g;
1062         struct pmu_queue *queue = &pmu->queue[id];
1063         queue->id       = id;
1064         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1065
1066         queue->mutex_id = id;
1067         mutex_init(&queue->mutex);
1068
1069         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1070                 id, queue->index, queue->offset, queue->size);
1071
1072         return 0;
1073 }
1074
1075 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1076                         u32 *head, bool set)
1077 {
1078         struct gk20a *g = pmu->g;
1079
1080         BUG_ON(!head);
1081
1082         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1083
1084                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1085                         return -EINVAL;
1086
1087                 if (!set)
1088                         *head = pwr_pmu_queue_head_address_v(
1089                                 gk20a_readl(g,
1090                                         pwr_pmu_queue_head_r(queue->index)));
1091                 else
1092                         gk20a_writel(g,
1093                                 pwr_pmu_queue_head_r(queue->index),
1094                                 pwr_pmu_queue_head_address_f(*head));
1095         } else {
1096                 if (!set)
1097                         *head = pwr_pmu_msgq_head_val_v(
1098                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1099                 else
1100                         gk20a_writel(g,
1101                                 pwr_pmu_msgq_head_r(),
1102                                 pwr_pmu_msgq_head_val_f(*head));
1103         }
1104
1105         return 0;
1106 }
1107
1108 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1109                         u32 *tail, bool set)
1110 {
1111         struct gk20a *g = pmu->g;
1112
1113         BUG_ON(!tail);
1114
1115         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1116
1117                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1118                         return -EINVAL;
1119
1120                 if (!set)
1121                         *tail = pwr_pmu_queue_tail_address_v(
1122                                 gk20a_readl(g,
1123                                         pwr_pmu_queue_tail_r(queue->index)));
1124                 else
1125                         gk20a_writel(g,
1126                                 pwr_pmu_queue_tail_r(queue->index),
1127                                 pwr_pmu_queue_tail_address_f(*tail));
1128         } else {
1129                 if (!set)
1130                         *tail = pwr_pmu_msgq_tail_val_v(
1131                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1132                 else
1133                         gk20a_writel(g,
1134                                 pwr_pmu_msgq_tail_r(),
1135                                 pwr_pmu_msgq_tail_val_f(*tail));
1136         }
1137
1138         return 0;
1139 }
1140
1141 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1142                         u32 offset, u8 *dst, u32 size)
1143 {
1144         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1145 }
1146
1147 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1148                         u32 offset, u8 *src, u32 size)
1149 {
1150         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1151 }
1152
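/*
 * PMU HW mutex protocol: obtain a token from the MUTEX_ID register,
 * write it into the per-mutex register and read it back.  If the
 * read-back matches, the mutex is owned; otherwise the token is
 * released and the acquire is retried.
 */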
1153 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1154 {
1155         struct gk20a *g = pmu->g;
1156         struct pmu_mutex *mutex;
1157         u32 data, owner, max_retry;
1158
1159         if (!pmu->initialized)
1160                 return 0;
1161
1162         BUG_ON(!token);
1163         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1164         BUG_ON(id > pmu->mutex_cnt);
1165
1166         mutex = &pmu->mutex[id];
1167
1168         owner = pwr_pmu_mutex_value_v(
1169                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1170
1171         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1172                 BUG_ON(mutex->ref_cnt == 0);
1173                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1174                 mutex->ref_cnt++;
1175                 return 0;
1176         }
1177
1178         max_retry = 40;
1179         do {
1180                 data = pwr_pmu_mutex_id_value_v(
1181                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1182                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1183                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1184                         gk20a_warn(dev_from_gk20a(g),
1185                                 "fail to generate mutex token: val 0x%08x",
1186                                 owner);
1187                         usleep_range(20, 40);
1188                         continue;
1189                 }
1190
1191                 owner = data;
1192                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1193                         pwr_pmu_mutex_value_f(owner));
1194
1195                 data = pwr_pmu_mutex_value_v(
1196                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1197
1198                 if (owner == data) {
1199                         mutex->ref_cnt = 1;
1200                         *token = owner;
1201                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1202                                 mutex->index, *token);
1203                         return 0;
1204                 } else {
1205                         gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1206                                 mutex->index);
1207
1208                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1209                         data = set_field(data,
1210                                 pwr_pmu_mutex_id_release_value_m(),
1211                                 pwr_pmu_mutex_id_release_value_f(owner));
1212                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1213
1214                         usleep_range(20, 40);
1215                         continue;
1216                 }
1217         } while (max_retry-- > 0);
1218
1219         return -EBUSY;
1220 }
1221
1222 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1223 {
1224         struct gk20a *g = pmu->g;
1225         struct pmu_mutex *mutex;
1226         u32 owner, data;
1227
1228         if (!pmu->initialized)
1229                 return 0;
1230
1231         BUG_ON(!token);
1232         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1233         BUG_ON(id > pmu->mutex_cnt);
1234
1235         mutex = &pmu->mutex[id];
1236
1237         owner = pwr_pmu_mutex_value_v(
1238                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1239
1240         if (*token != owner) {
1241                 gk20a_err(dev_from_gk20a(g),
1242                         "requester 0x%08x NOT match owner 0x%08x",
1243                         *token, owner);
1244                 return -EINVAL;
1245         }
1246
1247         if (--mutex->ref_cnt == 0) {
1248                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1249                         pwr_pmu_mutex_value_initial_lock_f());
1250
1251                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1252                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1253                         pwr_pmu_mutex_id_release_value_f(owner));
1254                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1255
1256                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1257                         mutex->index, *token);
1258         }
1259
1260         return 0;
1261 }
1262
1263 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1264                         struct pmu_queue *queue)
1265 {
1266         int err;
1267
1268         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1269                 return 0;
1270
1271         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1272                 mutex_lock(&queue->mutex);
1273                 return 0;
1274         }
1275
1276         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1277         return err;
1278 }
1279
1280 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1281                         struct pmu_queue *queue)
1282 {
1283         int err;
1284
1285         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1286                 return 0;
1287
1288         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1289                 mutex_unlock(&queue->mutex);
1290                 return 0;
1291         }
1292
1293         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1294         return err;
1295 }
1296
1297 /* called by pmu_read_message, no lock */
1298 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1299                         struct pmu_queue *queue)
1300 {
1301         u32 head, tail;
1302
1303         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1304         if (queue->opened && queue->oflag == OFLAG_READ)
1305                 tail = queue->position;
1306         else
1307                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1308
1309         return head == tail;
1310 }
1311
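/*
 * Command queues are simple rings in DMEM.  If the contiguous space
 * left at the end of the ring is too small for the write, the caller
 * must rewind to the start of the queue (signalled via *need_rewind).
 */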
1312 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1313                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1314 {
1315         u32 head, tail, free;
1316         bool rewind = false;
1317
1318         size = ALIGN(size, QUEUE_ALIGNMENT);
1319
1320         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1321         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1322
1323         if (head >= tail) {
1324                 free = queue->offset + queue->size - head;
1325                 free -= PMU_CMD_HDR_SIZE;
1326
1327                 if (size > free) {
1328                         rewind = true;
1329                         head = queue->offset;
1330                 }
1331         }
1332
1333         if (head < tail)
1334                 free = tail - head - 1;
1335
1336         if (need_rewind)
1337                 *need_rewind = rewind;
1338
1339         return size <= free;
1340 }
1341
1342 static int pmu_queue_push(struct pmu_gk20a *pmu,
1343                         struct pmu_queue *queue, void *data, u32 size)
1344 {
1345         gk20a_dbg_fn("");
1346
1347         if (!(queue->opened && queue->oflag == OFLAG_WRITE)) {
1348                 gk20a_err(dev_from_gk20a(pmu->g),
1349                         "queue not opened for write");
1350                 return -EINVAL;
1351         }
1352
1353         pmu_queue_write(pmu, queue->position, data, size);
1354         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1355         return 0;
1356 }
1357
1358 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1359                         struct pmu_queue *queue, void *data, u32 size,
1360                         u32 *bytes_read)
1361 {
1362         u32 head, tail, used;
1363
1364         *bytes_read = 0;
1365
1366         if (!(queue->opened && queue->oflag == OFLAG_READ)) {
1367                 gk20a_err(dev_from_gk20a(pmu->g),
1368                         "queue not opened for read");
1369                 return -EINVAL;
1370         }
1371
1372         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1373         tail = queue->position;
1374
1375         if (head == tail)
1376                 return 0;
1377
1378         if (head > tail)
1379                 used = head - tail;
1380         else
1381                 used = queue->offset + queue->size - tail;
1382
1383         if (size > used) {
1384                 gk20a_warn(dev_from_gk20a(pmu->g),
1385                         "queue has less data than requested read size");
1386                 size = used;
1387         }
1388
1389         pmu_queue_read(pmu, tail, data, size);
1390         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1391         *bytes_read = size;
1392         return 0;
1393 }
1394
1395 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1396                         struct pmu_queue *queue)
1397 {
1398         struct pmu_cmd cmd;
1399
1400         gk20a_dbg_fn("");
1401
1402         if (!queue->opened) {
1403                 gk20a_err(dev_from_gk20a(pmu->g),
1404                         "queue not opened");
1405                 return;
1406         }
1407
1408         if (queue->oflag == OFLAG_WRITE) {
1409                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1410                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1411                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1412                 gk20a_dbg_pmu("queue %d rewound", queue->id);
1413         }
1414
1415         queue->position = queue->offset;
1416         return;
1417 }
1418
1419 /* open for read and lock the queue */
1420 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1421                         struct pmu_queue *queue)
1422 {
1423         int err;
1424
1425         err = pmu_queue_lock(pmu, queue);
1426         if (err)
1427                 return err;
1428
1429         if (queue->opened)
1430                 BUG();
1431
1432         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1433         queue->oflag = OFLAG_READ;
1434         queue->opened = true;
1435
1436         return 0;
1437 }
1438
1439 /* open for write and lock the queue
1440    make sure there's enough free space for the write */
1441 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1442                         struct pmu_queue *queue, u32 size)
1443 {
1444         bool rewind = false;
1445         int err;
1446
1447         err = pmu_queue_lock(pmu, queue);
1448         if (err)
1449                 return err;
1450
1451         if (queue->opened)
1452                 BUG();
1453
1454         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1455                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1456                 return -EAGAIN;
1457         }
1458
1459         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1460         queue->oflag = OFLAG_WRITE;
1461         queue->opened = true;
1462
1463         if (rewind)
1464                 pmu_queue_rewind(pmu, queue);
1465
1466         return 0;
1467 }
1468
1469 /* close and unlock the queue */
1470 static int pmu_queue_close(struct pmu_gk20a *pmu,
1471                         struct pmu_queue *queue, bool commit)
1472 {
1473         if (!queue->opened)
1474                 return 0;
1475
1476         if (commit) {
1477                 if (queue->oflag == OFLAG_READ) {
1478                         pmu_queue_tail(pmu, queue,
1479                                 &queue->position, QUEUE_SET);
1480                 }
1481                 else {
1482                         pmu_queue_head(pmu, queue,
1483                                 &queue->position, QUEUE_SET);
1484                 }
1485         }
1486
1487         queue->opened = false;
1488
1489         pmu_queue_unlock(pmu, queue);
1490
1491         return 0;
1492 }
1493
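/*
 * gk20a_remove_pmu_support() zeroes the whole pmu struct; these two
 * helpers preserve and restore the fields that must survive that.
 */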
1494 static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu,
1495                         struct gk20a_pmu_save_state *save)
1496 {
1497         save->seq = pmu->seq;
1498         save->next_seq_desc = pmu->next_seq_desc;
1499         save->mutex = pmu->mutex;
1500         save->mutex_cnt = pmu->mutex_cnt;
1501         save->desc = pmu->desc;
1502         save->ucode = pmu->ucode;
1503         save->elpg_enable = pmu->elpg_enable;
1504         save->pg_wq = pmu->pg_wq;
1505         save->seq_buf = pmu->seq_buf;
1506         save->pg_buf = pmu->pg_buf;
1507         save->sw_ready = pmu->sw_ready;
1508         save->pg_init = pmu->pg_init;
1509 }
1510
1511 static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
1512                         struct gk20a_pmu_save_state *save)
1513 {
1514         pmu->seq = save->seq;
1515         pmu->next_seq_desc = save->next_seq_desc;
1516         pmu->mutex = save->mutex;
1517         pmu->mutex_cnt = save->mutex_cnt;
1518         pmu->desc = save->desc;
1519         pmu->ucode = save->ucode;
1520         pmu->elpg_enable = save->elpg_enable;
1521         pmu->pg_wq = save->pg_wq;
1522         pmu->seq_buf = save->seq_buf;
1523         pmu->pg_buf = save->pg_buf;
1524         pmu->sw_ready = save->sw_ready;
1525         pmu->pg_init = save->pg_init;
1526 }
1527
1528 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1529 {
1530         struct gk20a_pmu_save_state save;
1531
1532         gk20a_dbg_fn("");
1533
1534         gk20a_allocator_destroy(&pmu->dmem);
1535
1536         /* Save the stuff you don't want to lose */
1537         gk20a_save_pmu_sw_state(pmu, &save);
1538
1539         /* this function is also called by pmu_destroy outside the gk20a deinit
1540            path that releases the gk20a struct, so zero the whole structure here. */
1541         memset(pmu, 0, sizeof(struct pmu_gk20a));
1542
1543         /* Restore stuff you want to keep */
1544         gk20a_restore_pmu_sw_state(pmu, &save);
1545 }
1546
1547 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1548 {
1549         struct pmu_gk20a *pmu = &g->pmu;
1550
1551         gk20a_dbg_fn("");
1552
1553         pmu_enable_hw(pmu, true);
1554
1555         return 0;
1556 }
1557
1558 static void pmu_elpg_enable_allow(struct work_struct *work);
1559
1560 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1561 {
1562         struct pmu_gk20a *pmu = &g->pmu;
1563         struct mm_gk20a *mm = &g->mm;
1564         struct vm_gk20a *vm = &mm->pmu.vm;
1565         struct device *d = dev_from_gk20a(g);
1566         int i, err = 0;
1567         u8 *ptr;
1568         void *ucode_ptr;
1569         struct sg_table *sgt_pmu_ucode;
1570         struct sg_table *sgt_seq_buf;
1571         DEFINE_DMA_ATTRS(attrs);
1572         dma_addr_t iova;
1573
1574         gk20a_dbg_fn("");
1575
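        /*
         * If SW state was already set up (presumably preserved across a
         * rail-gate/un-gate cycle), only re-seed the mutex ids and sequence
         * tracking and skip the allocation/firmware work below.
         */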
1576         if (pmu->sw_ready) {
1577                 for (i = 0; i < pmu->mutex_cnt; i++) {
1578                         pmu->mutex[i].id    = i;
1579                         pmu->mutex[i].index = i;
1580                 }
1581                 pmu_seq_init(pmu);
1582
1583                 gk20a_dbg_fn("skip init");
1584                 goto skip_init;
1585         }
1586
1587         /* no infoRom script from vbios? */
1588
1589         /* TBD: sysmon subtask */
1590
1591         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1592         pmu->mutex = kzalloc(pmu->mutex_cnt *
1593                 sizeof(struct pmu_mutex), GFP_KERNEL);
1594         if (!pmu->mutex) {
1595                 err = -ENOMEM;
1596                 goto err;
1597         }
1598
1599         for (i = 0; i < pmu->mutex_cnt; i++) {
1600                 pmu->mutex[i].id    = i;
1601                 pmu->mutex[i].index = i;
1602         }
1603
1604         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1605                 sizeof(struct pmu_sequence), GFP_KERNEL);
1606         if (!pmu->seq) {
1607                 err = -ENOMEM;
1608                 goto err_free_mutex;
1609         }
1610
1611         pmu_seq_init(pmu);
1612
1613         if (!g->pmu_fw) {
1614                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1615                 if (!g->pmu_fw) {
1616                         gk20a_err(d, "failed to load pmu ucode!!");
1617                         err = -ENOENT;
1618                         goto err_free_seq;
1619                 }
1620         }
1621
1622         gk20a_dbg_fn("firmware loaded");
1623
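        /*
         * The firmware blob starts with a struct pmu_ucode_desc header; the
         * ucode image itself follows immediately after the descriptor.
         */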
1624         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1625         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1626                         pmu->desc->descriptor_size);
1627
1628
1629         INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow);
1630         INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue);
1631
1632         gk20a_init_pmu_vm(mm);
1633
1634         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1635         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1636                                         &iova,
1637                                         GFP_KERNEL,
1638                                         &attrs);
1639         if (!pmu->ucode.cpuva) {
1640                 gk20a_err(d, "failed to allocate memory\n");
1641                 err = -ENOMEM;
1642                 goto err_release_fw;
1643         }
1644
1645         pmu->ucode.iova = iova;
1646         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1647                                         &iova,
1648                                         GFP_KERNEL);
1649         if (!pmu->seq_buf.cpuva) {
1650                 gk20a_err(d, "failed to allocate memory\n");
1651                 err = -ENOMEM;
1652                 goto err_free_pmu_ucode;
1653         }
1654
1655         pmu->seq_buf.iova = iova;
1656         init_waitqueue_head(&pmu->pg_wq);
1657
1658         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1659                                 pmu->ucode.cpuva,
1660                                 pmu->ucode.iova,
1661                                 GK20A_PMU_UCODE_SIZE_MAX);
1662         if (err) {
1663                 gk20a_err(d, "failed to allocate sg table\n");
1664                 goto err_free_seq_buf;
1665         }
1666
1667         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1668                                         GK20A_PMU_UCODE_SIZE_MAX,
1669                                         0, /* flags */
1670                                         gk20a_mem_flag_read_only);
1671         if (!pmu->ucode.pmu_va) {
1672                 gk20a_err(d, "failed to map pmu ucode memory!!");
                     err = -ENOMEM;
1673                 goto err_free_ucode_sgt;
1674         }
1675
1676         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1677                                 pmu->seq_buf.cpuva,
1678                                 pmu->seq_buf.iova,
1679                                 GK20A_PMU_SEQ_BUF_SIZE);
1680         if (err) {
1681                 gk20a_err(d, "failed to allocate sg table\n");
1682                 goto err_unmap_ucode;
1683         }
1684
1685         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1686                                         GK20A_PMU_SEQ_BUF_SIZE,
1687                                         0, /* flags */
1688                                         gk20a_mem_flag_none);
1689         if (!pmu->seq_buf.pmu_va) {
1690                 gk20a_err(d, "failed to map pmu seq buffer memory!!");
                     err = -ENOMEM;
1691                 goto err_free_seq_buf_sgt;
1692         }
1693
1694         ptr = (u8 *)pmu->seq_buf.cpuva;
1695         if (!ptr) {
1696                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
1697                 goto err_unmap_seq_buf;
1698         }
1699
1700         /* TBD: remove this if ZBC save/restore is handled by PMU;
1701          * send an empty ZBC sequence for now */
1702         ptr[0] = 0x16; /* opcode EXIT */
1703         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1704         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1705
1706         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1707
1708         ucode_ptr = pmu->ucode.cpuva;
1709
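        /* Copy the ucode image, up to the end of the application section,
         * into the DMA buffer one 32-bit word at a time. */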
1710         for (i = 0; i < (pmu->desc->app_start_offset +
1711                         pmu->desc->app_size) >> 2; i++)
1712                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1713
1714         gk20a_free_sgtable(&sgt_pmu_ucode);
1715         gk20a_free_sgtable(&sgt_seq_buf);
1716
1717 skip_init:
1718         mutex_init(&pmu->elpg_mutex);
1719         mutex_init(&pmu->isr_mutex);
1720         mutex_init(&pmu->pmu_copy_lock);
1721         mutex_init(&pmu->pmu_seq_lock);
1722
1723         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1724         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1725
1726         pmu->remove_support = gk20a_remove_pmu_support;
1727         err = gk20a_init_pmu(pmu);
1728         if (err) {
1729                 gk20a_err(d, "failed to set function pointers\n");
1730                 return err;
1731         }
1732
1733         gk20a_dbg_fn("done");
1734         return 0;
1735
1736  err_unmap_seq_buf:
1737         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1738                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1739  err_free_seq_buf_sgt:
1740         gk20a_free_sgtable(&sgt_seq_buf);
1741  err_unmap_ucode:
1742         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1743                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1744  err_free_ucode_sgt:
1745         gk20a_free_sgtable(&sgt_pmu_ucode);
1746  err_free_seq_buf:
1747         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1748                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1749         pmu->seq_buf.cpuva = NULL;
1750         pmu->seq_buf.iova = 0;
1751  err_free_pmu_ucode:
1752         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1753                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1754         pmu->ucode.cpuva = NULL;
1755         pmu->ucode.iova = 0;
1756  err_release_fw:
1757         release_firmware(g->pmu_fw);
1758  err_free_seq:
1759         kfree(pmu->seq);
1760  err_free_mutex:
1761         kfree(pmu->mutex);
1762  err:
1763         gk20a_dbg_fn("fail");
1764         return err;
1765 }
1766
1767 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1768                         void *param, u32 handle, u32 status);
1769
1770 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1771                         void *param, u32 handle, u32 status)
1772 {
1773         struct pmu_gk20a *pmu = param;
1774         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1775
1776         gk20a_dbg_fn("");
1777
1778         if (status != 0) {
1779                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1780                 /* TBD: disable ELPG */
1781                 return;
1782         }
1783
1784         if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1785                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1786         }
1787
1788         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1789         wake_up(&pmu->pg_wq);
1790 }
1791
1792 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1793 {
1794         struct pmu_gk20a *pmu = &g->pmu;
1795         int err;
1796
1797         gk20a_dbg_fn("");
1798
1799         pmu_reset(pmu);
1800
1801         /* setup apertures - virtual */
1802         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1803                 pwr_fbif_transcfg_mem_type_virtual_f());
1804         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1805                 pwr_fbif_transcfg_mem_type_virtual_f());
1806         /* setup apertures - physical */
1807         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1808                 pwr_fbif_transcfg_mem_type_physical_f() |
1809                 pwr_fbif_transcfg_target_local_fb_f());
1810         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1811                 pwr_fbif_transcfg_mem_type_physical_f() |
1812                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1813         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1814                 pwr_fbif_transcfg_mem_type_physical_f() |
1815                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1816
1817         /* TBD: load pmu ucode */
1818         err = pmu_bootstrap(pmu);
1819         if (err)
1820                 return err;
1821
1822         return 0;
1823
1824 }
1825
1826 static int gk20a_aelpg_init(struct gk20a *g);
1827 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1828
1829
1830 static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work)
1831 {
1832         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1833         struct gk20a *g = pmu->g;
1834         gk20a_init_pmu_setup_hw2(g);
1835 }
1836
1837 int gk20a_init_pmu_setup_hw2(struct gk20a *g)
1838 {
1839         struct pmu_gk20a *pmu = &g->pmu;
1840         struct mm_gk20a *mm = &g->mm;
1841         struct vm_gk20a *vm = &mm->pmu.vm;
1842         struct device *d = dev_from_gk20a(g);
1843         struct pmu_cmd cmd;
1844         u32 desc;
1845         long remain;
1846         int err;
1847         bool status;
1848         u32 size;
1849         struct sg_table *sgt_pg_buf;
1850         dma_addr_t iova;
1851
1852         gk20a_dbg_fn("");
1853
1854         if (!support_gk20a_pmu())
1855                 return 0;
1856
1857         size = 0;
1858         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1859         if (err) {
1860                 gk20a_err(dev_from_gk20a(g),
1861                         "fail to query fecs pg buffer size");
1862                 return err;
1863         }
1864
1865         if (!pmu->sw_ready) {
1866                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1867                                                 &iova,
1868                                                 GFP_KERNEL);
1869                 if (!pmu->pg_buf.cpuva) {
1870                         gk20a_err(d, "failed to allocate memory\n");
1871                         err = -ENOMEM;
1872                         goto err;
1873                 }
1874
1875                 pmu->pg_buf.iova = iova;
1876                 pmu->pg_buf.size = size;
1877
1878                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1879                                         pmu->pg_buf.cpuva,
1880                                         pmu->pg_buf.iova,
1881                                         size);
1882                 if (err) {
1883                         gk20a_err(d, "failed to create sg table\n");
1884                         goto err_free_pg_buf;
1885                 }
1886
1887                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1888                                         &sgt_pg_buf,
1889                                         size,
1890                                         0, /* flags */
1891                                         gk20a_mem_flag_none);
1892                 if (!pmu->pg_buf.pmu_va) {
1893                         gk20a_err(d, "failed to map fecs pg buffer");
1894                         err = -ENOMEM;
1895                         goto err_free_sgtable;
1896                 }
1897
1898                 gk20a_free_sgtable(&sgt_pg_buf);
1899         }
1900
1901         /*
1902          * This is the actual point at which sw setup is complete, so set the
1903          * sw_ready flag here.
1904          */
1905         pmu->sw_ready = true;
1906
1907         /* TBD: acquire pmu hw mutex */
1908
1909         /* TBD: post reset again? */
1910
1911         /* PMU_INIT message handler will send PG_INIT */
1912         remain = wait_event_timeout(
1913                         pmu->pg_wq,
1914                         (status = (pmu->elpg_ready &&
1915                                 pmu->stat_dmem_offset != 0 &&
1916                                 pmu->elpg_stat == PMU_ELPG_STAT_OFF)),
1917                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1918         if (status == 0) {
1919                 gk20a_err(dev_from_gk20a(g),
1920                         "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain);
1921                 pmu_dump_falcon_stats(pmu);
1922                 return -EBUSY;
1923         }
1924
1925         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1926         if (err) {
1927                 gk20a_err(dev_from_gk20a(g),
1928                         "fail to bind pmu inst to gr");
1929                 return err;
1930         }
1931
1932         err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va);
1933         if (err) {
1934                 gk20a_err(dev_from_gk20a(g),
1935                         "fail to set pg buffer pmu va");
1936                 return err;
1937         }
1938
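        /*
         * Ask the PMU to DMA in the FECS power-gating (reglist) buffer.  The
         * buffer's GPU VA is split into a 256-byte-aligned dma_base and the
         * remaining byte offset in dma_offset.
         */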
1939         memset(&cmd, 0, sizeof(struct pmu_cmd));
1940         cmd.hdr.unit_id = PMU_UNIT_PG;
1941         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1942         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1943         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1944         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1945         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
1946         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1947         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1948         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1949
1950         pmu->buf_loaded = false;
1951         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1952                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1953
1954         remain = wait_event_timeout(
1955                         pmu->pg_wq,
1956                         pmu->buf_loaded,
1957                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1958         if (!pmu->buf_loaded) {
1959                 gk20a_err(dev_from_gk20a(g),
1960                         "PGENG FECS buffer load failed, remaining timeout : 0x%lx",
1961                         remain);
1962                 return -EBUSY;
1963         }
1964
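        /* Load the (currently empty) ZBC save/restore sequence buffer the
         * same way. */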
1965         memset(&cmd, 0, sizeof(struct pmu_cmd));
1966         cmd.hdr.unit_id = PMU_UNIT_PG;
1967         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1968         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1969         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1970         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1971         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1972         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1973         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1974         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1975
1976         pmu->buf_loaded = false;
1977         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1978                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1979
1980         remain = wait_event_timeout(
1981                         pmu->pg_wq,
1982                         pmu->buf_loaded,
1983                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1984         if (!pmu->buf_loaded) {
1985                 gk20a_err(dev_from_gk20a(g),
1986                         "PGENG ZBC buffer load failed, remaining timeout 0x%lx",
1987                         remain);
1988                 return -EBUSY;
1989         }
1990
1991         /*
1992          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1993          * 7. This prevents PMU stalling on Host register accesses. Once the
1994          * cause for this hang is discovered and fixed, this WAR should be
1995          * removed.
1996          */
1997         gk20a_writel(g, 0x10a164, 0x109ff);
1998
1999         pmu->initialized = true;
2000
2001         /*
2002          * We can't guarantee that the gr code which enables ELPG will be
2003          * invoked, so explicitly disable and then re-enable ELPG here to
2004          * make sure it ends up enabled.
2005          */
2006         gk20a_pmu_disable_elpg(g);
2007
2008         pmu->zbc_ready = true;
2009         /* Save zbc table after PMU is initialized. */
2010         pmu_save_zbc(g, 0xf);
2011
2012         if (g->elpg_enabled)
2013                 gk20a_pmu_enable_elpg(g);
2014
2015         udelay(50);
2016
2017         /* Enable AELPG */
2018         if (g->aelpg_enabled) {
2019                 gk20a_aelpg_init(g);
2020                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
2021         }
2022
2023         return 0;
2024
2025  err_free_sgtable:
2026         gk20a_free_sgtable(&sgt_pg_buf);
2027  err_free_pg_buf:
2028         dma_free_coherent(d, size,
2029                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
2030         pmu->pg_buf.cpuva = NULL;
2031         pmu->pg_buf.iova = 0;
2032  err:
2033         return err;
2034 }
2035
2036 int gk20a_init_pmu_support(struct gk20a *g)
2037 {
2038         struct pmu_gk20a *pmu = &g->pmu;
2039         u32 err;
2040
2041         gk20a_dbg_fn("");
2042
2043         if (pmu->initialized)
2044                 return 0;
2045
2046         pmu->g = g;
2047
2048         err = gk20a_init_pmu_reset_enable_hw(g);
2049         if (err)
2050                 return err;
2051
2052         if (support_gk20a_pmu()) {
2053                 err = gk20a_init_pmu_setup_sw(g);
2054                 if (err)
2055                         return err;
2056
2057                 err = gk20a_init_pmu_setup_hw1(g);
2058                 if (err)
2059                         return err;
2060         }
2061
2062         return err;
2063 }
2064
2065 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2066                         void *param, u32 handle, u32 status)
2067 {
2068         struct pmu_gk20a *pmu = param;
2069         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2070
2071         gk20a_dbg_fn("");
2072
2073         if (status != 0) {
2074                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2075                 /* TBD: disable ELPG */
2076                 return;
2077         }
2078
2079         switch (elpg_msg->msg) {
2080         case PMU_PG_ELPG_MSG_INIT_ACK:
2081                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2082                 pmu->elpg_ready = true;
2083                 wake_up(&pmu->pg_wq);
2084                 break;
2085         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2086                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2087                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2088                 wake_up(&pmu->pg_wq);
2089                 break;
2090         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2091                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2092                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2093                 wake_up(&pmu->pg_wq);
2094                 break;
2095         default:
2096                 gk20a_err(dev_from_gk20a(g),
2097                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2098         }
2099
2100         return;
2101 }
2102
2103 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2104                         void *param, u32 handle, u32 status)
2105 {
2106         struct pmu_gk20a *pmu = param;
2107
2108         gk20a_dbg_fn("");
2109
2110         if (status != 0) {
2111                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2112                 /* TBD: disable ELPG */
2113                 return;
2114         }
2115
2116         switch (msg->msg.pg.stat.sub_msg_id) {
2117         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2118                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2119                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2120                 wake_up(&pmu->pg_wq);
2121                 break;
2122         default:
2123                 break;
2124         }
2125 }
2126
2127 static int pmu_init_powergating(struct pmu_gk20a *pmu)
2128 {
2129         struct gk20a *g = pmu->g;
2130         struct pmu_cmd cmd;
2131         u32 seq;
2132
2133         gk20a_dbg_fn("");
2134
2135         if (tegra_cpu_is_asim()) {
2136                 /* TBD: calculate threshold for silicon */
2137                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2138                                 PMU_PG_IDLE_THRESHOLD_SIM);
2139                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2140                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2141         } else {
2142                 /* TBD: calculate threshold for silicon */
2143                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2144                                 PMU_PG_IDLE_THRESHOLD);
2145                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2146                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2147         }
2148
2149         /* init ELPG */
2150         memset(&cmd, 0, sizeof(struct pmu_cmd));
2151         cmd.hdr.unit_id = PMU_UNIT_PG;
2152         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2153         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2154         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2155         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2156
2157         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2158                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2159
2160         /* alloc dmem for powergating state log */
2161         pmu->stat_dmem_offset = 0;
2162         memset(&cmd, 0, sizeof(struct pmu_cmd));
2163         cmd.hdr.unit_id = PMU_UNIT_PG;
2164         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2165         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2166         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2167         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2168         cmd.cmd.pg.stat.data = 0;
2169
2170         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2171                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2172
2173         /* disallow ELPG initially:
2174            PMU ucode requires a disallow cmd before any allow cmd */
2175         pmu->elpg_stat = PMU_ELPG_STAT_ON; /* ON until the DISALLOW ack flips it to OFF */
2176         memset(&cmd, 0, sizeof(struct pmu_cmd));
2177         cmd.hdr.unit_id = PMU_UNIT_PG;
2178         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2179         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2180         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2181         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2182
2183         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2184                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2185
2186         /* start with elpg disabled until first enable call */
2187         pmu->elpg_refcnt = 1;
2188
2189         return 0;
2190 }
2191
2192 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2193 {
2194         struct gk20a *g = pmu->g;
2195         struct pmu_v *pv = &g->ops.pmu_ver;
2196         struct pmu_cmd cmd;
2197         struct pmu_payload payload;
2198         u32 seq;
2199         u32 data;
2200         int err;
2201
2202         gk20a_dbg_fn("");
2203
2204         pmu->perfmon_ready = 0;
2205
2206         /* use counter #3 for GR && CE2 busy cycles */
2207         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2208                 pwr_pmu_idle_mask_gr_enabled_f() |
2209                 pwr_pmu_idle_mask_ce_2_enabled_f());
2210
2211         /* disable idle filtering for counters 3 and 6 */
2212         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2213         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2214                         pwr_pmu_idle_ctrl_filter_m(),
2215                         pwr_pmu_idle_ctrl_value_busy_f() |
2216                         pwr_pmu_idle_ctrl_filter_disabled_f());
2217         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2218
2219         /* use counter #6 for total cycles */
2220         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2221         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2222                         pwr_pmu_idle_ctrl_filter_m(),
2223                         pwr_pmu_idle_ctrl_value_always_f() |
2224                         pwr_pmu_idle_ctrl_filter_disabled_f());
2225         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2226
2227         /*
2228          * We don't want to disturb counters #3 and #6, which are used by
2229          * perfmon, so we add wiring also to counters #1 and #2 for
2230          * exposing raw counter readings.
2231          */
2232         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2233                 pwr_pmu_idle_mask_gr_enabled_f() |
2234                 pwr_pmu_idle_mask_ce_2_enabled_f());
2235
2236         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2237         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2238                         pwr_pmu_idle_ctrl_filter_m(),
2239                         pwr_pmu_idle_ctrl_value_busy_f() |
2240                         pwr_pmu_idle_ctrl_filter_disabled_f());
2241         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2242
2243         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2244         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2245                         pwr_pmu_idle_ctrl_filter_m(),
2246                         pwr_pmu_idle_ctrl_value_always_f() |
2247                         pwr_pmu_idle_ctrl_filter_disabled_f());
2248         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2249
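        /* Reserve two 16-bit slots in PMU DMEM for the perfmon sample
         * readings; the offset is handed to the PMU in the INIT cmd below. */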
2250         pmu->sample_buffer = 0;
2251         err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16));
2252         if (err) {
2253                 gk20a_err(dev_from_gk20a(g),
2254                         "failed to allocate perfmon sample buffer");
2255                 return -ENOMEM;
2256         }
2257
2258         /* init PERFMON */
2259         memset(&cmd, 0, sizeof(struct pmu_cmd));
2260         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2261         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2262         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2263         /* buffer to save counter values for pmu perfmon */
2264         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2265         (u16)pmu->sample_buffer);
2266         /* number of sample periods below lower threshold
2267            before pmu triggers perfmon decrease event
2268            TBD: = 15 */
2269         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2270         /* index of base counter, aka. always ticking counter */
2271         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2272         /* microseconds interval between pmu polls perf counters */
2273         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2274         /* number of perfmon counters:
2275            one (counter #3, GR and CE2) on gk20a */
2276         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2277         /* moving average window for sample periods
2278            TBD: = 3000000 / sample_period_us = 17 */
2279         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2280
2281         memset(&payload, 0, sizeof(struct pmu_payload));
2282         payload.in.buf = &pmu->perfmon_counter;
2283         payload.in.size = sizeof(struct pmu_perfmon_counter);
2284         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2285
2286         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2287                         NULL, NULL, &seq, ~0);
2288
2289         return 0;
2290 }
2291
2292 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2293                         struct pmu_msg *msg)
2294 {
2295         struct gk20a *g = pmu->g;
2296         struct pmu_v *pv = &g->ops.pmu_ver;
2297         union pmu_init_msg_pmu *init;
2298         struct pmu_sha1_gid_data gid_data;
2299         u32 i, tail = 0;
2300
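        /*
         * The INIT message is read straight out of PMU DMEM at the message
         * queue tail: header first, then the body, after which the tail
         * pointer is advanced past the message.
         */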
2301         tail = pwr_pmu_msgq_tail_val_v(
2302                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2303
2304         pmu_copy_from_dmem(pmu, tail,
2305                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2306
2307         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2308                 gk20a_err(dev_from_gk20a(g),
2309                         "expecting init msg");
2310                 return -EINVAL;
2311         }
2312
2313         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2314                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2315
2316         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2317                 gk20a_err(dev_from_gk20a(g),
2318                         "expecting init msg");
2319                 return -EINVAL;
2320         }
2321
2322         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2323         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2324                 pwr_pmu_msgq_tail_val_f(tail));
2325
2326         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2327         if (!pmu->gid_info.valid) {
2328
2329                 pmu_copy_from_dmem(pmu,
2330                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2331                         (u8 *)&gid_data,
2332                         sizeof(struct pmu_sha1_gid_data), 0);
2333
2334                 pmu->gid_info.valid =
2335                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2336
2337                 if (pmu->gid_info.valid) {
2338
2339                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2340                                 sizeof(gid_data.gid));
2341
2342                         memcpy(pmu->gid_info.gid, gid_data.gid,
2343                                 sizeof(pmu->gid_info.gid));
2344                 }
2345         }
2346
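        /* Use the queue layout reported in the INIT message to set up the
         * command/message queues and the DMEM allocator for payloads. */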
2347         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2348                 pmu_queue_init(pmu, i, init);
2349
2350         gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2351                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2352                         pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2353                         PMU_DMEM_ALLOC_ALIGNMENT);
2354
2355         pmu->pmu_ready = true;
2356
2357         return 0;
2358 }
2359
2360 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2361                         struct pmu_msg *msg, int *status)
2362 {
2363         struct gk20a *g = pmu->g;
2364         u32 read_size, bytes_read;
2365         int err;
2366
2367         *status = 0;
2368
2369         if (pmu_queue_is_empty(pmu, queue))
2370                 return false;
2371
2372         err = pmu_queue_open_read(pmu, queue);
2373         if (err) {
2374                 gk20a_err(dev_from_gk20a(g),
2375                         "fail to open queue %d for read", queue->id);
2376                 *status = err;
2377                 return false;
2378         }
2379
2380         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2381                         PMU_MSG_HDR_SIZE, &bytes_read);
2382         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2383                 gk20a_err(dev_from_gk20a(g),
2384                         "fail to read msg from queue %d", queue->id);
2385                 *status = err | -EINVAL;
2386                 goto clean_up;
2387         }
2388
2389         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2390                 pmu_queue_rewind(pmu, queue);
2391                 /* read again after rewind */
2392                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2393                                 PMU_MSG_HDR_SIZE, &bytes_read);
2394                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2395                         gk20a_err(dev_from_gk20a(g),
2396                                 "fail to read msg from queue %d", queue->id);
2397                         *status = err | -EINVAL;
2398                         goto clean_up;
2399                 }
2400         }
2401
2402         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2403                 gk20a_err(dev_from_gk20a(g),
2404                         "read invalid unit_id %d from queue %d",
2405                         msg->hdr.unit_id, queue->id);
2406                 *status = -EINVAL;
2407                 goto clean_up;
2408         }
2409
2410         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2411                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2412                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2413                         read_size, &bytes_read);
2414                 if (err || bytes_read != read_size) {
2415                         gk20a_err(dev_from_gk20a(g),
2416                                 "fail to read msg from queue %d", queue->id);
2417                         *status = err;
2418                         goto clean_up;
2419                 }
2420         }
2421
2422         err = pmu_queue_close(pmu, queue, true);
2423         if (err) {
2424                 gk20a_err(dev_from_gk20a(g),
2425                         "fail to close queue %d", queue->id);
2426                 *status = err;
2427                 return false;
2428         }
2429
2430         return true;
2431
2432 clean_up:
2433         err = pmu_queue_close(pmu, queue, false);
2434         if (err)
2435                 gk20a_err(dev_from_gk20a(g),
2436                         "fail to close queue %d", queue->id);
2437         return false;
2438 }
2439
2440 static int pmu_response_handle(struct pmu_gk20a *pmu,
2441                         struct pmu_msg *msg)
2442 {
2443         struct gk20a *g = pmu->g;
2444         struct pmu_sequence *seq;
2445         struct pmu_v *pv = &g->ops.pmu_ver;
2446         int ret = 0;
2447
2448         gk20a_dbg_fn("");
2449
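        /*
         * Find the sequence this reply belongs to, copy the message (and any
         * DMEM output payload) back to the caller, free the sequence's in/out
         * DMEM allocations, then run the caller's callback and release the
         * sequence.
         */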
2450         seq = &pmu->seq[msg->hdr.seq_id];
2451         if (seq->state != PMU_SEQ_STATE_USED &&
2452             seq->state != PMU_SEQ_STATE_CANCELLED) {
2453                 gk20a_err(dev_from_gk20a(g),
2454                         "msg for an unknown sequence %d", seq->id);
2455                 return -EINVAL;
2456         }
2457
2458         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2459             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2460                 gk20a_err(dev_from_gk20a(g),
2461                         "unhandled cmd: seq %d", seq->id);
2462         }
2463         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2464                 if (seq->msg) {
2465                         if (seq->msg->hdr.size >= msg->hdr.size) {
2466                                 memcpy(seq->msg, msg, msg->hdr.size);
2467                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2468                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2469                                         pmu_copy_from_dmem(pmu,
2470                                         pv->pmu_allocation_get_dmem_offset(pmu,
2471                                         pv->get_pmu_seq_out_a_ptr(seq)),
2472                                         seq->out_payload,
2473                                         pv->pmu_allocation_get_dmem_size(pmu,
2474                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2475                                 }
2476                         } else {
2477                                 gk20a_err(dev_from_gk20a(g),
2478                                         "sequence %d msg buffer too small",
2479                                         seq->id);
2480                         }
2481                 }
2482         } else
2483                 seq->callback = NULL;
2484         if (pv->pmu_allocation_get_dmem_size(pmu,
2485                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2486                 pmu->dmem.free(&pmu->dmem,
2487                 pv->pmu_allocation_get_dmem_offset(pmu,
2488                 pv->get_pmu_seq_in_a_ptr(seq)),
2489                 pv->pmu_allocation_get_dmem_size(pmu,
2490                 pv->get_pmu_seq_in_a_ptr(seq)));
2491         if (pv->pmu_allocation_get_dmem_size(pmu,
2492                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2493                 pmu->dmem.free(&pmu->dmem,
2494                 pv->pmu_allocation_get_dmem_offset(pmu,
2495                 pv->get_pmu_seq_out_a_ptr(seq)),
2496                 pv->pmu_allocation_get_dmem_size(pmu,
2497                 pv->get_pmu_seq_out_a_ptr(seq)));
2498
2499         if (seq->callback)
2500                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2501
2502         pmu_seq_release(pmu, seq);
2503
2504         /* TBD: notify client waiting for available dmem */
2505
2506         gk20a_dbg_fn("done");
2507
2508         return 0;
2509 }
2510
2511 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2512                                  u32 *var, u32 val);
2513
2514 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2515                         void *param, u32 handle, u32 status)
2516 {
2517         struct pmu_gk20a *pmu = param;
2518         pmu->zbc_save_done = 1;
2519 }
2520
2521 static void pmu_save_zbc(struct gk20a *g, u32 entries)
2522 {
2523         struct pmu_gk20a *pmu = &g->pmu;
2524         struct pmu_cmd cmd;
2525         u32 seq;
2526
2527         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2528                 return;
2529
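        /* Ask the PMU to save the given ZBC table entries and wait, bounded
         * by the gr idle timeout, for its acknowledgement. */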
2530         memset(&cmd, 0, sizeof(struct pmu_cmd));
2531         cmd.hdr.unit_id = PMU_UNIT_PG;
2532         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2533         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2534         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2535
2536         pmu->zbc_save_done = 0;
2537
2538         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2539                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2540         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2541                               &pmu->zbc_save_done, 1);
2542         if (!pmu->zbc_save_done)
2543                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2544 }
2545
2546 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2547 {
2548         if (g->pmu.zbc_ready)
2549                 pmu_save_zbc(g, entries);
2550 }
2551
2552 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2553 {
2554         struct gk20a *g = pmu->g;
2555         struct pmu_v *pv = &g->ops.pmu_ver;
2556         struct pmu_cmd cmd;
2557         struct pmu_payload payload;
2558         u32 current_rate = 0;
2559         u32 seq;
2560
2561         /* PERFMON Start */
2562         memset(&cmd, 0, sizeof(struct pmu_cmd));
2563         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2564         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2565         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2566                 PMU_PERFMON_CMD_ID_START);
2567         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2568                 PMU_DOMAIN_GROUP_PSTATE);
2569         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2570                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2571
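        /* At the maximum clock only DECREASE events are useful and at the
         * minimum only INCREASE events; in between, enable both. */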
2572         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2573         if (current_rate >= gpc_pll_params.max_freq)
2574                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2575                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2576         else if (current_rate <= gpc_pll_params.min_freq)
2577                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2578                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2579         else
2580                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2581                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2582                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2583
2584         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2585                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2586                 PMU_PERFMON_FLAG_CLEAR_PREV);
2587
2588         memset(&payload, 0, sizeof(struct pmu_payload));
2589
2590         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2591         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2592         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2593         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2594         pmu->perfmon_counter.valid = true;
2595
2596         payload.in.buf = &pmu->perfmon_counter;
2597         payload.in.size = sizeof(pmu->perfmon_counter);
2598         payload.in.offset =
2599                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2600
2601         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2602                         NULL, NULL, &seq, ~0);
2603
2604         return 0;
2605 }
2606
2607 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2608 {
2609         struct gk20a *g = pmu->g;
2610         struct pmu_cmd cmd;
2611         u32 seq;
2612
2613         /* PERFMON Stop */
2614         memset(&cmd, 0, sizeof(struct pmu_cmd));
2615         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2616         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2617         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2618
2619         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2620                         NULL, NULL, &seq, ~0);
2621         return 0;
2622 }
2623
2624 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2625                         struct pmu_perfmon_msg *msg)
2626 {
2627         struct gk20a *g = pmu->g;
2628         u32 rate;
2629
2630         gk20a_dbg_fn("");
2631
2632         switch (msg->msg_type) {
2633         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2634                 gk20a_dbg_pmu("perfmon increase event: "
2635                         "state_id %d, group_id %d, pct %d",
2636                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2637                 /* increase gk20a clock freq by 20% */
2638                 rate = gk20a_clk_get_rate(g);
2639                 gk20a_clk_set_rate(g, rate * 6 / 5);
2640                 break;
2641         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2642                 gk20a_dbg_pmu("perfmon decrease event: "
2643                         "state_id %d, group_id %d, pct %d",
2644                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2645                 /* decrease gk20a clock freq to 70% of the current rate */
2646                 rate = gk20a_clk_get_rate(g);
2647                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2648                 break;
2649         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2650                 pmu->perfmon_ready = 1;
2651                 gk20a_dbg_pmu("perfmon init event");
2652                 break;
2653         default:
2654                 break;
2655         }
2656
2657         /* restart sampling */
2658         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2659                 return pmu_perfmon_start_sampling(pmu);
2660         return 0;
2661 }
2662
2663
2664 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2665 {
2666         int err = 0;
2667
2668         gk20a_dbg_fn("");
2669
2670         switch (msg->hdr.unit_id) {
2671         case PMU_UNIT_PERFMON:
2672                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2673                 break;
2674         default:
2675                 break;
2676         }
2677
2678         return err;
2679 }
2680
2681 static int pmu_process_message(struct pmu_gk20a *pmu)
2682 {
2683         struct pmu_msg msg;
2684         int status;
2685
2686         if (unlikely(!pmu->pmu_ready)) {
2687                 pmu_process_init_msg(pmu, &msg);
2688                 pmu_init_powergating(pmu);
2689                 pmu_init_perfmon(pmu);
2690                 return 0;
2691         }
2692
2693         while (pmu_read_message(pmu,
2694                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2695
2696                 gk20a_dbg_pmu("read msg hdr: "
2697                                 "unit_id = 0x%08x, size = 0x%08x, "
2698                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2699                                 msg.hdr.unit_id, msg.hdr.size,
2700                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2701
2702                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2703
2704                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2705                         pmu_handle_event(pmu, &msg);
2706                 } else {
2707                         pmu_response_handle(pmu, &msg);
2708                 }
2709         }
2710
2711         return 0;
2712 }
2713
2714 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2715                                  u32 *var, u32 val)
2716 {
2717         struct gk20a *g = pmu->g;
2718         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2719         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2720
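        /* Poll *var with exponential back-off, servicing the PMU ISR by hand
         * so pending PMU messages are processed while we wait. */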
2721         do {
2722                 if (*var == val)
2723                         return 0;
2724
2725                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2726                         gk20a_pmu_isr(g);
2727
2728                 usleep_range(delay, delay * 2);
2729                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2730         } while (time_before(jiffies, end_jiffies) ||
2731                         !tegra_platform_is_silicon());
2732
2733         return -ETIMEDOUT;
2734 }
2735
2736 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2737 {
2738         struct gk20a *g = pmu->g;
2739         struct pmu_pg_stats stats;
2740
2741         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2742                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2743
2744         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2745                 stats.pg_entry_start_timestamp);
2746         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2747                 stats.pg_exit_start_timestamp);
2748         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2749                 stats.pg_ingating_start_timestamp);
2750         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2751                 stats.pg_ungating_start_timestamp);
2752         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2753                 stats.pg_avg_entry_time_us);
2754         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2755                 stats.pg_avg_exit_time_us);
2756         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2757                 stats.pg_ingating_cnt);
2758         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2759                 stats.pg_ingating_time_us);
2760         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2761                 stats.pg_ungating_count);
2762         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2763                 stats.pg_ungating_time_us);
2764         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2765                 stats.pg_gating_cnt);
2766         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2767                 stats.pg_gating_deny_cnt);
2768
2769         /*
2770            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2771            in .nm file, e.g. 0x1000066c. use 0x66c.
2772         u32 i, val[20];
2773         pmu_copy_from_dmem(pmu, 0x66c,
2774                 (u8 *)val, sizeof(val), 0);
2775         gk20a_dbg_pmu("elpg log begin");
2776         for (i = 0; i < 20; i++)
2777                 gk20a_dbg_pmu("0x%08x", val[i]);
2778         gk20a_dbg_pmu("elpg log end");
2779         */
2780
2781         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2782                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2783         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2784                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2785         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2786                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2787         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2788                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2789         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2790                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2791
2792         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2793                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2794         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2795                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2796         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2797                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2798
2799         /*
2800          TBD: script can't generate those registers correctly
2801         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2802                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2803         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2804                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2805         */
2806 }
2807
2808 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2809 {
2810         struct gk20a *g = pmu->g;
2811         int i;
2812
2813         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2814                 gk20a_readl(g, pwr_falcon_os_r()));
2815         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2816                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2817         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2818                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2819         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2820                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2821         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2822                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2823         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2824                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2825         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2826                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2827         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2828                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2829         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2830                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2831
2832         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2833                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2834                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2835
2836         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2837                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2838                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2839
2840         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2841                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2842                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2843                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2844                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2845                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2846         }
2847
2848         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2849         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2850         if (i != 0) {
2851                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2852                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2853                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2854                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2855                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2856                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2857                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2858                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2859         }
2860
2861         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2862         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2863
2864         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2865         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2866         if (pwr_falcon_exterrstat_valid_v(i) ==
2867                         pwr_falcon_exterrstat_valid_true_v()) {
2868                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2869                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2870                 gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
2871                         gk20a_readl(g, top_fs_status_r()));
2872                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2873                         gk20a_readl(g, mc_enable_r()));
2874         }
2875
2876         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2877                 gk20a_readl(g, pwr_falcon_engctl_r()));
2878         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2879                 gk20a_readl(g, pwr_falcon_curctx_r()));
2880         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2881                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2882
2883         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2884                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2885                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2886         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2887                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2888
2889         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2890                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2891                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2892         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2893                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2894
2895         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2896                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2897                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2898         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2899                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2900
2901         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2902                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2903                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2904         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2905                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2906
2907         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2908                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2909                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2910         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2911                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2912
2913         for (i = 0; i < 4; i++) {
2914                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2915                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2916                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2917                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2918                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2919
2920                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2921                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2922                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2923                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2924                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2925         }
2926
2927         /* PMU may crash due to FECS crash. Dump FECS status */
2928         gk20a_fecs_dump_falcon_stats(g);
2929 }
2930
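/*
 * PMU interrupt service routine.
 *
 * Reads the falcon interrupt status masked by both irqmask and irqdest,
 * dumps falcon state on halt/exterr interrupts, and processes the PMU
 * message queue on swgen0.  After the handled interrupts are cleared,
 * swgen0 is re-raised if the message queue is still non-empty so that no
 * pending response is left unprocessed.
 */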
2931 void gk20a_pmu_isr(struct gk20a *g)
2932 {
2933         struct pmu_gk20a *pmu = &g->pmu;
2934         struct pmu_queue *queue;
2935         u32 intr, mask;
2936         bool recheck = false;
2937
2938         gk20a_dbg_fn("");
2939
2940         mutex_lock(&pmu->isr_mutex);
2941
2942         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2943                 gk20a_readl(g, pwr_falcon_irqdest_r());
2944
2945         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2946
2947         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2948
2949         if (!intr) {
2950                 mutex_unlock(&pmu->isr_mutex);
2951                 return;
2952         }
2953
2954         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2955                 gk20a_err(dev_from_gk20a(g),
2956                         "pmu halt intr not implemented");
2957                 pmu_dump_falcon_stats(pmu);
2958         }
2959         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2960                 gk20a_err(dev_from_gk20a(g),
2961                         "pmu exterr intr not implemented. Clearing interrupt.");
2962                 pmu_dump_falcon_stats(pmu);
2963
2964                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2965                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2966                                 ~pwr_falcon_exterrstat_valid_m());
2967         }
2968         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2969                 pmu_process_message(pmu);
2970                 recheck = true;
2971         }
2972
2973         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2974
2975         if (recheck) {
2976                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2977                 if (!pmu_queue_is_empty(pmu, queue))
2978                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2979                                 pwr_falcon_irqsset_swgen0_set_f());
2980         }
2981
2982         mutex_unlock(&pmu->isr_mutex);
2983 }
2984
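/*
 * Sanity-check a command before it is queued to the PMU:
 * - the target queue must be a SW command queue,
 * - the command header size must be at least PMU_CMD_HDR_SIZE and no more
 *   than half the queue size,
 * - the unit id must be valid,
 * - an optional payload must provide at least one buffer, non-zero sizes
 *   for the buffers it does provide, and offsets consistent with the
 *   command size.
 * Returns true if the command is well formed, false otherwise.
 */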
2985 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2986                         struct pmu_msg *msg, struct pmu_payload *payload,
2987                         u32 queue_id)
2988 {
2989         struct gk20a *g = pmu->g;
2990         struct pmu_queue *queue;
2991         u32 in_size, out_size;
2992
2993         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2994                 goto invalid_cmd;
2995
2996         queue = &pmu->queue[queue_id];
2997         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2998                 goto invalid_cmd;
2999
3000         if (cmd->hdr.size > (queue->size >> 1))
3001                 goto invalid_cmd;
3002
3003         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
3004                 goto invalid_cmd;
3005
3006         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
3007                 goto invalid_cmd;
3008
3009         if (payload == NULL)
3010                 return true;
3011
3012         if (payload->in.buf == NULL && payload->out.buf == NULL)
3013                 goto invalid_cmd;
3014
3015         if ((payload->in.buf != NULL && payload->in.size == 0) ||
3016             (payload->out.buf != NULL && payload->out.size == 0))
3017                 goto invalid_cmd;
3018
3019         in_size = PMU_CMD_HDR_SIZE;
3020         if (payload->in.buf) {
3021                 in_size += payload->in.offset;
3022                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3023         }
3024
3025         out_size = PMU_CMD_HDR_SIZE;
3026         if (payload->out.buf) {
3027                 out_size += payload->out.offset;
3028                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3029         }
3030
3031         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
3032                 goto invalid_cmd;
3033
3034
3035         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
3036             (payload->out.offset != 0 && payload->out.buf == NULL))
3037                 goto invalid_cmd;
3038
3039         return true;
3040
3041 invalid_cmd:
3042         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
3043                 "queue_id=%d,\n"
3044                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
3045                 "payload in=%p, in_size=%d, in_offset=%d,\n"
3046                 "payload out=%p, out_size=%d, out_offset=%d",
3047                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
3048                 msg, msg ? msg->hdr.size : ~0,
3049                 &payload->in, payload->in.size, payload->in.offset,
3050                 &payload->out, payload->out.size, payload->out.offset);
3051
3052         return false;
3053 }
3054
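/*
 * Copy a command into the given PMU queue.  Opening the queue for write is
 * retried (sleeping 1-2 ms between attempts) on -EAGAIN until it succeeds
 * or the timeout (in milliseconds) expires, after which the command is
 * pushed and the queue closed.
 */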
3055 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3056                         u32 queue_id, unsigned long timeout)
3057 {
3058         struct gk20a *g = pmu->g;
3059         struct pmu_queue *queue;
3060         unsigned long end_jiffies = jiffies +
3061                 msecs_to_jiffies(timeout);
3062         int err;
3063
3064         gk20a_dbg_fn("");
3065
3066         queue = &pmu->queue[queue_id];
3067
3068         do {
3069                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3070                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3071                         usleep_range(1000, 2000);
3072                 else
3073                         break;
3074         } while (1);
3075
3076         if (err)
3077                 goto clean_up;
3078
3079         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3080
3081         err = pmu_queue_close(pmu, queue, true);
3082
3083 clean_up:
3084         if (err)
3085                 gk20a_err(dev_from_gk20a(g),
3086                         "fail to write cmd to queue %d", queue_id);
3087         else
3088                 gk20a_dbg_fn("done");
3089
3090         return err;
3091 }
3092
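/*
 * Post a command to the PMU.
 *
 * The command is validated, a sequence tracking the request is acquired and
 * its descriptor returned through seq_desc, any input payload is copied into
 * a freshly allocated DMEM block (the output payload shares the input block
 * when both use the same buffer), and the command is written to the
 * requested queue.  The callback, if supplied, is invoked when the
 * corresponding PMU response message is processed.  See
 * gk20a_pmu_enable_elpg_locked() below for a typical caller.
 */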
3093 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3094                 struct pmu_msg *msg, struct pmu_payload *payload,
3095                 u32 queue_id, pmu_callback callback, void* cb_param,
3096                 u32 *seq_desc, unsigned long timeout)
3097 {
3098         struct pmu_gk20a *pmu = &g->pmu;
3099         struct pmu_v *pv = &g->ops.pmu_ver;
3100         struct pmu_sequence *seq;
3101         void *in = NULL, *out = NULL;
3102         int err;
3103
3104         gk20a_dbg_fn("");
3105
3106         BUG_ON(!cmd);
3107         BUG_ON(!seq_desc);
3108         BUG_ON(!pmu->pmu_ready);
3109
3110         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3111                 return -EINVAL;
3112
3113         err = pmu_seq_acquire(pmu, &seq);
3114         if (err)
3115                 return err;
3116
3117         cmd->hdr.seq_id = seq->id;
3118
3119         cmd->hdr.ctrl_flags = 0;
3120         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3121         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3122
3123         seq->callback = callback;
3124         seq->cb_params = cb_param;
3125         seq->msg = msg;
3126         seq->out_payload = NULL;
3127         seq->desc = pmu->next_seq_desc++;
3128
3129         if (payload)
3130                 seq->out_payload = payload->out.buf;
3131
3132         *seq_desc = seq->desc;
3133
3134         if (payload && payload->in.offset != 0) {
3135                 pv->set_pmu_allocation_ptr(pmu, &in,
3136                 ((u8 *)&cmd->cmd + payload->in.offset));
3137
3138                 if (payload->in.buf != payload->out.buf)
3139                         pv->pmu_allocation_set_dmem_size(pmu, in,
3140                         (u16)payload->in.size);
3141                 else
3142                         pv->pmu_allocation_set_dmem_size(pmu, in,
3143                         (u16)max(payload->in.size, payload->out.size));
3144
3145                 err = pmu->dmem.alloc(&pmu->dmem,
3146                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3147                 pv->pmu_allocation_get_dmem_size(pmu, in));
3148                 if (err)
3149                         goto clean_up;
3150
3151                 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3152                 in)),
3153                         payload->in.buf, payload->in.size, 0);
3154                 pv->pmu_allocation_set_dmem_size(pmu,
3155                 pv->get_pmu_seq_in_a_ptr(seq),
3156                 pv->pmu_allocation_get_dmem_size(pmu, in));
3157                 pv->pmu_allocation_set_dmem_offset(pmu,
3158                 pv->get_pmu_seq_in_a_ptr(seq),
3159                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3160         }
3161
3162         if (payload && payload->out.offset != 0) {
3163                 pv->set_pmu_allocation_ptr(pmu, &out,
3164                 ((u8 *)&cmd->cmd + payload->out.offset));
3165                 pv->pmu_allocation_set_dmem_size(pmu, out,
3166                 (u16)payload->out.size);
3167
3168                 if (payload->out.buf != payload->in.buf) {
3169                         err = pmu->dmem.alloc(&pmu->dmem,
3170                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3171                         pv->pmu_allocation_get_dmem_size(pmu, out));
3172                         if (err)
3173                                 goto clean_up;
3174                 } else {
3175                         BUG_ON(in == NULL);
3176                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3177                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3178                 }
3179
3180                 pv->pmu_allocation_set_dmem_size(pmu,
3181                 pv->get_pmu_seq_out_a_ptr(seq),
3182                 pv->pmu_allocation_get_dmem_size(pmu, out));
3183                 pv->pmu_allocation_set_dmem_offset(pmu,
3184                 pv->get_pmu_seq_out_a_ptr(seq),
3185                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3186         }
3187
3188         seq->state = PMU_SEQ_STATE_USED;
3189         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3190         if (err)
3191                 seq->state = PMU_SEQ_STATE_PENDING;
3192
3193         gk20a_dbg_fn("done");
3194
3195         return 0;
3196
3197 clean_up:
3198         gk20a_dbg_fn("fail");
3199         if (in)
3200                 pmu->dmem.free(&pmu->dmem,
3201                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3202                 pv->pmu_allocation_get_dmem_size(pmu, in));
3203         if (out)
3204                 pmu->dmem.free(&pmu->dmem,
3205                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3206                 pv->pmu_allocation_get_dmem_size(pmu, out));
3207
3208         pmu_seq_release(pmu, seq);
3209         return err;
3210 }
3211
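/*
 * Post the PMU_PG_ELPG_CMD_ALLOW command for the GR engine on the
 * high-priority queue.  The ack is not waited for here; elpg_stat is set to
 * ON_PENDING so that a later disable can synchronize against it.  Callers
 * in this file hold pmu->elpg_mutex, hence the _locked suffix.
 */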
3212 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3213 {
3214         struct pmu_gk20a *pmu = &g->pmu;
3215         struct pmu_cmd cmd;
3216         u32 seq, status;
3217
3218         gk20a_dbg_fn("");
3219
3220         memset(&cmd, 0, sizeof(struct pmu_cmd));
3221         cmd.hdr.unit_id = PMU_UNIT_PG;
3222         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3223         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3224         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3225         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3226
3227         /* no need to wait for the ack on ELPG enable, but set pending to
3228            sync with the follow-up ELPG disable */
3229         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3230
3231         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3232                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3233
3234         BUG_ON(status != 0);
3235
3236         gk20a_dbg_fn("done");
3237         return 0;
3238 }
3239
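/*
 * Reference-counted ELPG enable.  The ALLOW command is only sent when the
 * golden context has been initialized, ELPG is currently off and enabling
 * is allowed; if enabling is temporarily disallowed, the request is recorded
 * as OFF_ON_PENDING and completed later by pmu_elpg_enable_allow().
 */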
3240 int gk20a_pmu_enable_elpg(struct gk20a *g)
3241 {
3242         struct pmu_gk20a *pmu = &g->pmu;
3243         struct gr_gk20a *gr = &g->gr;
3244
3245         int ret = 0;
3246
3247         gk20a_dbg_fn("");
3248
3249         if (!pmu->elpg_ready || !pmu->initialized)
3250                 goto exit;
3251
3252         mutex_lock(&pmu->elpg_mutex);
3253
3254         pmu->elpg_refcnt++;
3255         if (pmu->elpg_refcnt <= 0)
3256                 goto exit_unlock;
3257
3258         /* something is not right if we end up in the following code path */
3259         if (unlikely(pmu->elpg_refcnt > 1)) {
3260                 gk20a_warn(dev_from_gk20a(g),
3261                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3262                 __func__, pmu->elpg_refcnt);
3263                 WARN_ON(1);
3264         }
3265
3266         /* do NOT enable ELPG until the golden ctx is created, since it is
3267            the ctx that ELPG saves and restores. */
3268         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3269                 goto exit_unlock;
3270
3271         /* return if ELPG is already on or on_pending or off_on_pending */
3272         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3273                 goto exit_unlock;
3274
3275         /* if ELPG is not allowed right now, mark that it should be enabled
3276          * immediately after it is allowed */
3277         if (!pmu->elpg_enable_allow) {
3278                 pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING;
3279                 goto exit_unlock;
3280         }
3281
3282         ret = gk20a_pmu_enable_elpg_locked(g);
3283
3284 exit_unlock:
3285         mutex_unlock(&pmu->elpg_mutex);
3286 exit:
3287         gk20a_dbg_fn("done");
3288         return ret;
3289 }
3290
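/*
 * Delayed-work handler scheduled by gk20a_pmu_disable_elpg_defer_enable().
 * It lifts the temporary hold-off on ELPG enabling and, if an enable request
 * was deferred in the meantime (OFF_ON_PENDING), sends it now.
 */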
3291 static void pmu_elpg_enable_allow(struct work_struct *work)
3292 {
3293         struct pmu_gk20a *pmu = container_of(to_delayed_work(work),
3294                                         struct pmu_gk20a, elpg_enable);
3295
3296         gk20a_dbg_fn("");
3297
3298         mutex_lock(&pmu->elpg_mutex);
3299
3300         /* It is ok to enable powergating now */
3301         pmu->elpg_enable_allow = true;
3302
3303         /* do we have pending requests? */
3304         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3305                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3306                 gk20a_pmu_enable_elpg_locked(pmu->g);
3307         }
3308
3309         mutex_unlock(&pmu->elpg_mutex);
3310
3311         gk20a_dbg_fn("done");
3312 }
3313
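/*
 * Reference-counted ELPG disable.  Pending deferred-enable work is cancelled
 * first, and the DISALLOW command is only sent once the refcount drops to
 * zero.  If an earlier ALLOW is still pending, its ack is waited for before
 * DISALLOW is posted; the DISALLOW ack is then awaited as well.  When the
 * enable argument is true, re-enabling is held off and re-allowed after
 * PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC via the elpg_enable delayed work.
 */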
3314 static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable)
3315 {
3316         struct pmu_gk20a *pmu = &g->pmu;
3317         struct pmu_cmd cmd;
3318         u32 seq;
3319         int ret = 0;
3320
3321         gk20a_dbg_fn("");
3322
3323         if (!pmu->elpg_ready || !pmu->initialized)
3324                 return 0;
3325
3326         /* remove the work from queue */
3327         cancel_delayed_work_sync(&pmu->elpg_enable);
3328
3329         mutex_lock(&pmu->elpg_mutex);
3330
3331         pmu->elpg_refcnt--;
3332         if (pmu->elpg_refcnt > 0) {
3333                 gk20a_warn(dev_from_gk20a(g),
3334                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3335                 __func__, pmu->elpg_refcnt);
3336                 WARN_ON(1);
3337                 ret = 0;
3338                 goto exit_unlock;
3339         }
3340
3341         /* cancel off_on_pending and return */
3342         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3343                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3344                 ret = 0;
3345                 goto exit_reschedule;
3346         }
3347         /* wait if on_pending */
3348         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3349
3350                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3351                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3352
3353                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3354                         gk20a_err(dev_from_gk20a(g),
3355                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3356                                 pmu->elpg_stat);
3357                         pmu_dump_elpg_stats(pmu);
3358                         pmu_dump_falcon_stats(pmu);
3359                         ret = -EBUSY;
3360                         goto exit_unlock;
3361                 }
3362         }
3363         /* return if ELPG is already off */
3364         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3365                 ret = 0;
3366                 goto exit_reschedule;
3367         }
3368
3369         memset(&cmd, 0, sizeof(struct pmu_cmd));
3370         cmd.hdr.unit_id = PMU_UNIT_PG;
3371         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3372         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3373         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3374         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3375
3376         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3377
3378         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3379                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3380
3381         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3382                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3383         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3384                 gk20a_err(dev_from_gk20a(g),
3385                         "ELPG_DISALLOW_ACK failed");
3386                 pmu_dump_elpg_stats(pmu);
3387                 pmu_dump_falcon_stats(pmu);
3388                 ret = -EBUSY;
3389                 goto exit_unlock;
3390         }
3391
3392 exit_reschedule:
3393         if (enable) {
3394                 pmu->elpg_enable_allow = false;
3395                 schedule_delayed_work(&pmu->elpg_enable,
3396                         msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC));
3397         } else
3398                 pmu->elpg_enable_allow = true;
3399
3400
3401 exit_unlock:
3402         mutex_unlock(&pmu->elpg_mutex);
3403         gk20a_dbg_fn("done");
3404         return ret;
3405 }
3406
3407 int gk20a_pmu_disable_elpg(struct gk20a *g)
3408 {
3409         return gk20a_pmu_disable_elpg_defer_enable(g, true);
3410 }
3411
3412 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3413 {
3414         struct pmu_gk20a *pmu = &g->pmu;
3415         int err;
3416
3417         gk20a_dbg_fn("");
3418
3419         if (enable)
3420                 err = pmu_perfmon_start_sampling(pmu);
3421         else
3422                 err = pmu_perfmon_stop_sampling(pmu);
3423
3424         return err;
3425 }
3426
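/*
 * Tear down the PMU on driver shutdown: cancel outstanding ELPG/init work,
 * fold the hardware ELPG residency counters into the software totals kept
 * in struct gk20a, disable ELPG without rescheduling a re-enable, and
 * disable the PMU engine.
 */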
3427 int gk20a_pmu_destroy(struct gk20a *g)
3428 {
3429         struct pmu_gk20a *pmu = &g->pmu;
3430         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3431
3432         gk20a_dbg_fn("");
3433
3434         if (!support_gk20a_pmu())
3435                 return 0;
3436
3437         /* make sure the pending operations are finished before we continue */
3438         cancel_delayed_work_sync(&pmu->elpg_enable);
3439         cancel_work_sync(&pmu->pg_init);
3440
3441         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3442                 &elpg_ungating_time, &gating_cnt);
3443
3444         gk20a_pmu_disable_elpg_defer_enable(g, false);
3445         pmu->initialized = false;
3446
3447         /* update the s/w ELPG residency counters */
3448         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3449         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3450         g->pg_gating_cnt += gating_cnt;
3451
3452         pmu_enable(pmu, false);
3453
3454         if (pmu->remove_support) {
3455                 pmu->remove_support(pmu);
3456                 pmu->remove_support = NULL;
3457         }
3458
3459         gk20a_dbg_fn("done");
3460         return 0;
3461 }
3462
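/*
 * Read the current PMU perfmon load sample.  The 16-bit raw value is copied
 * out of the PMU sample buffer in DMEM and scaled down by a factor of 10;
 * 0 is reported while perfmon is not yet ready.
 */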
3463 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3464 {
3465         struct pmu_gk20a *pmu = &g->pmu;
3466         u16 _load = 0;
3467
3468         if (!pmu->perfmon_ready) {
3469                 *load = 0;
3470                 return 0;
3471         }
3472
3473         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3474         *load = _load / 10;
3475
3476         return 0;
3477 }
3478
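/*
 * Snapshot the PMU idle counters: counter 1 is returned as busy cycles and
 * counter 2 as total cycles (both 0 when the GPU is powered off).  The read
 * barrier keeps the two register reads ordered.
 */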
3479 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3480                                  u32 *total_cycles)
3481 {
3482         if (!g->power_on) {
3483                 *busy_cycles = 0;
3484                 *total_cycles = 0;
3485                 return;
3486         }
3487
3488         gk20a_busy(g->dev);
3489         *busy_cycles = pwr_pmu_idle_count_value_v(
3490                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3491         rmb();
3492         *total_cycles = pwr_pmu_idle_count_value_v(
3493                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3494         gk20a_idle(g->dev);
3495 }
3496
3497 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3498 {
3499         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3500
3501         if (!g->power_on)
3502                 return;
3503
3504         gk20a_busy(g->dev);
3505         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3506         wmb();
3507         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3508         gk20a_idle(g->dev);
3509 }
3510
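/*
 * Read the ELPG power-gating statistics (in-gating time, un-gating time,
 * gating count) that the PMU maintains in DMEM at stat_dmem_offset.  All
 * three values read back as 0 until the PMU is initialized.
 */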
3511 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3512                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3513 {
3514         struct pmu_gk20a *pmu = &g->pmu;
3515         struct pmu_pg_stats stats;
3516
3517         if (!pmu->initialized) {
3518                 *ingating_time = 0;
3519                 *ungating_time = 0;
3520                 *gating_cnt = 0;
3521                 return 0;
3522         }
3523
3524         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3525                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3526
3527         *ingating_time = stats.pg_ingating_time_us;
3528         *ungating_time = stats.pg_ungating_time_us;
3529         *gating_cnt = stats.pg_gating_cnt;
3530
3531         return 0;
3532 }
3533
3534 /* Send an Adaptive Power (AP) related command to PMU */
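/*
 * The AP command payload is copied field by field based on cmd_id; INIT and
 * INIT_AND_ENABLE_CTRL register ap_callback_init_and_enable_ctrl() as the
 * completion callback.  Blocking behaviour (b_block) is not implemented yet,
 * see the TODO below.
 */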
3535 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3536                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3537 {
3538         struct pmu_gk20a *pmu = &g->pmu;
3539         /* FIXME: where is the PG structure defined?? */
3540         u32 status = 0;
3541         struct pmu_cmd cmd;
3542         u32 seq;
3543         pmu_callback p_callback = NULL;
3544
3545         memset(&cmd, 0, sizeof(struct pmu_cmd));
3546
3547         /* Copy common members */
3548         cmd.hdr.unit_id = PMU_UNIT_PG;
3549         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3550
3551         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3552         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3553
3554         /* Copy other members of command */
3555         switch (p_ap_cmd->cmn.cmd_id) {
3556         case PMU_AP_CMD_ID_INIT:
3557                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3558                         p_ap_cmd->init.pg_sampling_period_us;
3559                 p_callback = ap_callback_init_and_enable_ctrl;
3560                 break;
3561
3562         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3563                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3564                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3565                 memcpy(
3566                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3567                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3568                         sizeof(struct pmu_ap_ctrl_init_params));
3569
3570                 p_callback = ap_callback_init_and_enable_ctrl;
3571                 break;
3572
3573         case PMU_AP_CMD_ID_ENABLE_CTRL:
3574                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3575                         p_ap_cmd->enable_ctrl.ctrl_id;
3576                 break;
3577
3578         case PMU_AP_CMD_ID_DISABLE_CTRL:
3579                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3580                         p_ap_cmd->disable_ctrl.ctrl_id;
3581                 break;
3582
3583         case PMU_AP_CMD_ID_KICK_CTRL:
3584                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3585                         p_ap_cmd->kick_ctrl.ctrl_id;
3586                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3587                         p_ap_cmd->kick_ctrl.skip_count;
3588                 break;
3589
3590         default:
3591                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3592                         __func__, p_ap_cmd->cmn.cmd_id);
3593                 return 0x2f;
3594         }
3595
3596         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3597                         p_callback, pmu, &seq, ~0);
3598
3599         if (status) {
3600                 gk20a_dbg_pmu(
3601                         "%s: Unable to submit Adaptive Power Command %d\n",
3602                         __func__, p_ap_cmd->cmn.cmd_id);
3603                 goto err_return;
3604         }
3605
3606         /* TODO: Implement blocking calls (b_block) */
3607
3608 err_return:
3609         return status;
3610 }
3611
3612 static void ap_callback_init_and_enable_ctrl(
3613                 struct gk20a *g, struct pmu_msg *msg,
3614                 void *param, u32 seq_desc, u32 status)
3615 {
3616         /* Define p_ap (i.e. pointer to pmu_ap structure) */
3617         WARN_ON(!msg);
3618
3619         if (!status) {
3620                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3621                 case PMU_AP_MSG_ID_INIT_ACK:
3622                         break;
3623
3624                 default:
3625                         gk20a_dbg_pmu(
3626                         "%s: Invalid Adaptive Power Message: %x\n",
3627                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3628                         break;
3629                 }
3630         }
3631 }
3632
3633 static int gk20a_aelpg_init(struct gk20a *g)
3634 {
3635         int status = 0;
3636
3637         /* Remove reliance on app_ctrl field. */
3638         union pmu_ap_cmd ap_cmd;
3639
3640         /* TODO: Check for elpg being ready? */
3641         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3642         ap_cmd.init.pg_sampling_period_us =
3643                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3644
3645         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3646         return status;
3647 }
3648
3649 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3650 {
3651         int status = 0;
3652         union pmu_ap_cmd ap_cmd;
3653
3654         /* TODO: Probably check if ELPG is ready? */
3655
3656         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3657         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3658         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3659                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3660         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3661                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3662         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3663                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3664         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3665                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3666
3667         switch (ctrl_id) {
3668         case PMU_AP_CTRL_ID_GRAPHICS:
3669                 break;
3670         default:
3671                 break;
3672         }
3673
3674         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3675         return status;
3676 }
3677
3678 #ifdef CONFIG_DEBUG_FS
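/*
 * debugfs support: "elpg_residency" reports time spent in and out of ELPG
 * plus a residency ratio scaled by 1000, and "elpg_transitions" reports the
 * total gating count.  Both fold the live PMU statistics into the software
 * totals accumulated in struct gk20a.
 */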
3679 static int elpg_residency_show(struct seq_file *s, void *data)
3680 {
3681         struct gk20a *g = s->private;
3682         u32 ingating_time = 0;
3683         u32 ungating_time = 0;
3684         u32 gating_cnt;
3685         u64 total_ingating, total_ungating, residency, divisor, dividend;
3686
3687         /* Don't unnecessarily power on the device */
3688         if (g->power_on) {
3689                 gk20a_busy(g->dev);
3690                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3691                         &ungating_time, &gating_cnt);
3692                 gk20a_idle(g->dev);
3693         }
3694         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3695         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3696         divisor = total_ingating + total_ungating;
3697
3698         /* We compute the residency on a scale of 1000 */
3699         dividend = total_ingating * 1000;
3700
3701         if (divisor)
3702                 residency = div64_u64(dividend, divisor);
3703         else
3704                 residency = 0;
3705
3706         seq_printf(s, "Time in ELPG: %llu us\n"
3707                         "Time out of ELPG: %llu us\n"
3708                         "ELPG residency ratio: %llu\n",
3709                         total_ingating, total_ungating, residency);
3710         return 0;
3711
3712 }
3713
3714 static int elpg_residency_open(struct inode *inode, struct file *file)
3715 {
3716         return single_open(file, elpg_residency_show, inode->i_private);
3717 }
3718
3719 static const struct file_operations elpg_residency_fops = {
3720         .open           = elpg_residency_open,
3721         .read           = seq_read,
3722         .llseek         = seq_lseek,
3723         .release        = single_release,
3724 };
3725
3726 static int elpg_transitions_show(struct seq_file *s, void *data)
3727 {
3728         struct gk20a *g = s->private;
3729         u32 ingating_time, ungating_time, total_gating_cnt;
3730         u32 gating_cnt = 0;
3731
3732         if (g->power_on) {
3733                 gk20a_busy(g->dev);
3734                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3735                         &ungating_time, &gating_cnt);
3736                 gk20a_idle(g->dev);
3737         }
3738         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3739
3740         seq_printf(s, "%u\n", total_gating_cnt);
3741         return 0;
3742
3743 }
3744
3745 static int elpg_transitions_open(struct inode *inode, struct file *file)
3746 {
3747         return single_open(file, elpg_transitions_show, inode->i_private);
3748 }
3749
3750 static const struct file_operations elpg_transitions_fops = {
3751         .open           = elpg_transitions_open,
3752         .read           = seq_read,
3753         .llseek         = seq_lseek,
3754         .release        = single_release,
3755 };
3756
3757 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3758 {
3759         struct dentry *d;
3760         struct gk20a_platform *platform = platform_get_drvdata(dev);
3761         struct gk20a *g = get_gk20a(dev);
3762
3763         d = debugfs_create_file(
3764                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3765                                                 &elpg_residency_fops);
3766         if (!d)
3767                 goto err_out;
3768
3769         d = debugfs_create_file(
3770                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3771                                                 &elpg_transitions_fops);
3772         if (!d)
3773                 goto err_out;
3774
3775         return 0;
3776
3777 err_out:
3778         pr_err("%s: Failed to make debugfs node\n", __func__);
3779         debugfs_remove_recursive(platform->debugfs);
3780         return -ENOMEM;
3781 }
3782 #endif