]> nv-tegra.nvidia Code Review - linux-3.10.git/blob - drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
gpu: nvgpu: Do not dump top_fs_status
[linux-3.10.git] / drivers / gpu / nvgpu / gk20a / pmu_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "hw_mc_gk20a.h"
31 #include "hw_pwr_gk20a.h"
32 #include "hw_top_gk20a.h"
33
34 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
35
36 #define gk20a_dbg_pmu(fmt, arg...) \
37         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
38
39 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
40 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
41                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
42 static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work);
43 static void pmu_save_zbc(struct gk20a *g, u32 entries);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
50 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
51 {
52         return sizeof(struct pmu_cmdline_args_v0);
53 }
54
55 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
56 {
57         return sizeof(struct pmu_cmdline_args_v1);
58 }
59
60 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
61 {
62         pmu->args_v1.cpu_freq_hz = freq;
63 }
64
65 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
66 {
67         pmu->args_v0.cpu_freq_hz = freq;
68 }
69
70 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
71 {
72         return (void *)(&pmu->args_v1);
73 }
74
75 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
76 {
77         return (void *)(&pmu->args_v0);
78 }
79
80 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
81 {
82         return sizeof(struct pmu_allocation_v1);
83 }
84
85 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
86 {
87         return sizeof(struct pmu_allocation_v0);
88 }
89
90 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
91         void **pmu_alloc_ptr, void *assign_ptr)
92 {
93         struct pmu_allocation_v1 **pmu_a_ptr =
94                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
95         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
96 }
97
98 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
99         void **pmu_alloc_ptr, void *assign_ptr)
100 {
101         struct pmu_allocation_v0 **pmu_a_ptr =
102                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
103         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
104 }
105
106 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
107         void *pmu_alloc_ptr, u16 size)
108 {
109         struct pmu_allocation_v1 *pmu_a_ptr =
110                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
111         pmu_a_ptr->alloc.dmem.size = size;
112 }
113
114 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
115         void *pmu_alloc_ptr, u16 size)
116 {
117         struct pmu_allocation_v0 *pmu_a_ptr =
118                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
119         pmu_a_ptr->alloc.dmem.size = size;
120 }
121
122 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
123         void *pmu_alloc_ptr)
124 {
125         struct pmu_allocation_v1 *pmu_a_ptr =
126                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
127         return pmu_a_ptr->alloc.dmem.size;
128 }
129
130 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
131         void *pmu_alloc_ptr)
132 {
133         struct pmu_allocation_v0 *pmu_a_ptr =
134                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
135         return pmu_a_ptr->alloc.dmem.size;
136 }
137
138 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
139         void *pmu_alloc_ptr)
140 {
141         struct pmu_allocation_v1 *pmu_a_ptr =
142                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
143         return pmu_a_ptr->alloc.dmem.offset;
144 }
145
146 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
147         void *pmu_alloc_ptr)
148 {
149         struct pmu_allocation_v0 *pmu_a_ptr =
150                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
151         return pmu_a_ptr->alloc.dmem.offset;
152 }
153
154 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
155         void *pmu_alloc_ptr)
156 {
157         struct pmu_allocation_v1 *pmu_a_ptr =
158                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
159         return &pmu_a_ptr->alloc.dmem.offset;
160 }
161
162 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
163         void *pmu_alloc_ptr)
164 {
165         struct pmu_allocation_v0 *pmu_a_ptr =
166                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
167         return &pmu_a_ptr->alloc.dmem.offset;
168 }
169
170 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
171         void *pmu_alloc_ptr, u32 offset)
172 {
173         struct pmu_allocation_v1 *pmu_a_ptr =
174                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
175         pmu_a_ptr->alloc.dmem.offset = offset;
176 }
177
178 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
179         void *pmu_alloc_ptr, u32 offset)
180 {
181         struct pmu_allocation_v0 *pmu_a_ptr =
182                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
183         pmu_a_ptr->alloc.dmem.offset = offset;
184 }
185
186 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
187 {
188         return (void *)(&(init->pmu_init_v1));
189 }
190
191 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
192 {
193         struct pmu_init_msg_pmu_v1 *init =
194                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
195         return init->sw_managed_area_offset;
196 }
197
198 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
199 {
200         struct pmu_init_msg_pmu_v1 *init =
201                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
202         return init->sw_managed_area_size;
203 }
204
205 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
206 {
207         return (void *)(&(init->pmu_init_v0));
208 }
209
210 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
211 {
212         struct pmu_init_msg_pmu_v0 *init =
213                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
214         return init->sw_managed_area_offset;
215 }
216
217 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
218 {
219         struct pmu_init_msg_pmu_v0 *init =
220                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
221         return init->sw_managed_area_size;
222 }
223
224 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
225 {
226         return sizeof(struct pmu_perfmon_cmd_start_v1);
227 }
228
229 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
230 {
231         return sizeof(struct pmu_perfmon_cmd_start_v0);
232 }
233
234 static int get_perfmon_cmd_start_offsetofvar_v1(
235         enum pmu_perfmon_cmd_start_fields field)
236 {
237         switch (field) {
238         case COUNTER_ALLOC:
239                 return offsetof(struct pmu_perfmon_cmd_start_v1,
240                 counter_alloc);
241         default:
242                 return -EINVAL;
243                 break;
244         }
245         return 0;
246 }
247
248 static int get_perfmon_cmd_start_offsetofvar_v0(
249         enum pmu_perfmon_cmd_start_fields field)
250 {
251         switch (field) {
252         case COUNTER_ALLOC:
253                 return offsetof(struct pmu_perfmon_cmd_start_v0,
254                 counter_alloc);
255         default:
256                 return -EINVAL;
257                 break;
258         }
259         return 0;
260 }
261
262 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
263 {
264         return sizeof(struct pmu_perfmon_cmd_init_v1);
265 }
266
267 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
268 {
269         return sizeof(struct pmu_perfmon_cmd_init_v0);
270 }
271
272 static int get_perfmon_cmd_init_offsetofvar_v1(
273         enum pmu_perfmon_cmd_start_fields field)
274 {
275         switch (field) {
276         case COUNTER_ALLOC:
277                 return offsetof(struct pmu_perfmon_cmd_init_v1,
278                 counter_alloc);
279         default:
280                 return -EINVAL;
281                 break;
282         }
283         return 0;
284 }
285
286 static int get_perfmon_cmd_init_offsetofvar_v0(
287         enum pmu_perfmon_cmd_start_fields field)
288 {
289         switch (field) {
290         case COUNTER_ALLOC:
291                 return offsetof(struct pmu_perfmon_cmd_init_v0,
292                 counter_alloc);
293         default:
294                 return -EINVAL;
295                 break;
296         }
297         return 0;
298 }
299
300 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
301 {
302         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
303         start->cmd_type = value;
304 }
305
306 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
307 {
308         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
309         start->cmd_type = value;
310 }
311
312 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
313 {
314         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
315         start->group_id = value;
316 }
317
318 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
319 {
320         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
321         start->group_id = value;
322 }
323
324 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
325 {
326         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
327         start->state_id = value;
328 }
329
330 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
331 {
332         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
333         start->state_id = value;
334 }
335
336 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
337 {
338         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
339         start->flags = value;
340 }
341
342 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
343 {
344         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
345         start->flags = value;
346 }
347
348 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
349 {
350         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
351         return start->flags;
352 }
353
354 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
355 {
356         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
357         return start->flags;
358 }
359
360 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
361         u16 value)
362 {
363         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
364         init->sample_buffer = value;
365 }
366
367 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
368         u16 value)
369 {
370         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
371         init->sample_buffer = value;
372 }
373
374 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
375         u8 value)
376 {
377         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
378         init->to_decrease_count = value;
379 }
380
381 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
382         u8 value)
383 {
384         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
385         init->to_decrease_count = value;
386 }
387
388 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
389         u8 value)
390 {
391         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
392         init->base_counter_id = value;
393 }
394
395 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
396         u8 value)
397 {
398         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
399         init->base_counter_id = value;
400 }
401
402 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
403         u32 value)
404 {
405         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
406         init->sample_period_us = value;
407 }
408
409 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
410         u32 value)
411 {
412         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
413         init->sample_period_us = value;
414 }
415
416 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
417         u8 value)
418 {
419         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
420         init->num_counters = value;
421 }
422
423 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
424         u8 value)
425 {
426         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
427         init->num_counters = value;
428 }
429
430 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
431         u8 value)
432 {
433         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
434         init->samples_in_moving_avg = value;
435 }
436
437 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
438         u8 value)
439 {
440         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
441         init->samples_in_moving_avg = value;
442 }
443
444 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
445         u32 id, void *pmu_init_msg)
446 {
447         struct pmu_init_msg_pmu_v0 *init =
448                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
449         queue->index    = init->queue_info[id].index;
450         queue->offset   = init->queue_info[id].offset;
451         queue->size = init->queue_info[id].size;
452 }
453
454 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
455         u32 id, void *pmu_init_msg)
456 {
457         struct pmu_init_msg_pmu_v1 *init =
458                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
459         queue->index    = init->queue_info[id].index;
460         queue->offset   = init->queue_info[id].offset;
461         queue->size = init->queue_info[id].size;
462 }
463
464 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
465 {
466         return (void *)(&seq->in_v1);
467 }
468
469 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
470 {
471         return (void *)(&seq->in_v0);
472 }
473
474 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
475 {
476         return (void *)(&seq->out_v1);
477 }
478
479 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
480 {
481         return (void *)(&seq->out_v0);
482 }
483
484 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
485 {
486         struct gk20a *g = pmu->g;
487         switch (pmu->desc->app_version) {
488         case APP_VERSION_1:
489                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
490                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
491                         pmu_cmdline_size_v1;
492                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
493                         set_pmu_cmdline_args_cpufreq_v1;
494                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
495                         get_pmu_cmdline_args_ptr_v1;
496                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
497                         get_pmu_allocation_size_v1;
498                 g->ops.pmu_ver.set_pmu_allocation_ptr =
499                         set_pmu_allocation_ptr_v1;
500                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
501                         pmu_allocation_set_dmem_size_v1;
502                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
503                         pmu_allocation_get_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
505                         pmu_allocation_get_dmem_offset_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
507                         pmu_allocation_get_dmem_offset_addr_v1;
508                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
509                         pmu_allocation_set_dmem_offset_v1;
510                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
511                         get_pmu_init_msg_pmu_queue_params_v1;
512                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
513                         get_pmu_msg_pmu_init_msg_ptr_v1;
514                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
515                         get_pmu_init_msg_pmu_sw_mg_off_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
517                         get_pmu_init_msg_pmu_sw_mg_size_v1;
518                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
519                         get_pmu_perfmon_cmd_start_size_v1;
520                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
521                         get_perfmon_cmd_start_offsetofvar_v1;
522                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
523                         perfmon_start_set_cmd_type_v1;
524                 g->ops.pmu_ver.perfmon_start_set_group_id =
525                         perfmon_start_set_group_id_v1;
526                 g->ops.pmu_ver.perfmon_start_set_state_id =
527                         perfmon_start_set_state_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_flags =
529                         perfmon_start_set_flags_v1;
530                 g->ops.pmu_ver.perfmon_start_get_flags =
531                         perfmon_start_get_flags_v1;
532                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
533                         get_pmu_perfmon_cmd_init_size_v1;
534                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
535                         get_perfmon_cmd_init_offsetofvar_v1;
536                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
537                         perfmon_cmd_init_set_sample_buffer_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
539                         perfmon_cmd_init_set_dec_cnt_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
541                         perfmon_cmd_init_set_base_cnt_id_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
543                         perfmon_cmd_init_set_samp_period_us_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
545                         perfmon_cmd_init_set_num_cnt_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
547                         perfmon_cmd_init_set_mov_avg_v1;
548                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
549                         get_pmu_sequence_in_alloc_ptr_v1;
550                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
551                         get_pmu_sequence_out_alloc_ptr_v1;
552                 break;
553         case APP_VERSION_0:
554                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
555                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
556                         pmu_cmdline_size_v0;
557                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
558                         set_pmu_cmdline_args_cpufreq_v0;
559                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
560                         get_pmu_cmdline_args_ptr_v0;
561                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
562                         get_pmu_allocation_size_v0;
563                 g->ops.pmu_ver.set_pmu_allocation_ptr =
564                         set_pmu_allocation_ptr_v0;
565                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
566                         pmu_allocation_set_dmem_size_v0;
567                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
568                         pmu_allocation_get_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
570                         pmu_allocation_get_dmem_offset_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
572                         pmu_allocation_get_dmem_offset_addr_v0;
573                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
574                         pmu_allocation_set_dmem_offset_v0;
575                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
576                         get_pmu_init_msg_pmu_queue_params_v0;
577                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
578                         get_pmu_msg_pmu_init_msg_ptr_v0;
579                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
580                         get_pmu_init_msg_pmu_sw_mg_off_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
582                         get_pmu_init_msg_pmu_sw_mg_size_v0;
583                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
584                         get_pmu_perfmon_cmd_start_size_v0;
585                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
586                         get_perfmon_cmd_start_offsetofvar_v0;
587                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
588                         perfmon_start_set_cmd_type_v0;
589                 g->ops.pmu_ver.perfmon_start_set_group_id =
590                         perfmon_start_set_group_id_v0;
591                 g->ops.pmu_ver.perfmon_start_set_state_id =
592                         perfmon_start_set_state_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_flags =
594                         perfmon_start_set_flags_v0;
595                 g->ops.pmu_ver.perfmon_start_get_flags =
596                         perfmon_start_get_flags_v0;
597                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
598                         get_pmu_perfmon_cmd_init_size_v0;
599                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
600                         get_perfmon_cmd_init_offsetofvar_v0;
601                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
602                         perfmon_cmd_init_set_sample_buffer_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
604                         perfmon_cmd_init_set_dec_cnt_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
606                         perfmon_cmd_init_set_base_cnt_id_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
608                         perfmon_cmd_init_set_samp_period_us_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
610                         perfmon_cmd_init_set_num_cnt_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
612                         perfmon_cmd_init_set_mov_avg_v0;
613                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
614                         get_pmu_sequence_in_alloc_ptr_v0;
615                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
616                         get_pmu_sequence_out_alloc_ptr_v0;
617                 break;
618         default:
619                 gk20a_err(dev_from_gk20a(pmu->g),
620                 "PMU code version not supported\n");
621                 return -EINVAL;
622                 break;
623         }
624         return 0;
625 }
626
627 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
628                 u32 src, u8 *dst, u32 size, u8 port)
629 {
630         struct gk20a *g = pmu->g;
631         u32 i, words, bytes;
632         u32 data, addr_mask;
633         u32 *dst_u32 = (u32*)dst;
634
635         if (size == 0) {
636                 gk20a_err(dev_from_gk20a(g),
637                         "size is zero");
638                 return;
639         }
640
641         if (src & 0x3) {
642                 gk20a_err(dev_from_gk20a(g),
643                         "src (0x%08x) not 4-byte aligned", src);
644                 return;
645         }
646
647         mutex_lock(&pmu->pmu_copy_lock);
648
649         words = size >> 2;
650         bytes = size & 0x3;
651
652         addr_mask = pwr_falcon_dmemc_offs_m() |
653                     pwr_falcon_dmemc_blk_m();
654
655         src &= addr_mask;
656
657         gk20a_writel(g, pwr_falcon_dmemc_r(port),
658                 src | pwr_falcon_dmemc_aincr_f(1));
659
660         for (i = 0; i < words; i++)
661                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
662
663         if (bytes > 0) {
664                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
665                 for (i = 0; i < bytes; i++) {
666                         dst[(words << 2) + i] = ((u8 *)&data)[i];
667                         gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x",
668                                         i, dst[(words << 2) + i]);
669                 }
670         }
671         mutex_unlock(&pmu->pmu_copy_lock);
672         return;
673 }
674
675 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
676                 u32 dst, u8 *src, u32 size, u8 port)
677 {
678         struct gk20a *g = pmu->g;
679         u32 i, words, bytes;
680         u32 data, addr_mask;
681         u32 *src_u32 = (u32*)src;
682
683         if (size == 0) {
684                 gk20a_err(dev_from_gk20a(g),
685                         "size is zero");
686                 return;
687         }
688
689         if (dst & 0x3) {
690                 gk20a_err(dev_from_gk20a(g),
691                         "dst (0x%08x) not 4-byte aligned", dst);
692                 return;
693         }
694
695         mutex_lock(&pmu->pmu_copy_lock);
696
697         words = size >> 2;
698         bytes = size & 0x3;
699
700         addr_mask = pwr_falcon_dmemc_offs_m() |
701                     pwr_falcon_dmemc_blk_m();
702
703         dst &= addr_mask;
704
705         gk20a_writel(g, pwr_falcon_dmemc_r(port),
706                 dst | pwr_falcon_dmemc_aincw_f(1));
707
708         for (i = 0; i < words; i++)
709                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
710
711         if (bytes > 0) {
712                 data = 0;
713                 for (i = 0; i < bytes; i++)
714                         ((u8 *)&data)[i] = src[(words << 2) + i];
715                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
716         }
717
718         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
719         size = ALIGN(size, 4);
720         if (data != dst + size) {
721                 gk20a_err(dev_from_gk20a(g),
722                         "copy failed. bytes written %d, expected %d",
723                         data - dst, size);
724         }
725         mutex_unlock(&pmu->pmu_copy_lock);
726         return;
727 }
728
729 static int pmu_idle(struct pmu_gk20a *pmu)
730 {
731         struct gk20a *g = pmu->g;
732         unsigned long end_jiffies = jiffies +
733                 msecs_to_jiffies(2000);
734         u32 idle_stat;
735
736         /* wait for pmu idle */
737         do {
738                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
739
740                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
741                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
742                         break;
743                 }
744
745                 if (time_after_eq(jiffies, end_jiffies)) {
746                         gk20a_err(dev_from_gk20a(g),
747                                 "timeout waiting pmu idle : 0x%08x",
748                                 idle_stat);
749                         return -EBUSY;
750                 }
751                 usleep_range(100, 200);
752         } while (1);
753
754         gk20a_dbg_fn("done");
755         return 0;
756 }
757
758 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
759 {
760         struct gk20a *g = pmu->g;
761
762         gk20a_dbg_fn("");
763
764         gk20a_writel(g, mc_intr_mask_0_r(),
765                 gk20a_readl(g, mc_intr_mask_0_r()) &
766                 ~mc_intr_mask_0_pmu_enabled_f());
767         gk20a_writel(g, mc_intr_mask_1_r(),
768                 gk20a_readl(g, mc_intr_mask_1_r()) &
769                 ~mc_intr_mask_1_pmu_enabled_f());
770
771         gk20a_writel(g, pwr_falcon_irqmclr_r(),
772                 pwr_falcon_irqmclr_gptmr_f(1)  |
773                 pwr_falcon_irqmclr_wdtmr_f(1)  |
774                 pwr_falcon_irqmclr_mthd_f(1)   |
775                 pwr_falcon_irqmclr_ctxsw_f(1)  |
776                 pwr_falcon_irqmclr_halt_f(1)   |
777                 pwr_falcon_irqmclr_exterr_f(1) |
778                 pwr_falcon_irqmclr_swgen0_f(1) |
779                 pwr_falcon_irqmclr_swgen1_f(1) |
780                 pwr_falcon_irqmclr_ext_f(0xff));
781
782         if (enable) {
783                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
784                 gk20a_writel(g, pwr_falcon_irqdest_r(),
785                         pwr_falcon_irqdest_host_gptmr_f(0)    |
786                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
787                         pwr_falcon_irqdest_host_mthd_f(0)     |
788                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
789                         pwr_falcon_irqdest_host_halt_f(1)     |
790                         pwr_falcon_irqdest_host_exterr_f(0)   |
791                         pwr_falcon_irqdest_host_swgen0_f(1)   |
792                         pwr_falcon_irqdest_host_swgen1_f(0)   |
793                         pwr_falcon_irqdest_host_ext_f(0xff)   |
794                         pwr_falcon_irqdest_target_gptmr_f(1)  |
795                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
796                         pwr_falcon_irqdest_target_mthd_f(0)   |
797                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
798                         pwr_falcon_irqdest_target_halt_f(0)   |
799                         pwr_falcon_irqdest_target_exterr_f(0) |
800                         pwr_falcon_irqdest_target_swgen0_f(0) |
801                         pwr_falcon_irqdest_target_swgen1_f(0) |
802                         pwr_falcon_irqdest_target_ext_f(0xff));
803
804                 /* 0=disable, 1=enable */
805                 gk20a_writel(g, pwr_falcon_irqmset_r(),
806                         pwr_falcon_irqmset_gptmr_f(1)  |
807                         pwr_falcon_irqmset_wdtmr_f(1)  |
808                         pwr_falcon_irqmset_mthd_f(0)   |
809                         pwr_falcon_irqmset_ctxsw_f(0)  |
810                         pwr_falcon_irqmset_halt_f(1)   |
811                         pwr_falcon_irqmset_exterr_f(1) |
812                         pwr_falcon_irqmset_swgen0_f(1) |
813                         pwr_falcon_irqmset_swgen1_f(1));
814
815                 gk20a_writel(g, mc_intr_mask_0_r(),
816                         gk20a_readl(g, mc_intr_mask_0_r()) |
817                         mc_intr_mask_0_pmu_enabled_f());
818         }
819
820         gk20a_dbg_fn("done");
821 }
822
823 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
824 {
825         struct gk20a *g = pmu->g;
826
827         gk20a_dbg_fn("");
828
829         if (enable) {
830                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
831                 gk20a_enable(g, mc_enable_pwr_enabled_f());
832
833                 do {
834                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
835                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
836                                  pwr_falcon_dmactl_imem_scrubbing_m());
837
838                         if (!w) {
839                                 gk20a_dbg_fn("done");
840                                 return 0;
841                         }
842                         udelay(GR_IDLE_CHECK_DEFAULT);
843                 } while (--retries || !tegra_platform_is_silicon());
844
845                 gk20a_disable(g, mc_enable_pwr_enabled_f());
846                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
847
848                 return -ETIMEDOUT;
849         } else {
850                 gk20a_disable(g, mc_enable_pwr_enabled_f());
851                 return 0;
852         }
853 }
854
855 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
856 {
857         struct gk20a *g = pmu->g;
858         u32 pmc_enable;
859         int err;
860
861         gk20a_dbg_fn("");
862
863         if (!enable) {
864                 pmc_enable = gk20a_readl(g, mc_enable_r());
865                 if (mc_enable_pwr_v(pmc_enable) !=
866                     mc_enable_pwr_disabled_v()) {
867
868                         pmu_enable_irq(pmu, false);
869                         pmu_enable_hw(pmu, false);
870                 }
871         } else {
872                 err = pmu_enable_hw(pmu, true);
873                 if (err)
874                         return err;
875
876                 /* TBD: post reset */
877
878                 err = pmu_idle(pmu);
879                 if (err)
880                         return err;
881
882                 pmu_enable_irq(pmu, true);
883         }
884
885         gk20a_dbg_fn("done");
886         return 0;
887 }
888
889 static int pmu_reset(struct pmu_gk20a *pmu)
890 {
891         int err;
892
893         err = pmu_idle(pmu);
894         if (err)
895                 return err;
896
897         /* TBD: release pmu hw mutex */
898
899         err = pmu_enable(pmu, false);
900         if (err)
901                 return err;
902
903         /* TBD: cancel all sequences */
904         /* TBD: init all sequences and state tables */
905         /* TBD: restore pre-init message handler */
906
907         err = pmu_enable(pmu, true);
908         if (err)
909                 return err;
910
911         return 0;
912 }
913
914 static int pmu_bootstrap(struct pmu_gk20a *pmu)
915 {
916         struct gk20a *g = pmu->g;
917         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
918         struct mm_gk20a *mm = &g->mm;
919         struct pmu_ucode_desc *desc = pmu->desc;
920         u64 addr_code, addr_data, addr_load;
921         u32 i, blocks, addr_args;
922
923         gk20a_dbg_fn("");
924
925         gk20a_writel(g, pwr_falcon_itfen_r(),
926                 gk20a_readl(g, pwr_falcon_itfen_r()) |
927                 pwr_falcon_itfen_ctxen_enable_f());
928         gk20a_writel(g, pwr_pmu_new_instblk_r(),
929                 pwr_pmu_new_instblk_ptr_f(
930                         mm->pmu.inst_block.cpu_pa >> 12) |
931                 pwr_pmu_new_instblk_valid_f(1) |
932                 pwr_pmu_new_instblk_target_sys_coh_f());
933
934         /* TBD: load all other surfaces */
935
936         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
937                 clk_get_rate(platform->clk[1]));
938
939         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
940                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
941                         << GK20A_PMU_DMEM_BLKSIZE2) -
942                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
943
944         pmu_copy_to_dmem(pmu, addr_args,
945                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
946                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
947
948         gk20a_writel(g, pwr_falcon_dmemc_r(0),
949                 pwr_falcon_dmemc_offs_f(0) |
950                 pwr_falcon_dmemc_blk_f(0)  |
951                 pwr_falcon_dmemc_aincw_f(1));
952
953         addr_code = u64_lo32((pmu->ucode.pmu_va +
954                         desc->app_start_offset +
955                         desc->app_resident_code_offset) >> 8) ;
956         addr_data = u64_lo32((pmu->ucode.pmu_va +
957                         desc->app_start_offset +
958                         desc->app_resident_data_offset) >> 8);
959         addr_load = u64_lo32((pmu->ucode.pmu_va +
960                         desc->bootloader_start_offset) >> 8);
961
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
970         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
971         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
972
973         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
974                 addr_load - (desc->bootloader_imem_offset >> 8));
975
976         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
977
978         for (i = 0; i < blocks; i++) {
979                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
982                         desc->bootloader_imem_offset + (i << 8));
983                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
984                         pwr_falcon_dmatrfcmd_imem_f(1)  |
985                         pwr_falcon_dmatrfcmd_write_f(0) |
986                         pwr_falcon_dmatrfcmd_size_f(6)  |
987                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
988         }
989
990         gk20a_writel(g, pwr_falcon_bootvec_r(),
991                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
992
993         gk20a_writel(g, pwr_falcon_cpuctl_r(),
994                 pwr_falcon_cpuctl_startcpu_f(1));
995
996         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
997
998         return 0;
999 }
1000
1001 static void pmu_seq_init(struct pmu_gk20a *pmu)
1002 {
1003         u32 i;
1004
1005         memset(pmu->seq, 0,
1006                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1007         memset(pmu->pmu_seq_tbl, 0,
1008                 sizeof(pmu->pmu_seq_tbl));
1009
1010         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1011                 pmu->seq[i].id = i;
1012 }
1013
1014 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1015                         struct pmu_sequence **pseq)
1016 {
1017         struct gk20a *g = pmu->g;
1018         struct pmu_sequence *seq;
1019         u32 index;
1020
1021         mutex_lock(&pmu->pmu_seq_lock);
1022         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1023                                 sizeof(pmu->pmu_seq_tbl));
1024         if (index >= sizeof(pmu->pmu_seq_tbl)) {
1025                 gk20a_err(dev_from_gk20a(g),
1026                         "no free sequence available");
1027                 mutex_unlock(&pmu->pmu_seq_lock);
1028                 return -EAGAIN;
1029         }
1030         set_bit(index, pmu->pmu_seq_tbl);
1031         mutex_unlock(&pmu->pmu_seq_lock);
1032
1033         seq = &pmu->seq[index];
1034         seq->state = PMU_SEQ_STATE_PENDING;
1035
1036         *pseq = seq;
1037         return 0;
1038 }
1039
1040 static void pmu_seq_release(struct pmu_gk20a *pmu,
1041                         struct pmu_sequence *seq)
1042 {
1043         struct gk20a *g = pmu->g;
1044         seq->state      = PMU_SEQ_STATE_FREE;
1045         seq->desc       = PMU_INVALID_SEQ_DESC;
1046         seq->callback   = NULL;
1047         seq->cb_params  = NULL;
1048         seq->msg        = NULL;
1049         seq->out_payload = NULL;
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1052         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1053                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1054
1055         clear_bit(seq->id, pmu->pmu_seq_tbl);
1056 }
1057
1058 static int pmu_queue_init(struct pmu_gk20a *pmu,
1059                 u32 id, union pmu_init_msg_pmu *init)
1060 {
1061         struct gk20a *g = pmu->g;
1062         struct pmu_queue *queue = &pmu->queue[id];
1063         queue->id       = id;
1064         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1065
1066         queue->mutex_id = id;
1067         mutex_init(&queue->mutex);
1068
1069         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1070                 id, queue->index, queue->offset, queue->size);
1071
1072         return 0;
1073 }
1074
1075 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1076                         u32 *head, bool set)
1077 {
1078         struct gk20a *g = pmu->g;
1079
1080         BUG_ON(!head);
1081
1082         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1083
1084                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1085                         return -EINVAL;
1086
1087                 if (!set)
1088                         *head = pwr_pmu_queue_head_address_v(
1089                                 gk20a_readl(g,
1090                                         pwr_pmu_queue_head_r(queue->index)));
1091                 else
1092                         gk20a_writel(g,
1093                                 pwr_pmu_queue_head_r(queue->index),
1094                                 pwr_pmu_queue_head_address_f(*head));
1095         } else {
1096                 if (!set)
1097                         *head = pwr_pmu_msgq_head_val_v(
1098                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1099                 else
1100                         gk20a_writel(g,
1101                                 pwr_pmu_msgq_head_r(),
1102                                 pwr_pmu_msgq_head_val_f(*head));
1103         }
1104
1105         return 0;
1106 }
1107
1108 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1109                         u32 *tail, bool set)
1110 {
1111         struct gk20a *g = pmu->g;
1112
1113         BUG_ON(!tail);
1114
1115         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1116
1117                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1118                         return -EINVAL;
1119
1120                 if (!set)
1121                         *tail = pwr_pmu_queue_tail_address_v(
1122                                 gk20a_readl(g,
1123                                         pwr_pmu_queue_tail_r(queue->index)));
1124                 else
1125                         gk20a_writel(g,
1126                                 pwr_pmu_queue_tail_r(queue->index),
1127                                 pwr_pmu_queue_tail_address_f(*tail));
1128         } else {
1129                 if (!set)
1130                         *tail = pwr_pmu_msgq_tail_val_v(
1131                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1132                 else
1133                         gk20a_writel(g,
1134                                 pwr_pmu_msgq_tail_r(),
1135                                 pwr_pmu_msgq_tail_val_f(*tail));
1136         }
1137
1138         return 0;
1139 }
1140
1141 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1142                         u32 offset, u8 *dst, u32 size)
1143 {
1144         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1145 }
1146
1147 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1148                         u32 offset, u8 *src, u32 size)
1149 {
1150         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1151 }
1152
1153 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1154 {
1155         struct gk20a *g = pmu->g;
1156         struct pmu_mutex *mutex;
1157         u32 data, owner, max_retry;
1158
1159         if (!pmu->initialized)
1160                 return 0;
1161
1162         BUG_ON(!token);
1163         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1164         BUG_ON(id > pmu->mutex_cnt);
1165
1166         mutex = &pmu->mutex[id];
1167
1168         owner = pwr_pmu_mutex_value_v(
1169                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1170
1171         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1172                 BUG_ON(mutex->ref_cnt == 0);
1173                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1174                 mutex->ref_cnt++;
1175                 return 0;
1176         }
1177
1178         max_retry = 40;
1179         do {
1180                 data = pwr_pmu_mutex_id_value_v(
1181                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1182                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1183                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1184                         gk20a_warn(dev_from_gk20a(g),
1185                                 "fail to generate mutex token: val 0x%08x",
1186                                 owner);
1187                         usleep_range(20, 40);
1188                         continue;
1189                 }
1190
1191                 owner = data;
1192                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1193                         pwr_pmu_mutex_value_f(owner));
1194
1195                 data = pwr_pmu_mutex_value_v(
1196                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1197
1198                 if (owner == data) {
1199                         mutex->ref_cnt = 1;
1200                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1201                                 mutex->index, *token);
1202                         *token = owner;
1203                         return 0;
1204                 } else {
1205                         gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1206                                 mutex->index);
1207
1208                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1209                         data = set_field(data,
1210                                 pwr_pmu_mutex_id_release_value_m(),
1211                                 pwr_pmu_mutex_id_release_value_f(owner));
1212                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1213
1214                         usleep_range(20, 40);
1215                         continue;
1216                 }
1217         } while (max_retry-- > 0);
1218
1219         return -EBUSY;
1220 }
1221
1222 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1223 {
1224         struct gk20a *g = pmu->g;
1225         struct pmu_mutex *mutex;
1226         u32 owner, data;
1227
1228         if (!pmu->initialized)
1229                 return 0;
1230
1231         BUG_ON(!token);
1232         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1233         BUG_ON(id > pmu->mutex_cnt);
1234
1235         mutex = &pmu->mutex[id];
1236
1237         owner = pwr_pmu_mutex_value_v(
1238                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1239
1240         if (*token != owner) {
1241                 gk20a_err(dev_from_gk20a(g),
1242                         "requester 0x%08x NOT match owner 0x%08x",
1243                         *token, owner);
1244                 return -EINVAL;
1245         }
1246
1247         if (--mutex->ref_cnt == 0) {
1248                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1249                         pwr_pmu_mutex_value_initial_lock_f());
1250
1251                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1252                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1253                         pwr_pmu_mutex_id_release_value_f(owner));
1254                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1255
1256                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1257                         mutex->index, *token);
1258         }
1259
1260         return 0;
1261 }
1262
1263 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1264                         struct pmu_queue *queue)
1265 {
1266         int err;
1267
1268         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1269                 return 0;
1270
1271         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1272                 mutex_lock(&queue->mutex);
1273                 return 0;
1274         }
1275
1276         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1277         return err;
1278 }
1279
1280 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1281                         struct pmu_queue *queue)
1282 {
1283         int err;
1284
1285         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1286                 return 0;
1287
1288         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1289                 mutex_unlock(&queue->mutex);
1290                 return 0;
1291         }
1292
1293         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1294         return err;
1295 }
1296
1297 /* called by pmu_read_message, no lock */
1298 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1299                         struct pmu_queue *queue)
1300 {
1301         u32 head, tail;
1302
1303         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1304         if (queue->opened && queue->oflag == OFLAG_READ)
1305                 tail = queue->position;
1306         else
1307                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1308
1309         return head == tail;
1310 }
1311
1312 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1313                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1314 {
1315         u32 head, tail, free;
1316         bool rewind = false;
1317
1318         size = ALIGN(size, QUEUE_ALIGNMENT);
1319
1320         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1321         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1322
1323         if (head >= tail) {
1324                 free = queue->offset + queue->size - head;
1325                 free -= PMU_CMD_HDR_SIZE;
1326
1327                 if (size > free) {
1328                         rewind = true;
1329                         head = queue->offset;
1330                 }
1331         }
1332
1333         if (head < tail)
1334                 free = tail - head - 1;
1335
1336         if (need_rewind)
1337                 *need_rewind = rewind;
1338
1339         return size <= free;
1340 }
1341
1342 static int pmu_queue_push(struct pmu_gk20a *pmu,
1343                         struct pmu_queue *queue, void *data, u32 size)
1344 {
1345         gk20a_dbg_fn("");
1346
1347         if (!queue->opened && queue->oflag == OFLAG_WRITE){
1348                 gk20a_err(dev_from_gk20a(pmu->g),
1349                         "queue not opened for write");
1350                 return -EINVAL;
1351         }
1352
1353         pmu_queue_write(pmu, queue->position, data, size);
1354         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1355         return 0;
1356 }
1357
1358 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1359                         struct pmu_queue *queue, void *data, u32 size,
1360                         u32 *bytes_read)
1361 {
1362         u32 head, tail, used;
1363
1364         *bytes_read = 0;
1365
1366         if (!queue->opened && queue->oflag == OFLAG_READ){
1367                 gk20a_err(dev_from_gk20a(pmu->g),
1368                         "queue not opened for read");
1369                 return -EINVAL;
1370         }
1371
1372         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1373         tail = queue->position;
1374
1375         if (head == tail)
1376                 return 0;
1377
1378         if (head > tail)
1379                 used = head - tail;
1380         else
1381                 used = queue->offset + queue->size - tail;
1382
1383         if (size > used) {
1384                 gk20a_warn(dev_from_gk20a(pmu->g),
1385                         "queue size smaller than request read");
1386                 size = used;
1387         }
1388
1389         pmu_queue_read(pmu, tail, data, size);
1390         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1391         *bytes_read = size;
1392         return 0;
1393 }
1394
1395 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1396                         struct pmu_queue *queue)
1397 {
1398         struct pmu_cmd cmd;
1399
1400         gk20a_dbg_fn("");
1401
1402         if (!queue->opened) {
1403                 gk20a_err(dev_from_gk20a(pmu->g),
1404                         "queue not opened");
1405                 return;
1406         }
1407
1408         if (queue->oflag == OFLAG_WRITE) {
1409                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1410                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1411                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1412                 gk20a_dbg_pmu("queue %d rewinded", queue->id);
1413         }
1414
1415         queue->position = queue->offset;
1416         return;
1417 }
1418
1419 /* open for read and lock the queue */
1420 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1421                         struct pmu_queue *queue)
1422 {
1423         int err;
1424
1425         err = pmu_queue_lock(pmu, queue);
1426         if (err)
1427                 return err;
1428
1429         if (queue->opened)
1430                 BUG();
1431
1432         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1433         queue->oflag = OFLAG_READ;
1434         queue->opened = true;
1435
1436         return 0;
1437 }
1438
1439 /* open for write and lock the queue
1440    make sure there's enough free space for the write */
1441 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1442                         struct pmu_queue *queue, u32 size)
1443 {
1444         bool rewind = false;
1445         int err;
1446
1447         err = pmu_queue_lock(pmu, queue);
1448         if (err)
1449                 return err;
1450
1451         if (queue->opened)
1452                 BUG();
1453
1454         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1455                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1456                 return -EAGAIN;
1457         }
1458
1459         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1460         queue->oflag = OFLAG_WRITE;
1461         queue->opened = true;
1462
1463         if (rewind)
1464                 pmu_queue_rewind(pmu, queue);
1465
1466         return 0;
1467 }
1468
1469 /* close and unlock the queue */
1470 static int pmu_queue_close(struct pmu_gk20a *pmu,
1471                         struct pmu_queue *queue, bool commit)
1472 {
1473         if (!queue->opened)
1474                 return 0;
1475
1476         if (commit) {
1477                 if (queue->oflag == OFLAG_READ) {
1478                         pmu_queue_tail(pmu, queue,
1479                                 &queue->position, QUEUE_SET);
1480                 }
1481                 else {
1482                         pmu_queue_head(pmu, queue,
1483                                 &queue->position, QUEUE_SET);
1484                 }
1485         }
1486
1487         queue->opened = false;
1488
1489         pmu_queue_unlock(pmu, queue);
1490
1491         return 0;
1492 }
1493
1494 static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu,
1495                         struct gk20a_pmu_save_state *save)
1496 {
1497         save->seq = pmu->seq;
1498         save->next_seq_desc = pmu->next_seq_desc;
1499         save->mutex = pmu->mutex;
1500         save->mutex_cnt = pmu->mutex_cnt;
1501         save->desc = pmu->desc;
1502         save->ucode = pmu->ucode;
1503         save->elpg_enable = pmu->elpg_enable;
1504         save->pg_wq = pmu->pg_wq;
1505         save->seq_buf = pmu->seq_buf;
1506         save->pg_buf = pmu->pg_buf;
1507         save->sw_ready = pmu->sw_ready;
1508         save->pg_init = pmu->pg_init;
1509 }
1510
1511 static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
1512                         struct gk20a_pmu_save_state *save)
1513 {
1514         pmu->seq = save->seq;
1515         pmu->next_seq_desc = save->next_seq_desc;
1516         pmu->mutex = save->mutex;
1517         pmu->mutex_cnt = save->mutex_cnt;
1518         pmu->desc = save->desc;
1519         pmu->ucode = save->ucode;
1520         pmu->elpg_enable = save->elpg_enable;
1521         pmu->pg_wq = save->pg_wq;
1522         pmu->seq_buf = save->seq_buf;
1523         pmu->pg_buf = save->pg_buf;
1524         pmu->sw_ready = save->sw_ready;
1525         pmu->pg_init = save->pg_init;
1526 }
1527
1528 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1529 {
1530         struct gk20a_pmu_save_state save;
1531
1532         gk20a_dbg_fn("");
1533
1534         gk20a_allocator_destroy(&pmu->dmem);
1535
1536         /* Save the stuff you don't want to lose */
1537         gk20a_save_pmu_sw_state(pmu, &save);
1538
1539         /* this function is also called by pmu_destroy() outside the gk20a
1540            deinit path that releases the gk20a struct, so zero the pmu here. */
1541         memset(pmu, 0, sizeof(struct pmu_gk20a));
1542
1543         /* Restore stuff you want to keep */
1544         gk20a_restore_pmu_sw_state(pmu, &save);
1545 }
1546
1547 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1548 {
1549         struct pmu_gk20a *pmu = &g->pmu;
1550
1551         gk20a_dbg_fn("");
1552
1553         pmu_enable_hw(pmu, true);
1554
1555         return 0;
1556 }
1557
1558 static void pmu_elpg_enable_allow(struct work_struct *work);
1559
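/* One-time PMU SW setup: allocate the HW mutex table and sequence tracking,
 * load the PMU ucode firmware image, and allocate/map the ucode and ZBC
 * sequence buffers into the PMU VM. On a repeat call (sw_ready already set)
 * only the mutex and sequence state is re-initialized. */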
1560 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1561 {
1562         struct pmu_gk20a *pmu = &g->pmu;
1563         struct mm_gk20a *mm = &g->mm;
1564         struct vm_gk20a *vm = &mm->pmu.vm;
1565         struct device *d = dev_from_gk20a(g);
1566         int i, err = 0;
1567         u8 *ptr;
1568         void *ucode_ptr;
1569         struct sg_table *sgt_pmu_ucode;
1570         struct sg_table *sgt_seq_buf;
1571         DEFINE_DMA_ATTRS(attrs);
1572         dma_addr_t iova;
1573
1574         gk20a_dbg_fn("");
1575
1576         if (pmu->sw_ready) {
1577                 for (i = 0; i < pmu->mutex_cnt; i++) {
1578                         pmu->mutex[i].id    = i;
1579                         pmu->mutex[i].index = i;
1580                 }
1581                 pmu_seq_init(pmu);
1582
1583                 gk20a_dbg_fn("skip init");
1584                 goto skip_init;
1585         }
1586
1587         /* no infoRom script from vbios? */
1588
1589         /* TBD: sysmon subtask */
1590
1591         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1592         pmu->mutex = kzalloc(pmu->mutex_cnt *
1593                 sizeof(struct pmu_mutex), GFP_KERNEL);
1594         if (!pmu->mutex) {
1595                 err = -ENOMEM;
1596                 goto err;
1597         }
1598
1599         for (i = 0; i < pmu->mutex_cnt; i++) {
1600                 pmu->mutex[i].id    = i;
1601                 pmu->mutex[i].index = i;
1602         }
1603
1604         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1605                 sizeof(struct pmu_sequence), GFP_KERNEL);
1606         if (!pmu->seq) {
1607                 err = -ENOMEM;
1608                 goto err_free_mutex;
1609         }
1610
1611         pmu_seq_init(pmu);
1612
1613         if (!g->pmu_fw) {
1614                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1615                 if (!g->pmu_fw) {
1616                         gk20a_err(d, "failed to load pmu ucode!!");
1617                         err = -ENOENT;
1618                         goto err_free_seq;
1619                 }
1620         }
1621
1622         gk20a_dbg_fn("firmware loaded");
1623
1624         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1625         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1626                         pmu->desc->descriptor_size);
1627
1628
1629         INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow);
1630         INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue);
1631
1632         gk20a_init_pmu_vm(mm);
1633
1634         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1635         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1636                                         &iova,
1637                                         GFP_KERNEL,
1638                                         &attrs);
1639         if (!pmu->ucode.cpuva) {
1640                 gk20a_err(d, "failed to allocate memory\n");
1641                 err = -ENOMEM;
1642                 goto err_release_fw;
1643         }
1644
1645         pmu->ucode.iova = iova;
1646         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1647                                         &iova,
1648                                         GFP_KERNEL);
1649         if (!pmu->seq_buf.cpuva) {
1650                 gk20a_err(d, "failed to allocate memory\n");
1651                 err = -ENOMEM;
1652                 goto err_free_pmu_ucode;
1653         }
1654
1655         pmu->seq_buf.iova = iova;
1656         init_waitqueue_head(&pmu->pg_wq);
1657
1658         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1659                                 pmu->ucode.cpuva,
1660                                 pmu->ucode.iova,
1661                                 GK20A_PMU_UCODE_SIZE_MAX);
1662         if (err) {
1663                 gk20a_err(d, "failed to allocate sg table\n");
1664                 goto err_free_seq_buf;
1665         }
1666
1667         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1668                                         GK20A_PMU_UCODE_SIZE_MAX,
1669                                         0, /* flags */
1670                                         gk20a_mem_flag_read_only);
1671         if (!pmu->ucode.pmu_va) {
1672                 gk20a_err(d, "failed to map pmu ucode memory!!");
                err = -ENOMEM;
1673                 goto err_free_ucode_sgt;
1674         }
1675
1676         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1677                                 pmu->seq_buf.cpuva,
1678                                 pmu->seq_buf.iova,
1679                                 GK20A_PMU_SEQ_BUF_SIZE);
1680         if (err) {
1681                 gk20a_err(d, "failed to allocate sg table\n");
1682                 goto err_unmap_ucode;
1683         }
1684
1685         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1686                                         GK20A_PMU_SEQ_BUF_SIZE,
1687                                         0, /* flags */
1688                                         gk20a_mem_flag_none);
1689         if (!pmu->seq_buf.pmu_va) {
1690                 gk20a_err(d, "failed to map pmu seq buffer memory!!");
                err = -ENOMEM;
1691                 goto err_free_seq_buf_sgt;
1692         }
1693
1694         ptr = (u8 *)pmu->seq_buf.cpuva;
1695         if (!ptr) {
1696                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                err = -ENOMEM;
1697                 goto err_unmap_seq_buf;
1698         }
1699
1700         /* TBD: remove this if ZBC save/restore is handled by PMU;
1701          * use an empty ZBC sequence for now */
1702         ptr[0] = 0x16; /* opcode EXIT */
1703         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1704         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1705
1706         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1707
1708         ucode_ptr = pmu->ucode.cpuva;
1709
1710         for (i = 0; i < (pmu->desc->app_start_offset +
1711                         pmu->desc->app_size) >> 2; i++)
1712                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1713
1714         gk20a_free_sgtable(&sgt_pmu_ucode);
1715         gk20a_free_sgtable(&sgt_seq_buf);
1716
1717 skip_init:
1718         mutex_init(&pmu->elpg_mutex);
1719         mutex_init(&pmu->isr_mutex);
1720         mutex_init(&pmu->pmu_copy_lock);
1721         mutex_init(&pmu->pmu_seq_lock);
1722
1723         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1724         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1725
1726         pmu->remove_support = gk20a_remove_pmu_support;
1727         err = gk20a_init_pmu(pmu);
1728         if (err) {
1729                 gk20a_err(d, "failed to set function pointers\n");
1730                 return err;
1731         }
1732
1733         gk20a_dbg_fn("done");
1734         return 0;
1735
1736  err_unmap_seq_buf:
1737         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1738                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1739  err_free_seq_buf_sgt:
1740         gk20a_free_sgtable(&sgt_seq_buf);
1741  err_unmap_ucode:
1742         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1743                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1744  err_free_ucode_sgt:
1745         gk20a_free_sgtable(&sgt_pmu_ucode);
1746  err_free_seq_buf:
1747         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1748                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1749         pmu->seq_buf.cpuva = NULL;
1750         pmu->seq_buf.iova = 0;
1751  err_free_pmu_ucode:
1752         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1753                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1754         pmu->ucode.cpuva = NULL;
1755         pmu->ucode.iova = 0;
1756  err_release_fw:
1757         release_firmware(g->pmu_fw);
1758  err_free_seq:
1759         kfree(pmu->seq);
1760  err_free_mutex:
1761         kfree(pmu->mutex);
1762  err:
1763         gk20a_dbg_fn("fail");
1764         return err;
1765 }
1766
1767 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1768                         void *param, u32 handle, u32 status);
1769
1770 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1771                         void *param, u32 handle, u32 status)
1772 {
1773         struct pmu_gk20a *pmu = param;
1774         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1775
1776         gk20a_dbg_fn("");
1777
1778         if (status != 0) {
1779                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1780                 /* TBD: disable ELPG */
1781                 return;
1782         }
1783
1784         if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1785                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1786         }
1787
1788         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1789         wake_up(&pmu->pg_wq);
1790 }
1791
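/* First-stage HW init: reset the PMU falcon, program the FBIF apertures for
 * the virtual and physical DMA indexes, and bootstrap the ucode. */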
1792 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1793 {
1794         struct pmu_gk20a *pmu = &g->pmu;
1795         int err;
1796
1797         gk20a_dbg_fn("");
1798
1799         pmu_reset(pmu);
1800
1801         /* setup apertures - virtual */
1802         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1803                 pwr_fbif_transcfg_mem_type_virtual_f());
1804         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1805                 pwr_fbif_transcfg_mem_type_virtual_f());
1806         /* setup apertures - physical */
1807         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1808                 pwr_fbif_transcfg_mem_type_physical_f() |
1809                 pwr_fbif_transcfg_target_local_fb_f());
1810         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1811                 pwr_fbif_transcfg_mem_type_physical_f() |
1812                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1813         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1814                 pwr_fbif_transcfg_mem_type_physical_f() |
1815                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1816
1817         /* TBD: load pmu ucode */
1818         err = pmu_bootstrap(pmu);
1819         if (err)
1820                 return err;
1821
1822         return 0;
1823
1824 }
1825
1826 static int gk20a_aelpg_init(struct gk20a *g);
1827 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1828
1829
1830 static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work)
1831 {
1832         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1833         struct gk20a *g = pmu->g;
1834         gk20a_init_pmu_setup_hw2(g);
1835 }
1836
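/* Second-stage init, typically run from the pg_init work item once the PMU
 * has sent its INIT message: allocate and map the FECS power-gating buffer,
 * wait for the PG init ack, bind the reglist, load the FECS and ZBC buffers
 * into the PMU via ENG_BUF_LOAD commands, then enable ELPG/AELPG as
 * configured. */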
1837 int gk20a_init_pmu_setup_hw2(struct gk20a *g)
1838 {
1839         struct pmu_gk20a *pmu = &g->pmu;
1840         struct mm_gk20a *mm = &g->mm;
1841         struct vm_gk20a *vm = &mm->pmu.vm;
1842         struct device *d = dev_from_gk20a(g);
1843         struct pmu_cmd cmd;
1844         u32 desc;
1845         long remain;
1846         int err;
1847         bool status;
1848         u32 size;
1849         struct sg_table *sgt_pg_buf;
1850         dma_addr_t iova;
1851
1852         gk20a_dbg_fn("");
1853
1854         if (!support_gk20a_pmu())
1855                 return 0;
1856
1857         size = 0;
1858         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1859         if (err) {
1860                 gk20a_err(dev_from_gk20a(g),
1861                         "fail to query fecs pg buffer size");
1862                 return err;
1863         }
1864
1865         if (!pmu->sw_ready) {
1866                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1867                                                 &iova,
1868                                                 GFP_KERNEL);
1869                 if (!pmu->pg_buf.cpuva) {
1870                         gk20a_err(d, "failed to allocate memory\n");
1871                         err = -ENOMEM;
1872                         goto err;
1873                 }
1874
1875                 pmu->pg_buf.iova = iova;
1876                 pmu->pg_buf.size = size;
1877
1878                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1879                                         pmu->pg_buf.cpuva,
1880                                         pmu->pg_buf.iova,
1881                                         size);
1882                 if (err) {
1883                         gk20a_err(d, "failed to create sg table\n");
1884                         goto err_free_pg_buf;
1885                 }
1886
1887                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1888                                         &sgt_pg_buf,
1889                                         size,
1890                                         0, /* flags */
1891                                         gk20a_mem_flag_none);
1892                 if (!pmu->pg_buf.pmu_va) {
1893                         gk20a_err(d, "failed to map fecs pg buffer");
1894                         err = -ENOMEM;
1895                         goto err_free_sgtable;
1896                 }
1897
1898                 gk20a_free_sgtable(&sgt_pg_buf);
1899         }
1900
1901         /*
1902          * This is the actual point at which sw setup is complete, so set the
1903          * sw_ready flag here.
1904          */
1905         pmu->sw_ready = true;
1906
1907         /* TBD: acquire pmu hw mutex */
1908
1909         /* TBD: post reset again? */
1910
1911         /* PMU_INIT message handler will send PG_INIT */
1912         remain = wait_event_timeout(
1913                         pmu->pg_wq,
1914                         (status = (pmu->elpg_ready &&
1915                                 pmu->stat_dmem_offset != 0 &&
1916                                 pmu->elpg_stat == PMU_ELPG_STAT_OFF)),
1917                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1918         if (status == 0) {
1919                 gk20a_err(dev_from_gk20a(g),
1920                         "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain);
1921                 pmu_dump_falcon_stats(pmu);
1922                 return -EBUSY;
1923         }
1924
1925         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1926         if (err) {
1927                 gk20a_err(dev_from_gk20a(g),
1928                         "fail to bind pmu inst to gr");
1929                 return err;
1930         }
1931
1932         err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va);
1933         if (err) {
1934                 gk20a_err(dev_from_gk20a(g),
1935                         "fail to set pg buffer pmu va");
1936                 return err;
1937         }
1938
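        /* DMA descriptor for the ENG_BUF_LOAD command: dma_base carries the
         * PMU VA shifted right by 8, dma_offset the low byte of that VA. */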
1939         memset(&cmd, 0, sizeof(struct pmu_cmd));
1940         cmd.hdr.unit_id = PMU_UNIT_PG;
1941         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1942         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1943         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1944         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1945         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
1946         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1947         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1948         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1949
1950         pmu->buf_loaded = false;
1951         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1952                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1953
1954         remain = wait_event_timeout(
1955                         pmu->pg_wq,
1956                         pmu->buf_loaded,
1957                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1958         if (!pmu->buf_loaded) {
1959                 gk20a_err(dev_from_gk20a(g),
1960                         "PGENG FECS buffer load failed, remaining timeout : 0x%lx",
1961                         remain);
1962                 return -EBUSY;
1963         }
1964
1965         memset(&cmd, 0, sizeof(struct pmu_cmd));
1966         cmd.hdr.unit_id = PMU_UNIT_PG;
1967         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1968         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1969         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1970         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1971         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1972         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1973         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1974         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1975
1976         pmu->buf_loaded = false;
1977         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1978                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1979
1980         remain = wait_event_timeout(
1981                         pmu->pg_wq,
1982                         pmu->buf_loaded,
1983                         msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
1984         if (!pmu->buf_loaded) {
1985                 gk20a_err(dev_from_gk20a(g),
1986                         "PGENG ZBC buffer load failed, remaining timeout 0x%lx",
1987                         remain);
1988                 return -EBUSY;
1989         }
1990
1991         /*
1992          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1993          * 7. This prevents PMU stalling on Host register accesses. Once the
1994          * cause for this hang is discovered and fixed, this WAR should be
1995          * removed.
1996          */
1997         gk20a_writel(g, 0x10a164, 0x109ff);
1998
1999         pmu->initialized = true;
2000
2001         /*
2002          * We can't guarantee that gr code to enable ELPG will be
2003          * invoked, so we explicitly call disable-enable here
2004          * to enable elpg.
2005          */
2006         gk20a_pmu_disable_elpg(g);
2007
2008         pmu->zbc_ready = true;
2009         /* Save zbc table after PMU is initialized. */
2010         pmu_save_zbc(g, 0xf);
2011
2012         if (g->elpg_enabled)
2013                 gk20a_pmu_enable_elpg(g);
2014
2015         udelay(50);
2016
2017         /* Enable AELPG */
2018         if (g->aelpg_enabled) {
2019                 gk20a_aelpg_init(g);
2020                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
2021         }
2022
2023         return 0;
2024
2025  err_free_sgtable:
2026         gk20a_free_sgtable(&sgt_pg_buf);
2027  err_free_pg_buf:
2028         dma_free_coherent(d, size,
2029                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
2030         pmu->pg_buf.cpuva = NULL;
2031         pmu->pg_buf.iova = 0;
2032  err:
2033         return err;
2034 }
2035
2036 int gk20a_init_pmu_support(struct gk20a *g)
2037 {
2038         struct pmu_gk20a *pmu = &g->pmu;
2039         int err;
2040
2041         gk20a_dbg_fn("");
2042
2043         if (pmu->initialized)
2044                 return 0;
2045
2046         pmu->g = g;
2047
2048         err = gk20a_init_pmu_reset_enable_hw(g);
2049         if (err)
2050                 return err;
2051
2052         if (support_gk20a_pmu()) {
2053                 err = gk20a_init_pmu_setup_sw(g);
2054                 if (err)
2055                         return err;
2056
2057                 err = gk20a_init_pmu_setup_hw1(g);
2058                 if (err)
2059                         return err;
2060         }
2061
2062         return err;
2063 }
2064
2065 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2066                         void *param, u32 handle, u32 status)
2067 {
2068         struct pmu_gk20a *pmu = param;
2069         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2070
2071         gk20a_dbg_fn("");
2072
2073         if (status != 0) {
2074                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2075                 /* TBD: disable ELPG */
2076                 return;
2077         }
2078
2079         switch (elpg_msg->msg) {
2080         case PMU_PG_ELPG_MSG_INIT_ACK:
2081                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2082                 pmu->elpg_ready = true;
2083                 wake_up(&pmu->pg_wq);
2084                 break;
2085         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2086                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2087                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2088                 wake_up(&pmu->pg_wq);
2089                 break;
2090         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2091                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2092                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2093                 wake_up(&pmu->pg_wq);
2094                 break;
2095         default:
2096                 gk20a_err(dev_from_gk20a(g),
2097                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2098         }
2099
2100         return;
2101 }
2102
2103 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2104                         void *param, u32 handle, u32 status)
2105 {
2106         struct pmu_gk20a *pmu = param;
2107
2108         gk20a_dbg_fn("");
2109
2110         if (status != 0) {
2111                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2112                 /* TBD: disable ELPG */
2113                 return;
2114         }
2115
2116         switch (msg->msg.pg.stat.sub_msg_id) {
2117         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2118                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2119                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2120                 wake_up(&pmu->pg_wq);
2121                 break;
2122         default:
2123                 break;
2124         }
2125 }
2126
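/* Post the initial power-gating command sequence: program the idle
 * thresholds, send ELPG INIT, allocate DMEM for the PG statistics log, and
 * send an initial DISALLOW (the ucode requires a disallow before the first
 * allow). */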
2127 static int pmu_init_powergating(struct pmu_gk20a *pmu)
2128 {
2129         struct gk20a *g = pmu->g;
2130         struct pmu_cmd cmd;
2131         u32 seq;
2132
2133         gk20a_dbg_fn("");
2134
2135         if (tegra_cpu_is_asim()) {
2136                 /* TBD: calculate threshold for silicon */
2137                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2138                                 PMU_PG_IDLE_THRESHOLD_SIM);
2139                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2140                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2141         } else {
2142                 /* TBD: calculate threshold for silicon */
2143                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2144                                 PMU_PG_IDLE_THRESHOLD);
2145                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2146                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2147         }
2148
2149         /* init ELPG */
2150         memset(&cmd, 0, sizeof(struct pmu_cmd));
2151         cmd.hdr.unit_id = PMU_UNIT_PG;
2152         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2153         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2154         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2155         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2156
2157         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2158                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2159
2160         /* alloc dmem for powergating state log */
2161         pmu->stat_dmem_offset = 0;
2162         memset(&cmd, 0, sizeof(struct pmu_cmd));
2163         cmd.hdr.unit_id = PMU_UNIT_PG;
2164         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2165         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2166         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2167         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2168         cmd.cmd.pg.stat.data = 0;
2169
2170         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2171                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2172
2173         /* disallow ELPG initially
2174            PMU ucode requires a disallow cmd before allow cmd */
2175         pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */
2176         memset(&cmd, 0, sizeof(struct pmu_cmd));
2177         cmd.hdr.unit_id = PMU_UNIT_PG;
2178         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2179         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2180         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2181         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2182
2183         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2184                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2185
2186         /* start with elpg disabled until first enable call */
2187         pmu->elpg_refcnt = 1;
2188
2189         return 0;
2190 }
2191
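/* Configure the PMU perfmon task: wire idle counters #3/#6 (plus #1/#2 for
 * raw readings) to GR and CE2, allocate a DMEM sample buffer and send the
 * PERFMON INIT command with the sampling parameters set below. */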
2192 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2193 {
2194         struct gk20a *g = pmu->g;
2195         struct pmu_v *pv = &g->ops.pmu_ver;
2196         struct pmu_cmd cmd;
2197         struct pmu_payload payload;
2198         u32 seq;
2199         u32 data;
2200         int err;
2201
2202         gk20a_dbg_fn("");
2203
2204         pmu->perfmon_ready = 0;
2205
2206         /* use counter #3 for GR && CE2 busy cycles */
2207         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2208                 pwr_pmu_idle_mask_gr_enabled_f() |
2209                 pwr_pmu_idle_mask_ce_2_enabled_f());
2210
2211         /* disable idle filtering for counters 3 and 6 */
2212         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2213         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2214                         pwr_pmu_idle_ctrl_filter_m(),
2215                         pwr_pmu_idle_ctrl_value_busy_f() |
2216                         pwr_pmu_idle_ctrl_filter_disabled_f());
2217         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2218
2219         /* use counter #6 for total cycles */
2220         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2221         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2222                         pwr_pmu_idle_ctrl_filter_m(),
2223                         pwr_pmu_idle_ctrl_value_always_f() |
2224                         pwr_pmu_idle_ctrl_filter_disabled_f());
2225         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2226
2227         /*
2228          * We don't want to disturb counters #3 and #6, which are used by
2229          * perfmon, so we add wiring also to counters #1 and #2 for
2230          * exposing raw counter readings.
2231          */
2232         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2233                 pwr_pmu_idle_mask_gr_enabled_f() |
2234                 pwr_pmu_idle_mask_ce_2_enabled_f());
2235
2236         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2237         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2238                         pwr_pmu_idle_ctrl_filter_m(),
2239                         pwr_pmu_idle_ctrl_value_busy_f() |
2240                         pwr_pmu_idle_ctrl_filter_disabled_f());
2241         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2242
2243         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2244         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2245                         pwr_pmu_idle_ctrl_filter_m(),
2246                         pwr_pmu_idle_ctrl_value_always_f() |
2247                         pwr_pmu_idle_ctrl_filter_disabled_f());
2248         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2249
2250         pmu->sample_buffer = 0;
2251         err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16));
2252         if (err) {
2253                 gk20a_err(dev_from_gk20a(g),
2254                         "failed to allocate perfmon sample buffer");
2255                 return -ENOMEM;
2256         }
2257
2258         /* init PERFMON */
2259         memset(&cmd, 0, sizeof(struct pmu_cmd));
2260         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2261         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2262         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2263         /* buffer to save counter values for pmu perfmon */
2264         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2265         (u16)pmu->sample_buffer);
2266         /* number of sample periods below lower threshold
2267            before pmu triggers perfmon decrease event
2268            TBD: = 15 */
2269         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2270         /* index of base counter, aka. always ticking counter */
2271         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2272         /* microseconds interval between pmu polls perf counters */
2273         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2274         /* number of perfmon counters
2275            counter #3 (GR and CE2) for gk20a */
2276         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2277         /* moving average window for sample periods
2278            TBD: = 3000000 / sample_period_us = 17 */
2279         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2280
2281         memset(&payload, 0, sizeof(struct pmu_payload));
2282         payload.in.buf = &pmu->perfmon_counter;
2283         payload.in.size = sizeof(struct pmu_perfmon_counter);
2284         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2285
2286         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2287                         NULL, NULL, &seq, ~0);
2288
2289         return 0;
2290 }
2291
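/* Consume the PMU INIT message from the message queue: validate the header,
 * read the gid signature data, initialize the command/message queues and
 * set up the DMEM allocator over the PMU-managed region. */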
2292 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2293                         struct pmu_msg *msg)
2294 {
2295         struct gk20a *g = pmu->g;
2296         struct pmu_v *pv = &g->ops.pmu_ver;
2297         union pmu_init_msg_pmu *init;
2298         struct pmu_sha1_gid_data gid_data;
2299         u32 i, tail = 0;
2300
2301         tail = pwr_pmu_msgq_tail_val_v(
2302                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2303
2304         pmu_copy_from_dmem(pmu, tail,
2305                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2306
2307         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2308                 gk20a_err(dev_from_gk20a(g),
2309                         "expecting init msg");
2310                 return -EINVAL;
2311         }
2312
2313         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2314                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2315
2316         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2317                 gk20a_err(dev_from_gk20a(g),
2318                         "expecting init msg");
2319                 return -EINVAL;
2320         }
2321
2322         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2323         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2324                 pwr_pmu_msgq_tail_val_f(tail));
2325
2326         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2327         if (!pmu->gid_info.valid) {
2328
2329                 pmu_copy_from_dmem(pmu,
2330                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2331                         (u8 *)&gid_data,
2332                         sizeof(struct pmu_sha1_gid_data), 0);
2333
2334                 pmu->gid_info.valid =
2335                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2336
2337                 if (pmu->gid_info.valid) {
2338
2339                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2340                                 sizeof(gid_data.gid));
2341
2342                         memcpy(pmu->gid_info.gid, gid_data.gid,
2343                                 sizeof(pmu->gid_info.gid));
2344                 }
2345         }
2346
2347         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2348                 pmu_queue_init(pmu, i, init);
2349
2350         gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2351                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2352                         pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2353                         PMU_DMEM_ALLOC_ALIGNMENT);
2354
2355         pmu->pmu_ready = true;
2356
2357         return 0;
2358 }
2359
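/* Pop one message from the given queue. A REWIND header causes the read
 * pointer to wrap and the header to be re-read. Returns false when the
 * queue is empty or on error (error code returned through *status). */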
2360 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2361                         struct pmu_msg *msg, int *status)
2362 {
2363         struct gk20a *g = pmu->g;
2364         u32 read_size, bytes_read;
2365         int err;
2366
2367         *status = 0;
2368
2369         if (pmu_queue_is_empty(pmu, queue))
2370                 return false;
2371
2372         err = pmu_queue_open_read(pmu, queue);
2373         if (err) {
2374                 gk20a_err(dev_from_gk20a(g),
2375                         "fail to open queue %d for read", queue->id);
2376                 *status = err;
2377                 return false;
2378         }
2379
2380         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2381                         PMU_MSG_HDR_SIZE, &bytes_read);
2382         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2383                 gk20a_err(dev_from_gk20a(g),
2384                         "fail to read msg from queue %d", queue->id);
2385                 *status = err | -EINVAL;
2386                 goto clean_up;
2387         }
2388
2389         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2390                 pmu_queue_rewind(pmu, queue);
2391                 /* read again after rewind */
2392                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2393                                 PMU_MSG_HDR_SIZE, &bytes_read);
2394                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2395                         gk20a_err(dev_from_gk20a(g),
2396                                 "fail to read msg from queue %d", queue->id);
2397                         *status = err | -EINVAL;
2398                         goto clean_up;
2399                 }
2400         }
2401
2402         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2403                 gk20a_err(dev_from_gk20a(g),
2404                         "read invalid unit_id %d from queue %d",
2405                         msg->hdr.unit_id, queue->id);
2406                 *status = -EINVAL;
2407                 goto clean_up;
2408         }
2409
2410         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2411                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2412                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2413                         read_size, &bytes_read);
2414                 if (err || bytes_read != read_size) {
2415                         gk20a_err(dev_from_gk20a(g),
2416                                 "fail to read msg from queue %d", queue->id);
2417                         *status = err;
2418                         goto clean_up;
2419                 }
2420         }
2421
2422         err = pmu_queue_close(pmu, queue, true);
2423         if (err) {
2424                 gk20a_err(dev_from_gk20a(g),
2425                         "fail to close queue %d", queue->id);
2426                 *status = err;
2427                 return false;
2428         }
2429
2430         return true;
2431
2432 clean_up:
2433         err = pmu_queue_close(pmu, queue, false);
2434         if (err)
2435                 gk20a_err(dev_from_gk20a(g),
2436                         "fail to close queue %d", queue->id);
2437         return false;
2438 }
2439
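/* Match a response message to its pending sequence, copy any DMEM output
 * payload back into the caller's buffer, release the sequence's DMEM
 * allocations and invoke the completion callback. */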
2440 static int pmu_response_handle(struct pmu_gk20a *pmu,
2441                         struct pmu_msg *msg)
2442 {
2443         struct gk20a *g = pmu->g;
2444         struct pmu_sequence *seq;
2445         struct pmu_v *pv = &g->ops.pmu_ver;
2446         int ret = 0;
2447
2448         gk20a_dbg_fn("");
2449
2450         seq = &pmu->seq[msg->hdr.seq_id];
2451         if (seq->state != PMU_SEQ_STATE_USED &&
2452             seq->state != PMU_SEQ_STATE_CANCELLED) {
2453                 gk20a_err(dev_from_gk20a(g),
2454                         "msg for an unknown sequence %d", seq->id);
2455                 return -EINVAL;
2456         }
2457
2458         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2459             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2460                 gk20a_err(dev_from_gk20a(g),
2461                         "unhandled cmd: seq %d", seq->id);
2462         }
2463         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2464                 if (seq->msg) {
2465                         if (seq->msg->hdr.size >= msg->hdr.size) {
2466                                 memcpy(seq->msg, msg, msg->hdr.size);
2467                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2468                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2469                                         pmu_copy_from_dmem(pmu,
2470                                         pv->pmu_allocation_get_dmem_offset(pmu,
2471                                         pv->get_pmu_seq_out_a_ptr(seq)),
2472                                         seq->out_payload,
2473                                         pv->pmu_allocation_get_dmem_size(pmu,
2474                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2475                                 }
2476                         } else {
2477                                 gk20a_err(dev_from_gk20a(g),
2478                                         "sequence %d msg buffer too small",
2479                                         seq->id);
2480                         }
2481                 }
2482         } else
2483                 seq->callback = NULL;
2484         if (pv->pmu_allocation_get_dmem_size(pmu,
2485                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2486                 pmu->dmem.free(&pmu->dmem,
2487                 pv->pmu_allocation_get_dmem_offset(pmu,
2488                 pv->get_pmu_seq_in_a_ptr(seq)),
2489                 pv->pmu_allocation_get_dmem_size(pmu,
2490                 pv->get_pmu_seq_in_a_ptr(seq)));
2491         if (pv->pmu_allocation_get_dmem_size(pmu,
2492                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2493                 pmu->dmem.free(&pmu->dmem,
2494                 pv->pmu_allocation_get_dmem_offset(pmu,
2495                 pv->get_pmu_seq_out_a_ptr(seq)),
2496                 pv->pmu_allocation_get_dmem_size(pmu,
2497                 pv->get_pmu_seq_out_a_ptr(seq)));
2498
2499         if (seq->callback)
2500                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2501
2502         pmu_seq_release(pmu, seq);
2503
2504         /* TBD: notify client waiting for available dmem */
2505
2506         gk20a_dbg_fn("done");
2507
2508         return 0;
2509 }
2510
2511 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2512                                  u32 *var, u32 val);
2513
2514 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2515                         void *param, u32 handle, u32 status)
2516 {
2517         struct pmu_gk20a *pmu = param;
2518         pmu->zbc_save_done = 1;
2519 }
2520
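/* Ask the PMU ucode to save the given number of ZBC table entries and wait,
 * bounded by the GR idle timeout, for the acknowledgement. */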
2521 static void pmu_save_zbc(struct gk20a *g, u32 entries)
2522 {
2523         struct pmu_gk20a *pmu = &g->pmu;
2524         struct pmu_cmd cmd;
2525         u32 seq;
2526
2527         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2528                 return;
2529
2530         memset(&cmd, 0, sizeof(struct pmu_cmd));
2531         cmd.hdr.unit_id = PMU_UNIT_PG;
2532         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2533         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2534         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2535
2536         pmu->zbc_save_done = 0;
2537
2538         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2539                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2540         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2541                               &pmu->zbc_save_done, 1);
2542         if (!pmu->zbc_save_done)
2543                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2544 }
2545
2546 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2547 {
2548         if (g->pmu.zbc_ready)
2549                 pmu_save_zbc(g, entries);
2550 }
2551
2552 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2553 {
2554         struct gk20a *g = pmu->g;
2555         struct pmu_v *pv = &g->ops.pmu_ver;
2556         struct pmu_cmd cmd;
2557         struct pmu_payload payload;
2558         u32 current_rate = 0;
2559         u32 seq;
2560
2561         /* PERFMON Start */
2562         memset(&cmd, 0, sizeof(struct pmu_cmd));
2563         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2564         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2565         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2566                 PMU_PERFMON_CMD_ID_START);
2567         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2568                 PMU_DOMAIN_GROUP_PSTATE);
2569         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2570                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2571
2572         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2573         if (current_rate >= gpc_pll_params.max_freq)
2574                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2575                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2576         else if (current_rate <= gpc_pll_params.min_freq)
2577                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2578                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2579         else
2580                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2581                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2582                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2583
2584         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2585                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2586                 PMU_PERFMON_FLAG_CLEAR_PREV);
2587
2588         memset(&payload, 0, sizeof(struct pmu_payload));
2589
2590         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2591         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2592         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2593         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2594         pmu->perfmon_counter.valid = true;
2595
2596         payload.in.buf = &pmu->perfmon_counter;
2597         payload.in.size = sizeof(pmu->perfmon_counter);
2598         payload.in.offset =
2599                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2600
2601         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2602                         NULL, NULL, &seq, ~0);
2603
2604         return 0;
2605 }
2606
2607 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2608 {
2609         struct gk20a *g = pmu->g;
2610         struct pmu_cmd cmd;
2611         u32 seq;
2612
2613         /* PERFMON Stop */
2614         memset(&cmd, 0, sizeof(struct pmu_cmd));
2615         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2616         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2617         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2618
2619         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2620                         NULL, NULL, &seq, ~0);
2621         return 0;
2622 }
2623
2624 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2625                         struct pmu_perfmon_msg *msg)
2626 {
2627         struct gk20a *g = pmu->g;
2628         u32 rate;
2629
2630         gk20a_dbg_fn("");
2631
2632         switch (msg->msg_type) {
2633         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2634                 gk20a_dbg_pmu("perfmon increase event: "
2635                         "state_id %d, group_id %d, pct %d",
2636                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2637                 /* increase gk20a clock freq by 20% */
2638                 rate = gk20a_clk_get_rate(g);
2639                 gk20a_clk_set_rate(g, rate * 6 / 5);
2640                 break;
2641         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2642                 gk20a_dbg_pmu("perfmon decrease event: "
2643                         "state_id %d, group_id %d, pct %d",
2644                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2645                 /* decrease gk20a clock freq to 70% (i.e. by 30%) */
2646                 rate = gk20a_clk_get_rate(g);
2647                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2648                 break;
2649         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2650                 pmu->perfmon_ready = 1;
2651                 gk20a_dbg_pmu("perfmon init event");
2652                 break;
2653         default:
2654                 break;
2655         }
2656
2657         /* restart sampling */
2658         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2659                 return pmu_perfmon_start_sampling(pmu);
2660         return 0;
2661 }
2662
2663
2664 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2665 {
2666         int err = 0;
2667
2668         gk20a_dbg_fn("");
2669
2670         switch (msg->hdr.unit_id) {
2671         case PMU_UNIT_PERFMON:
2672                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2673                 break;
2674         default:
2675                 break;
2676         }
2677
2678         return err;
2679 }
2680
2681 static int pmu_process_message(struct pmu_gk20a *pmu)
2682 {
2683         struct pmu_msg msg;
2684         int status;
2685
2686         if (unlikely(!pmu->pmu_ready)) {
2687                 pmu_process_init_msg(pmu, &msg);
2688                 pmu_init_powergating(pmu);
2689                 pmu_init_perfmon(pmu);
2690                 return 0;
2691         }
2692
2693         while (pmu_read_message(pmu,
2694                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2695
2696                 gk20a_dbg_pmu("read msg hdr: "
2697                                 "unit_id = 0x%08x, size = 0x%08x, "
2698                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2699                                 msg.hdr.unit_id, msg.hdr.size,
2700                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2701
2702                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2703
2704                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2705                         pmu_handle_event(pmu, &msg);
2706                 } else {
2707                         pmu_response_handle(pmu, &msg);
2708                 }
2709         }
2710
2711         return 0;
2712 }
2713
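/* Poll *var until it reaches val, servicing any pending PMU interrupt
 * inline (presumably to cover paths where the ISR has not run yet), with
 * exponential backoff up to GR_IDLE_CHECK_MAX; note the loop does not time
 * out on pre-silicon platforms. */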
2714 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2715                                  u32 *var, u32 val)
2716 {
2717         struct gk20a *g = pmu->g;
2718         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2719         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2720
2721         do {
2722                 if (*var == val)
2723                         return 0;
2724
2725                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2726                         gk20a_pmu_isr(g);
2727
2728                 usleep_range(delay, delay * 2);
2729                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2730         } while (time_before(jiffies, end_jiffies) ||
2731                         !tegra_platform_is_silicon());
2732
2733         return -ETIMEDOUT;
2734 }
2735
2736 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2737 {
2738         struct gk20a *g = pmu->g;
2739         struct pmu_pg_stats stats;
2740
2741         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2742                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2743
2744         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2745                 stats.pg_entry_start_timestamp);
2746         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2747                 stats.pg_exit_start_timestamp);
2748         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2749                 stats.pg_ingating_start_timestamp);
2750         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2751                 stats.pg_ungating_start_timestamp);
2752         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2753                 stats.pg_avg_entry_time_us);
2754         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2755                 stats.pg_avg_exit_time_us);
2756         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2757                 stats.pg_ingating_cnt);
2758         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2759                 stats.pg_ingating_time_us);
2760         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2761                 stats.pg_ungating_count);
2762         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2763                 stats.pg_ungating_time_us);
2764         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2765                 stats.pg_gating_cnt);
2766         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2767                 stats.pg_gating_deny_cnt);
2768
2769         /*
2770            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2771            in .nm file, e.g. 0x1000066c. use 0x66c.
2772         u32 i, val[20];
2773         pmu_copy_from_dmem(pmu, 0x66c,
2774                 (u8 *)val, sizeof(val), 0);
2775         gk20a_dbg_pmu("elpg log begin");
2776         for (i = 0; i < 20; i++)
2777                 gk20a_dbg_pmu("0x%08x", val[i]);
2778         gk20a_dbg_pmu("elpg log end");
2779         */
2780
2781         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2782                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2783         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2784                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2785         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2786                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2787         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2788                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2789         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2790                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2791
2792         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2793                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2794         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2795                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2796         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2797                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2798
2799         /*
2800          TBD: script can't generate those registers correctly
2801         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2802                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2803         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2804                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2805         */
2806 }
2807
2808 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2809 {
2810         struct gk20a *g = pmu->g;
2811         int i;
2812
2813         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2814                 gk20a_readl(g, pwr_falcon_os_r()));
2815         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2816                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2817         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2818                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2819         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2820                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2821         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2822                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2823         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2824                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2825         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2826                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2827         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2828                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2829         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2830                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2831
2832         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2833                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2834                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2835
2836         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2837                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2838                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2839
2840         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2841                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2842                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2843                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2844                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2845                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2846         }
2847
2848         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2849         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2850         if (i != 0) {
2851                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2852                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2853                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2854                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2855                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2856                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2857                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2858                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2859         }
2860
2861         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2862         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2863
2864         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2865         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2866         if (pwr_falcon_exterrstat_valid_v(i) ==
2867                         pwr_falcon_exterrstat_valid_true_v()) {
2868                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2869                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2870                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2871                         gk20a_readl(g, mc_enable_r()));
2872         }
2873
2874         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2875                 gk20a_readl(g, pwr_falcon_engctl_r()));
2876         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2877                 gk20a_readl(g, pwr_falcon_curctx_r()));
2878         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2879                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2880
2881         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2882                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2883                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2884         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2885                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2886
2887         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2888                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2889                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2890         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2891                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2892
2893         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2894                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2895                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2896         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2897                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2898
2899         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2900                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2901                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2902         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2903                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2904
2905         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2906                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2907                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2908         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2909                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2910
2911         for (i = 0; i < 4; i++) {
2912                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2913                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2914                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2915                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2916                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2917
2918                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2919                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2920                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2921                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2922                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2923         }
2924
2925         /* PMU may crash due to FECS crash. Dump FECS status */
2926         gk20a_fecs_dump_falcon_stats(g);
2927 }
2928
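     /*
      * PMU falcon interrupt service routine.  The effective interrupt set is
      * irqstat masked by both irqmask and irqdest; halt and exterr interrupts
      * are only logged (with a full falcon state dump), swgen0 triggers
      * message-queue processing, and swgen0 is re-raised if messages remain
      * after the handled sources have been cleared.
      */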
2929 void gk20a_pmu_isr(struct gk20a *g)
2930 {
2931         struct pmu_gk20a *pmu = &g->pmu;
2932         struct pmu_queue *queue;
2933         u32 intr, mask;
2934         bool recheck = false;
2935
2936         gk20a_dbg_fn("");
2937
2938         mutex_lock(&pmu->isr_mutex);
2939
2940         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2941                 gk20a_readl(g, pwr_falcon_irqdest_r());
2942
2943         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2944
2945         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2946
2947         if (!intr) {
2948                 mutex_unlock(&pmu->isr_mutex);
2949                 return;
2950         }
2951
2952         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2953                 gk20a_err(dev_from_gk20a(g),
2954                         "pmu halt intr not implemented");
2955                 pmu_dump_falcon_stats(pmu);
2956         }
2957         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2958                 gk20a_err(dev_from_gk20a(g),
2959                         "pmu exterr intr not implemented. Clearing interrupt.");
2960                 pmu_dump_falcon_stats(pmu);
2961
2962                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2963                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2964                                 ~pwr_falcon_exterrstat_valid_m());
2965         }
2966         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2967                 pmu_process_message(pmu);
2968                 recheck = true;
2969         }
2970
2971         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2972
2973         if (recheck) {
2974                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2975                 if (!pmu_queue_is_empty(pmu, queue))
2976                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2977                                 pwr_falcon_irqsset_swgen0_set_f());
2978         }
2979
2980         mutex_unlock(&pmu->isr_mutex);
2981 }
2982
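     /*
      * Sanity-check a command before it is queued: the target must be a SW
      * command queue, the header size must lie between PMU_CMD_HDR_SIZE and
      * half the queue size, the unit id must be valid, and any in/out payload
      * descriptors must be self-consistent and fit within the command body.
      */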
2983 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2984                         struct pmu_msg *msg, struct pmu_payload *payload,
2985                         u32 queue_id)
2986 {
2987         struct gk20a *g = pmu->g;
2988         struct pmu_queue *queue;
2989         u32 in_size, out_size;
2990
2991         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2992                 goto invalid_cmd;
2993
2994         queue = &pmu->queue[queue_id];
2995         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2996                 goto invalid_cmd;
2997
2998         if (cmd->hdr.size > (queue->size >> 1))
2999                 goto invalid_cmd;
3000
3001         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
3002                 goto invalid_cmd;
3003
3004         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
3005                 goto invalid_cmd;
3006
3007         if (payload == NULL)
3008                 return true;
3009
3010         if (payload->in.buf == NULL && payload->out.buf == NULL)
3011                 goto invalid_cmd;
3012
3013         if ((payload->in.buf != NULL && payload->in.size == 0) ||
3014             (payload->out.buf != NULL && payload->out.size == 0))
3015                 goto invalid_cmd;
3016
3017         in_size = PMU_CMD_HDR_SIZE;
3018         if (payload->in.buf) {
3019                 in_size += payload->in.offset;
3020                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3021         }
3022
3023         out_size = PMU_CMD_HDR_SIZE;
3024         if (payload->out.buf) {
3025                 out_size += payload->out.offset;
3026                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3027         }
3028
3029         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
3030                 goto invalid_cmd;
3031
3032
3033         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
3034             (payload->out.offset != 0 && payload->out.buf == NULL))
3035                 goto invalid_cmd;
3036
3037         return true;
3038
3039 invalid_cmd:
3040         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
3041                 "queue_id=%d,\n"
3042                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
3043                 "payload in=%p, in_size=%d, in_offset=%d,\n"
3044                 "payload out=%p, out_size=%d, out_offset=%d",
3045                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
3046                 msg, msg ? msg->hdr.size : ~0,
3047                 &payload->in, payload->in.size, payload->in.offset,
3048                 &payload->out, payload->out.size, payload->out.offset);
3049
3050         return false;
3051 }
3052
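     /*
      * Push a command into the given queue, retrying roughly every 1-2 ms
      * while the queue has no room (-EAGAIN) until the caller-supplied
      * timeout expires.
      */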
3053 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3054                         u32 queue_id, unsigned long timeout)
3055 {
3056         struct gk20a *g = pmu->g;
3057         struct pmu_queue *queue;
3058         unsigned long end_jiffies = jiffies +
3059                 msecs_to_jiffies(timeout);
3060         int err;
3061
3062         gk20a_dbg_fn("");
3063
3064         queue = &pmu->queue[queue_id];
3065
3066         do {
3067                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3068                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3069                         usleep_range(1000, 2000);
3070                 else
3071                         break;
3072         } while (1);
3073
3074         if (err)
3075                 goto clean_up;
3076
3077         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3078
3079         err = pmu_queue_close(pmu, queue, true);
3080
3081 clean_up:
3082         if (err)
3083                 gk20a_err(dev_from_gk20a(g),
3084                         "fail to write cmd to queue %d", queue_id);
3085         else
3086                 gk20a_dbg_fn("done");
3087
3088         return err;
3089 }
3090
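     /*
      * Post a command to the PMU: validate it, acquire a sequence slot, carve
      * out DMEM for the in/out payloads (sharing one allocation when both use
      * the same buffer), copy the input payload into DMEM and write the
      * command into the target queue.  The descriptor returned through
      * *seq_desc lets the caller match the eventual reply, and the callback
      * runs when that reply is processed.  gk20a_pmu_enable_elpg_locked()
      * below is a minimal, payload-less usage example.
      */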
3091 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3092                 struct pmu_msg *msg, struct pmu_payload *payload,
3093                 u32 queue_id, pmu_callback callback, void* cb_param,
3094                 u32 *seq_desc, unsigned long timeout)
3095 {
3096         struct pmu_gk20a *pmu = &g->pmu;
3097         struct pmu_v *pv = &g->ops.pmu_ver;
3098         struct pmu_sequence *seq;
3099         void *in = NULL, *out = NULL;
3100         int err;
3101
3102         gk20a_dbg_fn("");
3103
3104         BUG_ON(!cmd);
3105         BUG_ON(!seq_desc);
3106         BUG_ON(!pmu->pmu_ready);
3107
3108         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3109                 return -EINVAL;
3110
3111         err = pmu_seq_acquire(pmu, &seq);
3112         if (err)
3113                 return err;
3114
3115         cmd->hdr.seq_id = seq->id;
3116
3117         cmd->hdr.ctrl_flags = 0;
3118         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3119         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3120
3121         seq->callback = callback;
3122         seq->cb_params = cb_param;
3123         seq->msg = msg;
3124         seq->out_payload = NULL;
3125         seq->desc = pmu->next_seq_desc++;
3126
3127         if (payload)
3128                 seq->out_payload = payload->out.buf;
3129
3130         *seq_desc = seq->desc;
3131
3132         if (payload && payload->in.offset != 0) {
3133                 pv->set_pmu_allocation_ptr(pmu, &in,
3134                 ((u8 *)&cmd->cmd + payload->in.offset));
3135
3136                 if (payload->in.buf != payload->out.buf)
3137                         pv->pmu_allocation_set_dmem_size(pmu, in,
3138                         (u16)payload->in.size);
3139                 else
3140                         pv->pmu_allocation_set_dmem_size(pmu, in,
3141                         (u16)max(payload->in.size, payload->out.size));
3142
3143                 err = pmu->dmem.alloc(&pmu->dmem,
3144                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3145                 pv->pmu_allocation_get_dmem_size(pmu, in));
3146                 if (err)
3147                         goto clean_up;
3148
3149                 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3150                 in)),
3151                         payload->in.buf, payload->in.size, 0);
3152                 pv->pmu_allocation_set_dmem_size(pmu,
3153                 pv->get_pmu_seq_in_a_ptr(seq),
3154                 pv->pmu_allocation_get_dmem_size(pmu, in));
3155                 pv->pmu_allocation_set_dmem_offset(pmu,
3156                 pv->get_pmu_seq_in_a_ptr(seq),
3157                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3158         }
3159
3160         if (payload && payload->out.offset != 0) {
3161                 pv->set_pmu_allocation_ptr(pmu, &out,
3162                 ((u8 *)&cmd->cmd + payload->out.offset));
3163                 pv->pmu_allocation_set_dmem_size(pmu, out,
3164                 (u16)payload->out.size);
3165
3166                 if (payload->out.buf != payload->in.buf) {
3167                         err = pmu->dmem.alloc(&pmu->dmem,
3168                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3169                         pv->pmu_allocation_get_dmem_size(pmu, out));
3170                         if (err)
3171                                 goto clean_up;
3172                 } else {
3173                         BUG_ON(in == NULL);
3174                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3175                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3176                 }
3177
3178                 pv->pmu_allocation_set_dmem_size(pmu,
3179                 pv->get_pmu_seq_out_a_ptr(seq),
3180                 pv->pmu_allocation_get_dmem_size(pmu, out));
3181                 pv->pmu_allocation_set_dmem_offset(pmu,
3182                 pv->get_pmu_seq_out_a_ptr(seq),
3183                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3184         }
3185
3186         seq->state = PMU_SEQ_STATE_USED;
3187         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3188         if (err)
3189                 seq->state = PMU_SEQ_STATE_PENDING;
3190
3191         gk20a_dbg_fn("done");
3192
3193         return 0;
3194
3195 clean_up:
3196         gk20a_dbg_fn("fail");
3197         if (in)
3198                 pmu->dmem.free(&pmu->dmem,
3199                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3200                 pv->pmu_allocation_get_dmem_size(pmu, in));
3201         if (out)
3202                 pmu->dmem.free(&pmu->dmem,
3203                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3204                 pv->pmu_allocation_get_dmem_size(pmu, out));
3205
3206         pmu_seq_release(pmu, seq);
3207         return err;
3208 }
3209
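     /*
      * Post PMU_PG_ELPG_CMD_ALLOW for the GR engine on the high-priority
      * queue.  Callers hold elpg_mutex; the ack is handled asynchronously by
      * pmu_handle_pg_elpg_msg(), so elpg_stat is only set to ON_PENDING here.
      */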
3210 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3211 {
3212         struct pmu_gk20a *pmu = &g->pmu;
3213         struct pmu_cmd cmd;
3214         u32 seq, status;
3215
3216         gk20a_dbg_fn("");
3217
3218         memset(&cmd, 0, sizeof(struct pmu_cmd));
3219         cmd.hdr.unit_id = PMU_UNIT_PG;
3220         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3221         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3222         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3223         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3224
3225         /* no need to wait for an ack for ELPG enable, but set pending
3226            to sync with a follow-up ELPG disable */
3227         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3228
3229         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3230                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3231
3232         BUG_ON(status != 0);
3233
3234         gk20a_dbg_fn("done");
3235         return 0;
3236 }
3237
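     /*
      * Reference-counted ELPG enable.  The enable only takes effect once the
      * PMU is initialized, the golden context exists and ELPG is currently
      * OFF; if the deferred-enable window is still closed
      * (elpg_enable_allow == false) the request is parked as OFF_ON_PENDING
      * and completed later by pmu_elpg_enable_allow().
      */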
3238 int gk20a_pmu_enable_elpg(struct gk20a *g)
3239 {
3240         struct pmu_gk20a *pmu = &g->pmu;
3241         struct gr_gk20a *gr = &g->gr;
3242
3243         int ret = 0;
3244
3245         gk20a_dbg_fn("");
3246
3247         if (!pmu->elpg_ready || !pmu->initialized)
3248                 goto exit;
3249
3250         mutex_lock(&pmu->elpg_mutex);
3251
3252         pmu->elpg_refcnt++;
3253         if (pmu->elpg_refcnt <= 0)
3254                 goto exit_unlock;
3255
3256         /* something is not right if we end up in the following code path */
3257         if (unlikely(pmu->elpg_refcnt > 1)) {
3258                 gk20a_warn(dev_from_gk20a(g),
3259                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3260                 __func__, pmu->elpg_refcnt);
3261                 WARN_ON(1);
3262         }
3263
3264         /* do NOT enable elpg until the golden ctx is created, since that
3265            is the ctx which ELPG saves and restores. */
3266         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3267                 goto exit_unlock;
3268
3269         /* return if ELPG is already on or on_pending or off_on_pending */
3270         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3271                 goto exit_unlock;
3272
3273         /* if ELPG is not allowed right now, mark that it should be enabled
3274          * immediately after it is allowed */
3275         if (!pmu->elpg_enable_allow) {
3276                 pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING;
3277                 goto exit_unlock;
3278         }
3279
3280         ret = gk20a_pmu_enable_elpg_locked(g);
3281
3282 exit_unlock:
3283         mutex_unlock(&pmu->elpg_mutex);
3284 exit:
3285         gk20a_dbg_fn("done");
3286         return ret;
3287 }
3288
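     /* Delayed work item: re-open the ELPG enable window and honour any
      * OFF_ON_PENDING request that arrived while enabling was disallowed. */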
3289 static void pmu_elpg_enable_allow(struct work_struct *work)
3290 {
3291         struct pmu_gk20a *pmu = container_of(to_delayed_work(work),
3292                                         struct pmu_gk20a, elpg_enable);
3293
3294         gk20a_dbg_fn("");
3295
3296         mutex_lock(&pmu->elpg_mutex);
3297
3298         /* It is ok to enable powergating now */
3299         pmu->elpg_enable_allow = true;
3300
3301         /* do we have pending requests? */
3302         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3303                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3304                 gk20a_pmu_enable_elpg_locked(pmu->g);
3305         }
3306
3307         mutex_unlock(&pmu->elpg_mutex);
3308
3309         gk20a_dbg_fn("done");
3310 }
3311
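     /*
      * Reference-counted ELPG disable.  Pending deferred-enable work is
      * cancelled first; an OFF_ON_PENDING request is simply dropped, an
      * ON_PENDING request is waited on, and only a fully ON state results in
      * PMU_PG_ELPG_CMD_DISALLOW being posted and acked.  When @enable is true
      * the re-enable is rescheduled after PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC.
      */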
3312 static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable)
3313 {
3314         struct pmu_gk20a *pmu = &g->pmu;
3315         struct pmu_cmd cmd;
3316         u32 seq;
3317         int ret = 0;
3318
3319         gk20a_dbg_fn("");
3320
3321         if (!pmu->elpg_ready || !pmu->initialized)
3322                 return 0;
3323
3324         /* remove the work from queue */
3325         cancel_delayed_work_sync(&pmu->elpg_enable);
3326
3327         mutex_lock(&pmu->elpg_mutex);
3328
3329         pmu->elpg_refcnt--;
3330         if (pmu->elpg_refcnt > 0) {
3331                 gk20a_warn(dev_from_gk20a(g),
3332                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3333                 __func__, pmu->elpg_refcnt);
3334                 WARN_ON(1);
3335                 ret = 0;
3336                 goto exit_unlock;
3337         }
3338
3339         /* cancel off_on_pending and return */
3340         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3341                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3342                 ret = 0;
3343                 goto exit_reschedule;
3344         }
3345         /* wait if on_pending */
3346         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3347
3348                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3349                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3350
3351                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3352                         gk20a_err(dev_from_gk20a(g),
3353                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3354                                 pmu->elpg_stat);
3355                         pmu_dump_elpg_stats(pmu);
3356                         pmu_dump_falcon_stats(pmu);
3357                         ret = -EBUSY;
3358                         goto exit_unlock;
3359                 }
3360         }
3361         /* return if ELPG is already off */
3362         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3363                 ret = 0;
3364                 goto exit_reschedule;
3365         }
3366
3367         memset(&cmd, 0, sizeof(struct pmu_cmd));
3368         cmd.hdr.unit_id = PMU_UNIT_PG;
3369         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3370         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3371         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3372         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3373
3374         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3375
3376         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3377                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3378
3379         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3380                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3381         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3382                 gk20a_err(dev_from_gk20a(g),
3383                         "ELPG_DISALLOW_ACK failed");
3384                 pmu_dump_elpg_stats(pmu);
3385                 pmu_dump_falcon_stats(pmu);
3386                 ret = -EBUSY;
3387                 goto exit_unlock;
3388         }
3389
3390 exit_reschedule:
3391         if (enable) {
3392                 pmu->elpg_enable_allow = false;
3393                 schedule_delayed_work(&pmu->elpg_enable,
3394                         msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC));
3395         } else
3396                 pmu->elpg_enable_allow = true;
3397
3398
3399 exit_unlock:
3400         mutex_unlock(&pmu->elpg_mutex);
3401         gk20a_dbg_fn("done");
3402         return ret;
3403 }
3404
3405 int gk20a_pmu_disable_elpg(struct gk20a *g)
3406 {
3407         return gk20a_pmu_disable_elpg_defer_enable(g, true);
3408 }
3409
3410 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3411 {
3412         struct pmu_gk20a *pmu = &g->pmu;
3413         int err;
3414
3415         gk20a_dbg_fn("");
3416
3417         if (enable)
3418                 err = pmu_perfmon_start_sampling(pmu);
3419         else
3420                 err = pmu_perfmon_stop_sampling(pmu);
3421
3422         return err;
3423 }
3424
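     /* Tear-down: flush deferred ELPG/pg_init work, fold the final HW gating
      * statistics into the SW counters, disallow ELPG and disable the PMU
      * falcon. */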
3425 int gk20a_pmu_destroy(struct gk20a *g)
3426 {
3427         struct pmu_gk20a *pmu = &g->pmu;
3428         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3429
3430         gk20a_dbg_fn("");
3431
3432         if (!support_gk20a_pmu())
3433                 return 0;
3434
3435         /* make sure the pending operations are finished before we continue */
3436         cancel_delayed_work_sync(&pmu->elpg_enable);
3437         cancel_work_sync(&pmu->pg_init);
3438
3439         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3440                 &elpg_ungating_time, &gating_cnt);
3441
3442         gk20a_pmu_disable_elpg_defer_enable(g, false);
3443         pmu->initialized = false;
3444
3445         /* update the s/w ELPG residency counters */
3446         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3447         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3448         g->pg_gating_cnt += gating_cnt;
3449
3450         pmu_enable(pmu, false);
3451
3452         if (pmu->remove_support) {
3453                 pmu->remove_support(pmu);
3454                 pmu->remove_support = NULL;
3455         }
3456
3457         gk20a_dbg_fn("done");
3458         return 0;
3459 }
3460
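     /* Report the perfmon-sampled load: a 16-bit raw sample is read from the
      * sample buffer in PMU DMEM and scaled down by 10; 0 is returned until
      * perfmon is ready. */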
3461 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3462 {
3463         struct pmu_gk20a *pmu = &g->pmu;
3464         u16 _load = 0;
3465
3466         if (!pmu->perfmon_ready) {
3467                 *load = 0;
3468                 return 0;
3469         }
3470
3471         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3472         *load = _load / 10;
3473
3474         return 0;
3475 }
3476
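     /* Snapshot the PMU idle counters: counter 1 is returned as busy cycles
      * and counter 2 as total cycles, with a read barrier ordering the two
      * reads.  Both read back as 0 while the GPU is powered off. */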
3477 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3478                                  u32 *total_cycles)
3479 {
3480         if (!g->power_on) {
3481                 *busy_cycles = 0;
3482                 *total_cycles = 0;
3483                 return;
3484         }
3485
3486         gk20a_busy(g->dev);
3487         *busy_cycles = pwr_pmu_idle_count_value_v(
3488                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3489         rmb();
3490         *total_cycles = pwr_pmu_idle_count_value_v(
3491                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3492         gk20a_idle(g->dev);
3493 }
3494
3495 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3496 {
3497         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3498
3499         if (!g->power_on)
3500                 return;
3501
3502         gk20a_busy(g->dev);
3503         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3504         wmb();
3505         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3506         gk20a_idle(g->dev);
3507 }
3508
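     /* Read the PG statistics block from PMU DMEM and return the accumulated
      * in-gating/un-gating times (us) and the gating count; all zero until
      * the PMU is initialized. */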
3509 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3510                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3511 {
3512         struct pmu_gk20a *pmu = &g->pmu;
3513         struct pmu_pg_stats stats;
3514
3515         if (!pmu->initialized) {
3516                 *ingating_time = 0;
3517                 *ungating_time = 0;
3518                 *gating_cnt = 0;
3519                 return 0;
3520         }
3521
3522         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3523                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3524
3525         *ingating_time = stats.pg_ingating_time_us;
3526         *ungating_time = stats.pg_ungating_time_us;
3527         *gating_cnt = stats.pg_gating_cnt;
3528
3529         return 0;
3530 }
3531
3532 /* Send an Adaptive Power (AP) related command to PMU */
3533 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3534                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3535 {
3536         struct pmu_gk20a *pmu = &g->pmu;
3537         /* FIXME: where is the PG structure defined?? */
3538         u32 status = 0;
3539         struct pmu_cmd cmd;
3540         u32 seq;
3541         pmu_callback p_callback = NULL;
3542
3543         memset(&cmd, 0, sizeof(struct pmu_cmd));
3544
3545         /* Copy common members */
3546         cmd.hdr.unit_id = PMU_UNIT_PG;
3547         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3548
3549         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3550         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3551
3552         /* Copy other members of command */
3553         switch (p_ap_cmd->cmn.cmd_id) {
3554         case PMU_AP_CMD_ID_INIT:
3555                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3556                         p_ap_cmd->init.pg_sampling_period_us;
3557                 p_callback = ap_callback_init_and_enable_ctrl;
3558                 break;
3559
3560         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3561                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3562                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3563                 memcpy(
3564                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3565                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3566                         sizeof(struct pmu_ap_ctrl_init_params));
3567
3568                 p_callback = ap_callback_init_and_enable_ctrl;
3569                 break;
3570
3571         case PMU_AP_CMD_ID_ENABLE_CTRL:
3572                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3573                         p_ap_cmd->enable_ctrl.ctrl_id;
3574                 break;
3575
3576         case PMU_AP_CMD_ID_DISABLE_CTRL:
3577                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3578                         p_ap_cmd->disable_ctrl.ctrl_id;
3579                 break;
3580
3581         case PMU_AP_CMD_ID_KICK_CTRL:
3582                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3583                         p_ap_cmd->kick_ctrl.ctrl_id;
3584                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3585                         p_ap_cmd->kick_ctrl.skip_count;
3586                 break;
3587
3588         default:
3589                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3590                         __func__, p_ap_cmd->cmn.cmd_id);
3591                 return 0x2f;
3592         }
3593
3594         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3595                         p_callback, pmu, &seq, ~0);
3596
3597         if (status) {
3598                 gk20a_dbg_pmu(
3599                         "%s: Unable to submit Adaptive Power Command %d\n",
3600                         __func__, p_ap_cmd->cmn.cmd_id);
3601                 goto err_return;
3602         }
3603
3604         /* TODO: Implement blocking calls (b_block) */
3605
3606 err_return:
3607         return status;
3608 }
3609
3610 static void ap_callback_init_and_enable_ctrl(
3611                 struct gk20a *g, struct pmu_msg *msg,
3612                 void *param, u32 seq_desc, u32 status)
3613 {
3614         /* Define p_ap (i.e., a pointer to the pmu_ap structure) */
3615         WARN_ON(!msg);
3616
3617         if (!status) {
3618                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3619                 case PMU_AP_MSG_ID_INIT_ACK:
3620                         break;
3621
3622                 default:
3623                         gk20a_dbg_pmu(
3624                         "%s: Invalid Adaptive Power Message: %x\n",
3625                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3626                         break;
3627                 }
3628         }
3629 }
3630
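     /* Adaptive ELPG bring-up: send the AP INIT command with the default PG
      * sampling period.  gk20a_aelpg_init_and_enable() below then programs
      * the default thresholds for the given controller (e.g.
      * PMU_AP_CTRL_ID_GRAPHICS) and enables it. */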
3631 static int gk20a_aelpg_init(struct gk20a *g)
3632 {
3633         int status = 0;
3634
3635         /* Remove reliance on app_ctrl field. */
3636         union pmu_ap_cmd ap_cmd;
3637
3638         /* TODO: Check for elpg being ready? */
3639         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3640         ap_cmd.init.pg_sampling_period_us =
3641                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3642
3643         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3644         return status;
3645 }
3646
3647 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3648 {
3649         int status = 0;
3650         union pmu_ap_cmd ap_cmd;
3651
3652         /* TODO: Probably check if ELPG is ready? */
3653
3654         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3655         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3656         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3657                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3658         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3659                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3660         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3661                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3662         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3663                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3664
3665         switch (ctrl_id) {
3666         case PMU_AP_CTRL_ID_GRAPHICS:
3667                 break;
3668         default:
3669                 break;
3670         }
3671
3672         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3673         return status;
3674 }
3675
3676 #ifdef CONFIG_DEBUG_FS
3677 static int elpg_residency_show(struct seq_file *s, void *data)
3678 {
3679         struct gk20a *g = s->private;
3680         u32 ingating_time = 0;
3681         u32 ungating_time = 0;
3682         u32 gating_cnt;
3683         u64 total_ingating, total_ungating, residency, divisor, dividend;
3684
3685         /* Don't unnecessarily power on the device */
3686         if (g->power_on) {
3687                 gk20a_busy(g->dev);
3688                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3689                         &ungating_time, &gating_cnt);
3690                 gk20a_idle(g->dev);
3691         }
3692         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3693         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3694         divisor = total_ingating + total_ungating;
3695
3696         /* We compute the residency on a scale of 1000 */
3697         dividend = total_ingating * 1000;
3698
3699         if (divisor)
3700                 residency = div64_u64(dividend, divisor);
3701         else
3702                 residency = 0;
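             /*
              * Example: 900 ms spent in ELPG and 100 ms out of it gives
              * residency = 900000 * 1000 / 1000000 = 900, i.e. 90.0%.
              */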
3703
3704         seq_printf(s, "Time in ELPG: %llu us\n"
3705                         "Time out of ELPG: %llu us\n"
3706                         "ELPG residency ratio: %llu\n",
3707                         total_ingating, total_ungating, residency);
3708         return 0;
3709
3710 }
3711
3712 static int elpg_residency_open(struct inode *inode, struct file *file)
3713 {
3714         return single_open(file, elpg_residency_show, inode->i_private);
3715 }
3716
3717 static const struct file_operations elpg_residency_fops = {
3718         .open           = elpg_residency_open,
3719         .read           = seq_read,
3720         .llseek         = seq_lseek,
3721         .release        = single_release,
3722 };
3723
3724 static int elpg_transitions_show(struct seq_file *s, void *data)
3725 {
3726         struct gk20a *g = s->private;
3727         u32 ingating_time, ungating_time, total_gating_cnt;
3728         u32 gating_cnt = 0;
3729
3730         if (g->power_on) {
3731                 gk20a_busy(g->dev);
3732                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3733                         &ungating_time, &gating_cnt);
3734                 gk20a_idle(g->dev);
3735         }
3736         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3737
3738         seq_printf(s, "%u\n", total_gating_cnt);
3739         return 0;
3740
3741 }
3742
3743 static int elpg_transitions_open(struct inode *inode, struct file *file)
3744 {
3745         return single_open(file, elpg_transitions_show, inode->i_private);
3746 }
3747
3748 static const struct file_operations elpg_transitions_fops = {
3749         .open           = elpg_transitions_open,
3750         .read           = seq_read,
3751         .llseek         = seq_lseek,
3752         .release        = single_release,
3753 };
3754
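     /* Create the ELPG debugfs nodes ("elpg_residency", "elpg_transitions")
      * under the platform's debugfs directory; the exact path depends on
      * where the gk20a platform code rooted platform->debugfs. */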
3755 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3756 {
3757         struct dentry *d;
3758         struct gk20a_platform *platform = platform_get_drvdata(dev);
3759         struct gk20a *g = get_gk20a(dev);
3760
3761         d = debugfs_create_file(
3762                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3763                                                 &elpg_residency_fops);
3764         if (!d)
3765                 goto err_out;
3766
3767         d = debugfs_create_file(
3768                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3769                                                 &elpg_transitions_fops);
3770         if (!d)
3771                 goto err_out;
3772
3773         return 0;
3774
3775 err_out:
3776         pr_err("%s: Failed to make debugfs node\n", __func__);
3777         debugfs_remove_recursive(platform->debugfs);
3778         return -ENOMEM;
3779 }
3780 #endif