gk20a: Moved bind fecs to init_gr_support
[linux-3.10.git] drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "gr_gk20a.h"
31 #include "hw_mc_gk20a.h"
32 #include "hw_pwr_gk20a.h"
33 #include "hw_top_gk20a.h"
34
35 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
36
37 #define gk20a_dbg_pmu(fmt, arg...) \
38         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
39
40 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
41 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
42                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
43 static void pmu_setup_hw(struct work_struct *work);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
50 static int pmu_init_powergating(struct gk20a *g);
51
52 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
53 {
54         return sizeof(struct pmu_cmdline_args_v0);
55 }
56
57 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
58 {
59         return sizeof(struct pmu_cmdline_args_v1);
60 }
61
62 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
63 {
64         pmu->args_v1.cpu_freq_hz = freq;
65 }
66
67 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
68 {
69         pmu->args_v0.cpu_freq_hz = freq;
70 }
71
72 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
73 {
74         return (void *)(&pmu->args_v1);
75 }
76
77 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
78 {
79         return (void *)(&pmu->args_v0);
80 }
81
82 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
83 {
84         return sizeof(struct pmu_allocation_v1);
85 }
86
87 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
88 {
89         return sizeof(struct pmu_allocation_v0);
90 }
91
92 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
93         void **pmu_alloc_ptr, void *assign_ptr)
94 {
95         struct pmu_allocation_v1 **pmu_a_ptr =
96                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
97         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
98 }
99
100 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
101         void **pmu_alloc_ptr, void *assign_ptr)
102 {
103         struct pmu_allocation_v0 **pmu_a_ptr =
104                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
105         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
106 }
107
108 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
109         void *pmu_alloc_ptr, u16 size)
110 {
111         struct pmu_allocation_v1 *pmu_a_ptr =
112                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
113         pmu_a_ptr->alloc.dmem.size = size;
114 }
115
116 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
117         void *pmu_alloc_ptr, u16 size)
118 {
119         struct pmu_allocation_v0 *pmu_a_ptr =
120                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
121         pmu_a_ptr->alloc.dmem.size = size;
122 }
123
124 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
125         void *pmu_alloc_ptr)
126 {
127         struct pmu_allocation_v1 *pmu_a_ptr =
128                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
129         return pmu_a_ptr->alloc.dmem.size;
130 }
131
132 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
133         void *pmu_alloc_ptr)
134 {
135         struct pmu_allocation_v0 *pmu_a_ptr =
136                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
137         return pmu_a_ptr->alloc.dmem.size;
138 }
139
140 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
141         void *pmu_alloc_ptr)
142 {
143         struct pmu_allocation_v1 *pmu_a_ptr =
144                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
145         return pmu_a_ptr->alloc.dmem.offset;
146 }
147
148 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
149         void *pmu_alloc_ptr)
150 {
151         struct pmu_allocation_v0 *pmu_a_ptr =
152                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
153         return pmu_a_ptr->alloc.dmem.offset;
154 }
155
156 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
157         void *pmu_alloc_ptr)
158 {
159         struct pmu_allocation_v1 *pmu_a_ptr =
160                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
161         return &pmu_a_ptr->alloc.dmem.offset;
162 }
163
164 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
165         void *pmu_alloc_ptr)
166 {
167         struct pmu_allocation_v0 *pmu_a_ptr =
168                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
169         return &pmu_a_ptr->alloc.dmem.offset;
170 }
171
172 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
173         void *pmu_alloc_ptr, u32 offset)
174 {
175         struct pmu_allocation_v1 *pmu_a_ptr =
176                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
177         pmu_a_ptr->alloc.dmem.offset = offset;
178 }
179
180 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
181         void *pmu_alloc_ptr, u32 offset)
182 {
183         struct pmu_allocation_v0 *pmu_a_ptr =
184                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
185         pmu_a_ptr->alloc.dmem.offset = offset;
186 }
187
188 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
189 {
190         return (void *)(&(init->pmu_init_v1));
191 }
192
193 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
194 {
195         struct pmu_init_msg_pmu_v1 *init =
196                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
197         return init->sw_managed_area_offset;
198 }
199
200 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
201 {
202         struct pmu_init_msg_pmu_v1 *init =
203                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
204         return init->sw_managed_area_size;
205 }
206
207 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
208 {
209         return (void *)(&(init->pmu_init_v0));
210 }
211
212 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
213 {
214         struct pmu_init_msg_pmu_v0 *init =
215                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
216         return init->sw_managed_area_offset;
217 }
218
219 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
220 {
221         struct pmu_init_msg_pmu_v0 *init =
222                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
223         return init->sw_managed_area_size;
224 }
225
226 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
227 {
228         return sizeof(struct pmu_perfmon_cmd_start_v1);
229 }
230
231 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
232 {
233         return sizeof(struct pmu_perfmon_cmd_start_v0);
234 }
235
236 static int get_perfmon_cmd_start_offsetofvar_v1(
237         enum pmu_perfmon_cmd_start_fields field)
238 {
239         switch (field) {
240         case COUNTER_ALLOC:
241                 return offsetof(struct pmu_perfmon_cmd_start_v1,
242                 counter_alloc);
243         default:
244                 return -EINVAL;
245                 break;
246         }
247         return 0;
248 }
249
250 static int get_perfmon_cmd_start_offsetofvar_v0(
251         enum pmu_perfmon_cmd_start_fields field)
252 {
253         switch (field) {
254         case COUNTER_ALLOC:
255                 return offsetof(struct pmu_perfmon_cmd_start_v0,
256                 counter_alloc);
257         default:
258                 return -EINVAL;
259                 break;
260         }
261         return 0;
262 }
263
264 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
265 {
266         return sizeof(struct pmu_perfmon_cmd_init_v1);
267 }
268
269 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
270 {
271         return sizeof(struct pmu_perfmon_cmd_init_v0);
272 }
273
274 static int get_perfmon_cmd_init_offsetofvar_v1(
275         enum pmu_perfmon_cmd_start_fields field)
276 {
277         switch (field) {
278         case COUNTER_ALLOC:
279                 return offsetof(struct pmu_perfmon_cmd_init_v1,
280                 counter_alloc);
281         default:
282                 return -EINVAL;
283                 break;
284         }
285         return 0;
286 }
287
288 static int get_perfmon_cmd_init_offsetofvar_v0(
289         enum pmu_perfmon_cmd_start_fields field)
290 {
291         switch (field) {
292         case COUNTER_ALLOC:
293                 return offsetof(struct pmu_perfmon_cmd_init_v0,
294                 counter_alloc);
295         default:
296                 return -EINVAL;
297                 break;
298         }
299         return 0;
300 }
301
302 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
303 {
304         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
305         start->cmd_type = value;
306 }
307
308 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
309 {
310         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
311         start->cmd_type = value;
312 }
313
314 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
315 {
316         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
317         start->group_id = value;
318 }
319
320 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
321 {
322         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
323         start->group_id = value;
324 }
325
326 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
327 {
328         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
329         start->state_id = value;
330 }
331
332 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
333 {
334         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
335         start->state_id = value;
336 }
337
338 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
339 {
340         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
341         start->flags = value;
342 }
343
344 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
345 {
346         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
347         start->flags = value;
348 }
349
350 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
351 {
352         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
353         return start->flags;
354 }
355
356 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
357 {
358         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
359         return start->flags;
360 }
361
362 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
363         u16 value)
364 {
365         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
366         init->sample_buffer = value;
367 }
368
369 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
370         u16 value)
371 {
372         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
373         init->sample_buffer = value;
374 }
375
376 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
377         u8 value)
378 {
379         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
380         init->to_decrease_count = value;
381 }
382
383 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
384         u8 value)
385 {
386         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
387         init->to_decrease_count = value;
388 }
389
390 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
391         u8 value)
392 {
393         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
394         init->base_counter_id = value;
395 }
396
397 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
398         u8 value)
399 {
400         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
401         init->base_counter_id = value;
402 }
403
404 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
405         u32 value)
406 {
407         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
408         init->sample_period_us = value;
409 }
410
411 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
412         u32 value)
413 {
414         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
415         init->sample_period_us = value;
416 }
417
418 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
419         u8 value)
420 {
421         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
422         init->num_counters = value;
423 }
424
425 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
426         u8 value)
427 {
428         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
429         init->num_counters = value;
430 }
431
432 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
433         u8 value)
434 {
435         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
436         init->samples_in_moving_avg = value;
437 }
438
439 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
440         u8 value)
441 {
442         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
443         init->samples_in_moving_avg = value;
444 }
445
446 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
447         u32 id, void *pmu_init_msg)
448 {
449         struct pmu_init_msg_pmu_v0 *init =
450                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
451         queue->index    = init->queue_info[id].index;
452         queue->offset   = init->queue_info[id].offset;
453         queue->size = init->queue_info[id].size;
454 }
455
456 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
457         u32 id, void *pmu_init_msg)
458 {
459         struct pmu_init_msg_pmu_v1 *init =
460                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
461         queue->index    = init->queue_info[id].index;
462         queue->offset   = init->queue_info[id].offset;
463         queue->size = init->queue_info[id].size;
464 }
465
466 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
467 {
468         return (void *)(&seq->in_v1);
469 }
470
471 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
472 {
473         return (void *)(&seq->in_v0);
474 }
475
476 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
477 {
478         return (void *)(&seq->out_v1);
479 }
480
481 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
482 {
483         return (void *)(&seq->out_v0);
484 }
485
486 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
487 {
488         struct gk20a *g = pmu->g;
489         switch (pmu->desc->app_version) {
490         case APP_VERSION_1:
491                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
492                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
493                         pmu_cmdline_size_v1;
494                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
495                         set_pmu_cmdline_args_cpufreq_v1;
496                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
497                         get_pmu_cmdline_args_ptr_v1;
498                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
499                         get_pmu_allocation_size_v1;
500                 g->ops.pmu_ver.set_pmu_allocation_ptr =
501                         set_pmu_allocation_ptr_v1;
502                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
503                         pmu_allocation_set_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
505                         pmu_allocation_get_dmem_size_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
507                         pmu_allocation_get_dmem_offset_v1;
508                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
509                         pmu_allocation_get_dmem_offset_addr_v1;
510                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
511                         pmu_allocation_set_dmem_offset_v1;
512                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
513                         get_pmu_init_msg_pmu_queue_params_v1;
514                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
515                         get_pmu_msg_pmu_init_msg_ptr_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
517                         get_pmu_init_msg_pmu_sw_mg_off_v1;
518                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
519                         get_pmu_init_msg_pmu_sw_mg_size_v1;
520                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
521                         get_pmu_perfmon_cmd_start_size_v1;
522                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
523                         get_perfmon_cmd_start_offsetofvar_v1;
524                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
525                         perfmon_start_set_cmd_type_v1;
526                 g->ops.pmu_ver.perfmon_start_set_group_id =
527                         perfmon_start_set_group_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_state_id =
529                         perfmon_start_set_state_id_v1;
530                 g->ops.pmu_ver.perfmon_start_set_flags =
531                         perfmon_start_set_flags_v1;
532                 g->ops.pmu_ver.perfmon_start_get_flags =
533                         perfmon_start_get_flags_v1;
534                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
535                         get_pmu_perfmon_cmd_init_size_v1;
536                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
537                         get_perfmon_cmd_init_offsetofvar_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
539                         perfmon_cmd_init_set_sample_buffer_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
541                         perfmon_cmd_init_set_dec_cnt_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
543                         perfmon_cmd_init_set_base_cnt_id_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
545                         perfmon_cmd_init_set_samp_period_us_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
547                         perfmon_cmd_init_set_num_cnt_v1;
548                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
549                         perfmon_cmd_init_set_mov_avg_v1;
550                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
551                         get_pmu_sequence_in_alloc_ptr_v1;
552                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
553                         get_pmu_sequence_out_alloc_ptr_v1;
554                 break;
555         case APP_VERSION_0:
556                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
557                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
558                         pmu_cmdline_size_v0;
559                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
560                         set_pmu_cmdline_args_cpufreq_v0;
561                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
562                         get_pmu_cmdline_args_ptr_v0;
563                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
564                         get_pmu_allocation_size_v0;
565                 g->ops.pmu_ver.set_pmu_allocation_ptr =
566                         set_pmu_allocation_ptr_v0;
567                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
568                         pmu_allocation_set_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
570                         pmu_allocation_get_dmem_size_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
572                         pmu_allocation_get_dmem_offset_v0;
573                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
574                         pmu_allocation_get_dmem_offset_addr_v0;
575                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
576                         pmu_allocation_set_dmem_offset_v0;
577                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
578                         get_pmu_init_msg_pmu_queue_params_v0;
579                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
580                         get_pmu_msg_pmu_init_msg_ptr_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
582                         get_pmu_init_msg_pmu_sw_mg_off_v0;
583                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
584                         get_pmu_init_msg_pmu_sw_mg_size_v0;
585                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
586                         get_pmu_perfmon_cmd_start_size_v0;
587                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
588                         get_perfmon_cmd_start_offsetofvar_v0;
589                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
590                         perfmon_start_set_cmd_type_v0;
591                 g->ops.pmu_ver.perfmon_start_set_group_id =
592                         perfmon_start_set_group_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_state_id =
594                         perfmon_start_set_state_id_v0;
595                 g->ops.pmu_ver.perfmon_start_set_flags =
596                         perfmon_start_set_flags_v0;
597                 g->ops.pmu_ver.perfmon_start_get_flags =
598                         perfmon_start_get_flags_v0;
599                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
600                         get_pmu_perfmon_cmd_init_size_v0;
601                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
602                         get_perfmon_cmd_init_offsetofvar_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
604                         perfmon_cmd_init_set_sample_buffer_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
606                         perfmon_cmd_init_set_dec_cnt_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
608                         perfmon_cmd_init_set_base_cnt_id_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
610                         perfmon_cmd_init_set_samp_period_us_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
612                         perfmon_cmd_init_set_num_cnt_v0;
613                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
614                         perfmon_cmd_init_set_mov_avg_v0;
615                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
616                         get_pmu_sequence_in_alloc_ptr_v0;
617                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
618                         get_pmu_sequence_out_alloc_ptr_v0;
619                 break;
620         default:
621                 gk20a_err(dev_from_gk20a(pmu->g),
622                 "PMU code version not supported\n");
623                 return -EINVAL;
624                 break;
625         }
626         return 0;
627 }
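/*
 * gk20a_init_pmu() keys every ucode-interface accessor off
 * pmu->desc->app_version: the _v0 and _v1 helpers above differ only in
 * which command/argument/allocation struct layout they touch, and the
 * matching set is installed into g->ops.pmu_ver so the rest of the
 * driver never branches on the firmware version again.  A typical
 * caller (see pmu_bootstrap() below) goes through the ops table,
 * roughly:
 *
 *     void *args = g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu);
 *     u32 size = g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
 *     pmu_copy_to_dmem(pmu, addr_args, (u8 *)args, size, 0);
 */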
628
629 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
630                 u32 src, u8 *dst, u32 size, u8 port)
631 {
632         struct gk20a *g = pmu->g;
633         u32 i, words, bytes;
634         u32 data, addr_mask;
635         u32 *dst_u32 = (u32*)dst;
636
637         if (size == 0) {
638                 gk20a_err(dev_from_gk20a(g),
639                         "size is zero");
640                 return;
641         }
642
643         if (src & 0x3) {
644                 gk20a_err(dev_from_gk20a(g),
645                         "src (0x%08x) not 4-byte aligned", src);
646                 return;
647         }
648
649         mutex_lock(&pmu->pmu_copy_lock);
650
651         words = size >> 2;
652         bytes = size & 0x3;
653
654         addr_mask = pwr_falcon_dmemc_offs_m() |
655                     pwr_falcon_dmemc_blk_m();
656
657         src &= addr_mask;
658
659         gk20a_writel(g, pwr_falcon_dmemc_r(port),
660                 src | pwr_falcon_dmemc_aincr_f(1));
661
662         for (i = 0; i < words; i++)
663                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
664
665         if (bytes > 0) {
666                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
667                 for (i = 0; i < bytes; i++) {
668                         dst[(words << 2) + i] = ((u8 *)&data)[i];
669                 }
670         }
671         mutex_unlock(&pmu->pmu_copy_lock);
672         return;
673 }
674
675 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
676                 u32 dst, u8 *src, u32 size, u8 port)
677 {
678         struct gk20a *g = pmu->g;
679         u32 i, words, bytes;
680         u32 data, addr_mask;
681         u32 *src_u32 = (u32*)src;
682
683         if (size == 0) {
684                 gk20a_err(dev_from_gk20a(g),
685                         "size is zero");
686                 return;
687         }
688
689         if (dst & 0x3) {
690                 gk20a_err(dev_from_gk20a(g),
691                         "dst (0x%08x) not 4-byte aligned", dst);
692                 return;
693         }
694
695         mutex_lock(&pmu->pmu_copy_lock);
696
697         words = size >> 2;
698         bytes = size & 0x3;
699
700         addr_mask = pwr_falcon_dmemc_offs_m() |
701                     pwr_falcon_dmemc_blk_m();
702
703         dst &= addr_mask;
704
705         gk20a_writel(g, pwr_falcon_dmemc_r(port),
706                 dst | pwr_falcon_dmemc_aincw_f(1));
707
708         for (i = 0; i < words; i++)
709                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
710
711         if (bytes > 0) {
712                 data = 0;
713                 for (i = 0; i < bytes; i++)
714                         ((u8 *)&data)[i] = src[(words << 2) + i];
715                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
716         }
717
718         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
719         size = ALIGN(size, 4);
720         if (data != dst + size) {
721                 gk20a_err(dev_from_gk20a(g),
722                         "copy failed. bytes written %d, expected %d",
723                         data - dst, size);
724         }
725         mutex_unlock(&pmu->pmu_copy_lock);
726         return;
727 }
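/*
 * pmu_copy_from_dmem()/pmu_copy_to_dmem() move data through the falcon
 * DMEM window: DMEMC(port) is loaded with the (4-byte aligned) DMEM
 * offset plus the auto-increment bit, after which each access to
 * DMEMD(port) transfers one 32-bit word and advances the offset.
 * Trailing bytes are packed into a final partial word, and
 * pmu->pmu_copy_lock serialises users of the shared port.  On writes
 * the code re-reads DMEMC afterwards and warns if the hardware offset
 * does not match dst + ALIGN(size, 4), which would indicate a short
 * copy.
 */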
728
729 static int pmu_idle(struct pmu_gk20a *pmu)
730 {
731         struct gk20a *g = pmu->g;
732         unsigned long end_jiffies = jiffies +
733                 msecs_to_jiffies(2000);
734         u32 idle_stat;
735
736         /* wait for pmu idle */
737         do {
738                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
739
740                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
741                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
742                         break;
743                 }
744
745                 if (time_after_eq(jiffies, end_jiffies)) {
746                         gk20a_err(dev_from_gk20a(g),
747                                 "timeout waiting pmu idle : 0x%08x",
748                                 idle_stat);
749                         return -EBUSY;
750                 }
751                 usleep_range(100, 200);
752         } while (1);
753
754         gk20a_dbg_fn("done");
755         return 0;
756 }
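/*
 * pmu_idle() polls pwr_falcon_idlestate_r() until both the falcon core
 * and the external units report idle, sleeping 100-200us between reads
 * and giving up with -EBUSY after roughly two seconds.
 */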
757
758 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
759 {
760         struct gk20a *g = pmu->g;
761
762         gk20a_dbg_fn("");
763
764         gk20a_writel(g, mc_intr_mask_0_r(),
765                 gk20a_readl(g, mc_intr_mask_0_r()) &
766                 ~mc_intr_mask_0_pmu_enabled_f());
767         gk20a_writel(g, mc_intr_mask_1_r(),
768                 gk20a_readl(g, mc_intr_mask_1_r()) &
769                 ~mc_intr_mask_1_pmu_enabled_f());
770
771         gk20a_writel(g, pwr_falcon_irqmclr_r(),
772                 pwr_falcon_irqmclr_gptmr_f(1)  |
773                 pwr_falcon_irqmclr_wdtmr_f(1)  |
774                 pwr_falcon_irqmclr_mthd_f(1)   |
775                 pwr_falcon_irqmclr_ctxsw_f(1)  |
776                 pwr_falcon_irqmclr_halt_f(1)   |
777                 pwr_falcon_irqmclr_exterr_f(1) |
778                 pwr_falcon_irqmclr_swgen0_f(1) |
779                 pwr_falcon_irqmclr_swgen1_f(1) |
780                 pwr_falcon_irqmclr_ext_f(0xff));
781
782         if (enable) {
783                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
784                 gk20a_writel(g, pwr_falcon_irqdest_r(),
785                         pwr_falcon_irqdest_host_gptmr_f(0)    |
786                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
787                         pwr_falcon_irqdest_host_mthd_f(0)     |
788                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
789                         pwr_falcon_irqdest_host_halt_f(1)     |
790                         pwr_falcon_irqdest_host_exterr_f(0)   |
791                         pwr_falcon_irqdest_host_swgen0_f(1)   |
792                         pwr_falcon_irqdest_host_swgen1_f(0)   |
793                         pwr_falcon_irqdest_host_ext_f(0xff)   |
794                         pwr_falcon_irqdest_target_gptmr_f(1)  |
795                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
796                         pwr_falcon_irqdest_target_mthd_f(0)   |
797                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
798                         pwr_falcon_irqdest_target_halt_f(0)   |
799                         pwr_falcon_irqdest_target_exterr_f(0) |
800                         pwr_falcon_irqdest_target_swgen0_f(0) |
801                         pwr_falcon_irqdest_target_swgen1_f(0) |
802                         pwr_falcon_irqdest_target_ext_f(0xff));
803
804                 /* 0=disable, 1=enable */
805                 gk20a_writel(g, pwr_falcon_irqmset_r(),
806                         pwr_falcon_irqmset_gptmr_f(1)  |
807                         pwr_falcon_irqmset_wdtmr_f(1)  |
808                         pwr_falcon_irqmset_mthd_f(0)   |
809                         pwr_falcon_irqmset_ctxsw_f(0)  |
810                         pwr_falcon_irqmset_halt_f(1)   |
811                         pwr_falcon_irqmset_exterr_f(1) |
812                         pwr_falcon_irqmset_swgen0_f(1) |
813                         pwr_falcon_irqmset_swgen1_f(1));
814
815                 gk20a_writel(g, mc_intr_mask_0_r(),
816                         gk20a_readl(g, mc_intr_mask_0_r()) |
817                         mc_intr_mask_0_pmu_enabled_f());
818         }
819
820         gk20a_dbg_fn("done");
821 }
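/*
 * pmu_enable_irq() first masks the PMU bit at the MC level and clears
 * the falcon's own interrupt mask.  When enabling, the irqdest write
 * routes the watchdog, halt and SWGEN0 sources to the host (SWGEN0 is
 * what the ucode raises to signal new messages) and leaves the others
 * with the falcon; irqmset then unmasks gptmr, wdtmr, halt, exterr,
 * swgen0 and swgen1, and finally the PMU bit is restored in
 * mc_intr_mask_0.
 */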
822
823 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
824 {
825         struct gk20a *g = pmu->g;
826
827         gk20a_dbg_fn("");
828
829         if (enable) {
830                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
831                 gk20a_enable(g, mc_enable_pwr_enabled_f());
832
833                 do {
834                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
835                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
836                                  pwr_falcon_dmactl_imem_scrubbing_m());
837
838                         if (!w) {
839                                 gk20a_dbg_fn("done");
840                                 return 0;
841                         }
842                         udelay(GR_IDLE_CHECK_DEFAULT);
843                 } while (--retries || !tegra_platform_is_silicon());
844
845                 gk20a_disable(g, mc_enable_pwr_enabled_f());
846                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
847
848                 return -ETIMEDOUT;
849         } else {
850                 gk20a_disable(g, mc_enable_pwr_enabled_f());
851                 return 0;
852         }
853 }
854
855 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
856 {
857         struct gk20a *g = pmu->g;
858         u32 pmc_enable;
859         int err;
860
861         gk20a_dbg_fn("");
862
863         if (!enable) {
864                 pmc_enable = gk20a_readl(g, mc_enable_r());
865                 if (mc_enable_pwr_v(pmc_enable) !=
866                     mc_enable_pwr_disabled_v()) {
867
868                         pmu_enable_irq(pmu, false);
869                         pmu_enable_hw(pmu, false);
870                 }
871         } else {
872                 err = pmu_enable_hw(pmu, true);
873                 if (err)
874                         return err;
875
876                 /* TBD: post reset */
877
878                 err = pmu_idle(pmu);
879                 if (err)
880                         return err;
881
882                 pmu_enable_irq(pmu, true);
883         }
884
885         gk20a_dbg_fn("done");
886         return 0;
887 }
888
889 static int pmu_reset(struct pmu_gk20a *pmu)
890 {
891         int err;
892
893         err = pmu_idle(pmu);
894         if (err)
895                 return err;
896
897         /* TBD: release pmu hw mutex */
898
899         err = pmu_enable(pmu, false);
900         if (err)
901                 return err;
902
903         /* TBD: cancel all sequences */
904         /* TBD: init all sequences and state tables */
905         /* TBD: restore pre-init message handler */
906
907         err = pmu_enable(pmu, true);
908         if (err)
909                 return err;
910
911         return 0;
912 }
913
914 static int pmu_bootstrap(struct pmu_gk20a *pmu)
915 {
916         struct gk20a *g = pmu->g;
917         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
918         struct mm_gk20a *mm = &g->mm;
919         struct pmu_ucode_desc *desc = pmu->desc;
920         u64 addr_code, addr_data, addr_load;
921         u32 i, blocks, addr_args;
922
923         gk20a_dbg_fn("");
924
925         gk20a_writel(g, pwr_falcon_itfen_r(),
926                 gk20a_readl(g, pwr_falcon_itfen_r()) |
927                 pwr_falcon_itfen_ctxen_enable_f());
928         gk20a_writel(g, pwr_pmu_new_instblk_r(),
929                 pwr_pmu_new_instblk_ptr_f(
930                         mm->pmu.inst_block.cpu_pa >> 12) |
931                 pwr_pmu_new_instblk_valid_f(1) |
932                 pwr_pmu_new_instblk_target_sys_coh_f());
933
934         /* TBD: load all other surfaces */
935
936         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
937                 clk_get_rate(platform->clk[1]));
938
939         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
940                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
941                         << GK20A_PMU_DMEM_BLKSIZE2) -
942                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
943
944         pmu_copy_to_dmem(pmu, addr_args,
945                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
946                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
947
948         gk20a_writel(g, pwr_falcon_dmemc_r(0),
949                 pwr_falcon_dmemc_offs_f(0) |
950                 pwr_falcon_dmemc_blk_f(0)  |
951                 pwr_falcon_dmemc_aincw_f(1));
952
953         addr_code = u64_lo32((pmu->ucode.pmu_va +
954                         desc->app_start_offset +
955                         desc->app_resident_code_offset) >> 8) ;
956         addr_data = u64_lo32((pmu->ucode.pmu_va +
957                         desc->app_start_offset +
958                         desc->app_resident_data_offset) >> 8);
959         addr_load = u64_lo32((pmu->ucode.pmu_va +
960                         desc->bootloader_start_offset) >> 8);
961
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
970         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
971         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
972
973         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
974                 addr_load - (desc->bootloader_imem_offset >> 8));
975
976         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
977
978         for (i = 0; i < blocks; i++) {
979                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
982                         desc->bootloader_imem_offset + (i << 8));
983                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
984                         pwr_falcon_dmatrfcmd_imem_f(1)  |
985                         pwr_falcon_dmatrfcmd_write_f(0) |
986                         pwr_falcon_dmatrfcmd_size_f(6)  |
987                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
988         }
989
990         gk20a_writel(g, pwr_falcon_bootvec_r(),
991                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
992
993         gk20a_writel(g, pwr_falcon_cpuctl_r(),
994                 pwr_falcon_cpuctl_startcpu_f(1));
995
996         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
997
998         return 0;
999 }
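/*
 * pmu_bootstrap() hand-loads the on-chip bootloader and starts the
 * falcon:
 *
 *   1. enable the context interface and point the PMU at its instance
 *      block (new_instblk, physical address >> 12);
 *   2. copy the versioned command-line args to the very top of DMEM
 *      (addr_args is the DMEM size minus the args size);
 *   3. push the boot arguments through the DMEMD auto-increment port:
 *      DMA index, code/data virtual addresses (in 256-byte units,
 *      hence the >> 8), sizes, IMEM entry point and addr_args;
 *   4. DMA the bootloader image into IMEM in 256-byte blocks via
 *      dmatrfmoffs/dmatrffboffs/dmatrfcmd;
 *   5. set the boot vector and write cpuctl_startcpu to run it.
 */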
1000
1001 static void pmu_seq_init(struct pmu_gk20a *pmu)
1002 {
1003         u32 i;
1004
1005         memset(pmu->seq, 0,
1006                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1007         memset(pmu->pmu_seq_tbl, 0,
1008                 sizeof(pmu->pmu_seq_tbl));
1009
1010         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1011                 pmu->seq[i].id = i;
1012 }
1013
1014 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1015                         struct pmu_sequence **pseq)
1016 {
1017         struct gk20a *g = pmu->g;
1018         struct pmu_sequence *seq;
1019         u32 index;
1020
1021         mutex_lock(&pmu->pmu_seq_lock);
1022         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1023                                 sizeof(pmu->pmu_seq_tbl));
1024         if (index >= sizeof(pmu->pmu_seq_tbl)) {
1025                 gk20a_err(dev_from_gk20a(g),
1026                         "no free sequence available");
1027                 mutex_unlock(&pmu->pmu_seq_lock);
1028                 return -EAGAIN;
1029         }
1030         set_bit(index, pmu->pmu_seq_tbl);
1031         mutex_unlock(&pmu->pmu_seq_lock);
1032
1033         seq = &pmu->seq[index];
1034         seq->state = PMU_SEQ_STATE_PENDING;
1035
1036         *pseq = seq;
1037         return 0;
1038 }
1039
1040 static void pmu_seq_release(struct pmu_gk20a *pmu,
1041                         struct pmu_sequence *seq)
1042 {
1043         struct gk20a *g = pmu->g;
1044         seq->state      = PMU_SEQ_STATE_FREE;
1045         seq->desc       = PMU_INVALID_SEQ_DESC;
1046         seq->callback   = NULL;
1047         seq->cb_params  = NULL;
1048         seq->msg        = NULL;
1049         seq->out_payload = NULL;
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1052         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1053                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1054
1055         clear_bit(seq->id, pmu->pmu_seq_tbl);
1056 }
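/*
 * Command/message pairing is tracked with pmu_sequence objects:
 * pmu_seq_acquire() claims the first free slot in the pmu_seq_tbl
 * bitmap and marks it PENDING, and pmu_seq_release() clears the
 * callback, payload and per-version DMEM allocation sizes before
 * returning the slot to the bitmap.  The sequence id is carried in the
 * command header so the firmware's reply can later be matched back to
 * its callback.
 */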
1057
1058 static int pmu_queue_init(struct pmu_gk20a *pmu,
1059                 u32 id, union pmu_init_msg_pmu *init)
1060 {
1061         struct gk20a *g = pmu->g;
1062         struct pmu_queue *queue = &pmu->queue[id];
1063         queue->id       = id;
1064         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1065
1066         queue->mutex_id = id;
1067         mutex_init(&queue->mutex);
1068
1069         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1070                 id, queue->index, queue->offset, queue->size);
1071
1072         return 0;
1073 }
1074
1075 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1076                         u32 *head, bool set)
1077 {
1078         struct gk20a *g = pmu->g;
1079
1080         BUG_ON(!head);
1081
1082         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1083
1084                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1085                         return -EINVAL;
1086
1087                 if (!set)
1088                         *head = pwr_pmu_queue_head_address_v(
1089                                 gk20a_readl(g,
1090                                         pwr_pmu_queue_head_r(queue->index)));
1091                 else
1092                         gk20a_writel(g,
1093                                 pwr_pmu_queue_head_r(queue->index),
1094                                 pwr_pmu_queue_head_address_f(*head));
1095         } else {
1096                 if (!set)
1097                         *head = pwr_pmu_msgq_head_val_v(
1098                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1099                 else
1100                         gk20a_writel(g,
1101                                 pwr_pmu_msgq_head_r(),
1102                                 pwr_pmu_msgq_head_val_f(*head));
1103         }
1104
1105         return 0;
1106 }
1107
1108 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1109                         u32 *tail, bool set)
1110 {
1111         struct gk20a *g = pmu->g;
1112
1113         BUG_ON(!tail);
1114
1115         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1116
1117                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1118                         return -EINVAL;
1119
1120                 if (!set)
1121                         *tail = pwr_pmu_queue_tail_address_v(
1122                                 gk20a_readl(g,
1123                                         pwr_pmu_queue_tail_r(queue->index)));
1124                 else
1125                         gk20a_writel(g,
1126                                 pwr_pmu_queue_tail_r(queue->index),
1127                                 pwr_pmu_queue_tail_address_f(*tail));
1128         } else {
1129                 if (!set)
1130                         *tail = pwr_pmu_msgq_tail_val_v(
1131                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1132                 else
1133                         gk20a_writel(g,
1134                                 pwr_pmu_msgq_tail_r(),
1135                                 pwr_pmu_msgq_tail_val_f(*tail));
1136         }
1137
1138         return 0;
1139 }
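/*
 * Queue pointers live in different registers depending on direction:
 * command queues (host to PMU) each have their own
 * pwr_pmu_queue_head/tail register selected by queue->index, while the
 * single message queue (PMU to host) uses the shared
 * pwr_pmu_msgq_head/tail pair.  pmu_queue_head()/pmu_queue_tail() hide
 * that split behind a get/set flag.
 */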
1140
1141 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1142                         u32 offset, u8 *dst, u32 size)
1143 {
1144         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1145 }
1146
1147 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1148                         u32 offset, u8 *src, u32 size)
1149 {
1150         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1151 }
1152
1153 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1154 {
1155         struct gk20a *g = pmu->g;
1156         struct pmu_mutex *mutex;
1157         u32 data, owner, max_retry;
1158
1159         if (!pmu->initialized)
1160                 return -EINVAL;
1161
1162         BUG_ON(!token);
1163         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1164         BUG_ON(id > pmu->mutex_cnt);
1165
1166         mutex = &pmu->mutex[id];
1167
1168         owner = pwr_pmu_mutex_value_v(
1169                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1170
1171         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1172                 BUG_ON(mutex->ref_cnt == 0);
1173                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1174                 mutex->ref_cnt++;
1175                 return 0;
1176         }
1177
1178         max_retry = 40;
1179         do {
1180                 data = pwr_pmu_mutex_id_value_v(
1181                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1182                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1183                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1184                         gk20a_warn(dev_from_gk20a(g),
1185                                 "fail to generate mutex token: val 0x%08x",
1186                                 owner);
1187                         usleep_range(20, 40);
1188                         continue;
1189                 }
1190
1191                 owner = data;
1192                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1193                         pwr_pmu_mutex_value_f(owner));
1194
1195                 data = pwr_pmu_mutex_value_v(
1196                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1197
1198                 if (owner == data) {
1199                         mutex->ref_cnt = 1;
1200                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1201                                 mutex->index, *token);
1202                         *token = owner;
1203                         return 0;
1204                 } else {
1205                         gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1206                                 mutex->index);
1207
1208                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1209                         data = set_field(data,
1210                                 pwr_pmu_mutex_id_release_value_m(),
1211                                 pwr_pmu_mutex_id_release_value_f(owner));
1212                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1213
1214                         usleep_range(20, 40);
1215                         continue;
1216                 }
1217         } while (max_retry-- > 0);
1218
1219         return -EBUSY;
1220 }
1221
1222 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1223 {
1224         struct gk20a *g = pmu->g;
1225         struct pmu_mutex *mutex;
1226         u32 owner, data;
1227
1228         if (!pmu->initialized)
1229                 return -EINVAL;
1230
1231         BUG_ON(!token);
1232         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1233         BUG_ON(id > pmu->mutex_cnt);
1234
1235         mutex = &pmu->mutex[id];
1236
1237         owner = pwr_pmu_mutex_value_v(
1238                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1239
1240         if (*token != owner) {
1241                 gk20a_err(dev_from_gk20a(g),
1242                         "requester 0x%08x NOT match owner 0x%08x",
1243                         *token, owner);
1244                 return -EINVAL;
1245         }
1246
1247         if (--mutex->ref_cnt == 0) {
1248                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1249                         pwr_pmu_mutex_value_initial_lock_f());
1250
1251                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1252                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1253                         pwr_pmu_mutex_id_release_value_f(owner));
1254                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1255
1256                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1257                         mutex->index, *token);
1258         }
1259
1260         return 0;
1261 }
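/*
 * pmu_mutex_acquire()/pmu_mutex_release() implement the PMU hardware
 * mutex handshake shared with the ucode: read a token from
 * pwr_pmu_mutex_id_r(), write it to the mutex register and read it
 * back; if someone else got there first the token is handed back
 * through the release register and the attempt is retried (up to 40
 * times with a short sleep).  Nested acquires by the same owner only
 * bump ref_cnt.  Typical usage, as in pmu_queue_lock() below, is
 * roughly:
 *
 *     u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 *     if (!pmu_mutex_acquire(pmu, queue->mutex_id, &token)) {
 *             ... touch state shared with the ucode ...
 *             pmu_mutex_release(pmu, queue->mutex_id, &token);
 *     }
 */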
1262
1263 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1264                         struct pmu_queue *queue)
1265 {
1266         int err;
1267
1268         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1269                 return 0;
1270
1271         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1272                 mutex_lock(&queue->mutex);
1273                 return 0;
1274         }
1275
1276         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1277         return err;
1278 }
1279
1280 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1281                         struct pmu_queue *queue)
1282 {
1283         int err;
1284
1285         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1286                 return 0;
1287
1288         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1289                 mutex_unlock(&queue->mutex);
1290                 return 0;
1291         }
1292
1293         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1294         return err;
1295 }
1296
1297 /* called by pmu_read_message, no lock */
1298 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1299                         struct pmu_queue *queue)
1300 {
1301         u32 head, tail;
1302
1303         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1304         if (queue->opened && queue->oflag == OFLAG_READ)
1305                 tail = queue->position;
1306         else
1307                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1308
1309         return head == tail;
1310 }
1311
1312 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1313                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1314 {
1315         u32 head, tail, free;
1316         bool rewind = false;
1317
1318         size = ALIGN(size, QUEUE_ALIGNMENT);
1319
1320         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1321         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1322
1323         if (head >= tail) {
1324                 free = queue->offset + queue->size - head;
1325                 free -= PMU_CMD_HDR_SIZE;
1326
1327                 if (size > free) {
1328                         rewind = true;
1329                         head = queue->offset;
1330                 }
1331         }
1332
1333         if (head < tail)
1334                 free = tail - head - 1;
1335
1336         if (need_rewind)
1337                 *need_rewind = rewind;
1338
1339         return size <= free;
1340 }
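/*
 * pmu_queue_has_room() computes free space in the circular command
 * queue.  When head is at or past tail, the usable space runs from
 * head to the end of the queue minus PMU_CMD_HDR_SIZE reserved for the
 * rewind record; if the request does not fit there, the check is
 * redone from queue->offset with *need_rewind set so the caller wraps
 * first.  When head < tail the free space is simply tail - head - 1.
 */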
1341
1342 static int pmu_queue_push(struct pmu_gk20a *pmu,
1343                         struct pmu_queue *queue, void *data, u32 size)
1344 {
1345         gk20a_dbg_fn("");
1346
1347         if (!queue->opened || queue->oflag != OFLAG_WRITE) {
1348                 gk20a_err(dev_from_gk20a(pmu->g),
1349                         "queue not opened for write");
1350                 return -EINVAL;
1351         }
1352
1353         pmu_queue_write(pmu, queue->position, data, size);
1354         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1355         return 0;
1356 }
1357
1358 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1359                         struct pmu_queue *queue, void *data, u32 size,
1360                         u32 *bytes_read)
1361 {
1362         u32 head, tail, used;
1363
1364         *bytes_read = 0;
1365
1366         if (!queue->opened || queue->oflag != OFLAG_READ) {
1367                 gk20a_err(dev_from_gk20a(pmu->g),
1368                         "queue not opened for read");
1369                 return -EINVAL;
1370         }
1371
1372         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1373         tail = queue->position;
1374
1375         if (head == tail)
1376                 return 0;
1377
1378         if (head > tail)
1379                 used = head - tail;
1380         else
1381                 used = queue->offset + queue->size - tail;
1382
1383         if (size > used) {
1384                 gk20a_warn(dev_from_gk20a(pmu->g),
1385                         "queue size smaller than request read");
1386                 size = used;
1387         }
1388
1389         pmu_queue_read(pmu, tail, data, size);
1390         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1391         *bytes_read = size;
1392         return 0;
1393 }
1394
1395 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1396                         struct pmu_queue *queue)
1397 {
1398         struct pmu_cmd cmd;
1399
1400         gk20a_dbg_fn("");
1401
1402         if (!queue->opened) {
1403                 gk20a_err(dev_from_gk20a(pmu->g),
1404                         "queue not opened");
1405                 return;
1406         }
1407
1408         if (queue->oflag == OFLAG_WRITE) {
1409                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1410                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1411                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1412                 gk20a_dbg_pmu("queue %d rewinded", queue->id);
1413         }
1414
1415         queue->position = queue->offset;
1416         return;
1417 }
1418
1419 /* open for read and lock the queue */
1420 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1421                         struct pmu_queue *queue)
1422 {
1423         int err;
1424
1425         err = pmu_queue_lock(pmu, queue);
1426         if (err)
1427                 return err;
1428
1429         if (queue->opened)
1430                 BUG();
1431
1432         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1433         queue->oflag = OFLAG_READ;
1434         queue->opened = true;
1435
1436         return 0;
1437 }
1438
1439 /* open for write and lock the queue
1440    make sure there's enough free space for the write */
1441 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1442                         struct pmu_queue *queue, u32 size)
1443 {
1444         bool rewind = false;
1445         int err;
1446
1447         err = pmu_queue_lock(pmu, queue);
1448         if (err)
1449                 return err;
1450
1451         if (queue->opened)
1452                 BUG();
1453
1454         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1455                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1456                 return -EAGAIN;
1457         }
1458
1459         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1460         queue->oflag = OFLAG_WRITE;
1461         queue->opened = true;
1462
1463         if (rewind)
1464                 pmu_queue_rewind(pmu, queue);
1465
1466         return 0;
1467 }
1468
1469 /* close and unlock the queue */
1470 static int pmu_queue_close(struct pmu_gk20a *pmu,
1471                         struct pmu_queue *queue, bool commit)
1472 {
1473         if (!queue->opened)
1474                 return 0;
1475
1476         if (commit) {
1477                 if (queue->oflag == OFLAG_READ) {
1478                         pmu_queue_tail(pmu, queue,
1479                                 &queue->position, QUEUE_SET);
1480                 }
1481                 else {
1482                         pmu_queue_head(pmu, queue,
1483                                 &queue->position, QUEUE_SET);
1484                 }
1485         }
1486
1487         queue->opened = false;
1488
1489         pmu_queue_unlock(pmu, queue);
1490
1491         return 0;
1492 }
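/*
 * The open/close helpers give read-modify-write access to a queue:
 * pmu_queue_open_read() locks the queue and snapshots the tail into
 * queue->position, pmu_queue_open_write() snapshots the head after
 * verifying (and, if needed, rewinding for) enough free space, the
 * push/pop helpers then advance queue->position in QUEUE_ALIGNMENT
 * steps, and pmu_queue_close(..., commit) publishes the new position
 * back to the hardware tail (reads) or head (writes) before dropping
 * the lock.  Closing with commit == false simply abandons the position
 * and unlocks.
 */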
1493
1494 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1495 {
1496         gk20a_dbg_fn("");
1497
1498         gk20a_allocator_destroy(&pmu->dmem);
1499 }
1500
1501 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1502 {
1503         struct pmu_gk20a *pmu = &g->pmu;
1504
1505         gk20a_dbg_fn("");
1506
1507         pmu_enable_hw(pmu, true);
1508
1509         return 0;
1510 }
1511
1512 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1513 {
1514         struct pmu_gk20a *pmu = &g->pmu;
1515         struct mm_gk20a *mm = &g->mm;
1516         struct vm_gk20a *vm = &mm->pmu.vm;
1517         struct device *d = dev_from_gk20a(g);
1518         int i, err = 0;
1519         u8 *ptr;
1520         void *ucode_ptr;
1521         struct sg_table *sgt_pmu_ucode;
1522         struct sg_table *sgt_seq_buf;
1523         DEFINE_DMA_ATTRS(attrs);
1524         dma_addr_t iova;
1525
1526         gk20a_dbg_fn("");
1527
1528         /* start with elpg disabled until first enable call */
1529         mutex_init(&pmu->elpg_mutex);
1530         pmu->elpg_refcnt = 0;
1531
1532         if (pmu->sw_ready) {
1533                 for (i = 0; i < pmu->mutex_cnt; i++) {
1534                         pmu->mutex[i].id    = i;
1535                         pmu->mutex[i].index = i;
1536                 }
1537                 pmu_seq_init(pmu);
1538
1539                 gk20a_dbg_fn("skip init");
1540                 goto skip_init;
1541         }
1542
1543         /* no infoRom script from vbios? */
1544
1545         /* TBD: sysmon subtask */
1546
1547         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1548         pmu->mutex = kzalloc(pmu->mutex_cnt *
1549                 sizeof(struct pmu_mutex), GFP_KERNEL);
1550         if (!pmu->mutex) {
1551                 err = -ENOMEM;
1552                 goto err;
1553         }
1554
1555         for (i = 0; i < pmu->mutex_cnt; i++) {
1556                 pmu->mutex[i].id    = i;
1557                 pmu->mutex[i].index = i;
1558         }
1559
1560         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1561                 sizeof(struct pmu_sequence), GFP_KERNEL);
1562         if (!pmu->seq) {
1563                 err = -ENOMEM;
1564                 goto err_free_mutex;
1565         }
1566
1567         pmu_seq_init(pmu);
1568
1569         if (!g->pmu_fw) {
1570                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1571                 if (!g->pmu_fw) {
1572                         gk20a_err(d, "failed to load pmu ucode!!");
1573                         err = -ENOENT;
1574                         goto err_free_seq;
1575                 }
1576         }
1577
1578         gk20a_dbg_fn("firmware loaded");
1579
1580         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1581         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1582                         pmu->desc->descriptor_size);
1583
1584         INIT_WORK(&pmu->pg_init, pmu_setup_hw);
1585
1586         gk20a_init_pmu_vm(mm);
1587
1588         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1589         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1590                                         &iova,
1591                                         GFP_KERNEL,
1592                                         &attrs);
1593         if (!pmu->ucode.cpuva) {
1594                 gk20a_err(d, "failed to allocate memory\n");
1595                 err = -ENOMEM;
1596                 goto err_release_fw;
1597         }
1598
1599         pmu->ucode.iova = iova;
1600         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1601                                         &iova,
1602                                         GFP_KERNEL);
1603         if (!pmu->seq_buf.cpuva) {
1604                 gk20a_err(d, "failed to allocate memory\n");
1605                 err = -ENOMEM;
1606                 goto err_free_pmu_ucode;
1607         }
1608
1609         pmu->seq_buf.iova = iova;
1610
1611         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1612                                 pmu->ucode.cpuva,
1613                                 pmu->ucode.iova,
1614                                 GK20A_PMU_UCODE_SIZE_MAX);
1615         if (err) {
1616                 gk20a_err(d, "failed to allocate sg table\n");
1617                 goto err_free_seq_buf;
1618         }
1619
1620         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1621                                         GK20A_PMU_UCODE_SIZE_MAX,
1622                                         0, /* flags */
1623                                         gk20a_mem_flag_read_only);
1624         if (!pmu->ucode.pmu_va) {
1625                 gk20a_err(d, "failed to map pmu ucode memory!!");
                     err = -ENOMEM;
1626                 goto err_free_ucode_sgt;
1627         }
1628
1629         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1630                                 pmu->seq_buf.cpuva,
1631                                 pmu->seq_buf.iova,
1632                                 GK20A_PMU_SEQ_BUF_SIZE);
1633         if (err) {
1634                 gk20a_err(d, "failed to allocate sg table\n");
1635                 goto err_unmap_ucode;
1636         }
1637
1638         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1639                                         GK20A_PMU_SEQ_BUF_SIZE,
1640                                         0, /* flags */
1641                                         gk20a_mem_flag_none);
1642         if (!pmu->seq_buf.pmu_va) {
1643                 gk20a_err(d, "failed to map pmu seq buf memory!!");
                     err = -ENOMEM;
1644                 goto err_free_seq_buf_sgt;
1645         }
1646
1647         ptr = (u8 *)pmu->seq_buf.cpuva;
1648         if (!ptr) {
1649                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                     err = -ENOMEM;
1650                 goto err_unmap_seq_buf;
1651         }
1652
1653         /* TBD: remove this if ZBC save/restore is handled by PMU
1654          * write an empty ZBC sequence (just an EXIT opcode) for now */
1655         ptr[0] = 0x16; /* opcode EXIT */
1656         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1657         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1658
1659         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1660
1661         ucode_ptr = pmu->ucode.cpuva;
1662
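        /*
         * pmu->ucode_image points just past the ucode descriptor, so the loop
         * below copies the image (presumably bootloader plus application,
         * i.e. app_start_offset + app_size bytes, walked as 32-bit words via
         * the ">> 2") into the DMA buffer the PMU boots from.
         */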
1663         for (i = 0; i < (pmu->desc->app_start_offset +
1664                         pmu->desc->app_size) >> 2; i++)
1665                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1666
1667         gk20a_free_sgtable(&sgt_pmu_ucode);
1668         gk20a_free_sgtable(&sgt_seq_buf);
1669
1670         pmu->sw_ready = true;
1671
1672 skip_init:
1673         mutex_init(&pmu->isr_mutex);
1674         mutex_init(&pmu->isr_enable_lock);
1675         mutex_init(&pmu->pmu_copy_lock);
1676         mutex_init(&pmu->pmu_seq_lock);
1677
1678         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1679         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1680
1681         pmu->remove_support = gk20a_remove_pmu_support;
1682         err = gk20a_init_pmu(pmu);
1683         if (err) {
1684                 gk20a_err(d, "failed to set function pointers\n");
1685                 return err;
1686         }
1687
1688         gk20a_dbg_fn("done");
1689         return 0;
1690
1691  err_unmap_seq_buf:
1692         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1693                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1694  err_free_seq_buf_sgt:
1695         gk20a_free_sgtable(&sgt_seq_buf);
1696  err_unmap_ucode:
1697         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1698                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1699  err_free_ucode_sgt:
1700         gk20a_free_sgtable(&sgt_pmu_ucode);
1701  err_free_seq_buf:
1702         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1703                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1704         pmu->seq_buf.cpuva = NULL;
1705         pmu->seq_buf.iova = 0;
1706  err_free_pmu_ucode:
1707         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1708                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1709         pmu->ucode.cpuva = NULL;
1710         pmu->ucode.iova = 0;
1711  err_release_fw:
1712         release_firmware(g->pmu_fw);
1713  err_free_seq:
1714         kfree(pmu->seq);
1715  err_free_mutex:
1716         kfree(pmu->mutex);
1717  err:
1718         gk20a_dbg_fn("fail");
1719         return err;
1720 }
1721
1722 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1723                         void *param, u32 handle, u32 status);
1724
1725 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1726                         void *param, u32 handle, u32 status)
1727 {
1728         struct pmu_gk20a *pmu = param;
1729         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1730
1731         gk20a_dbg_fn("");
1732
1733         gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1734         if (status != 0) {
1735                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1736                 /* TBD: disable ELPG */
1737                 return;
1738         }
1739
1740         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1741         if ((!pmu->buf_loaded) &&
1742                 (pmu->pmu_state == PMU_STATE_LOADING_PG_BUF)) {
1743                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1744         } else {
1745                 schedule_work(&pmu->pg_init);
1746         }
1747 }
1748
1749 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1750 {
1751         struct pmu_gk20a *pmu = &g->pmu;
1752         int err;
1753
1754         gk20a_dbg_fn("");
1755
1756         mutex_lock(&pmu->isr_enable_lock);
1757         pmu_reset(pmu);
1758         pmu->isr_enabled = true;
1759         mutex_unlock(&pmu->isr_enable_lock);
1760
1761         /* setup apertures - virtual */
1762         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1763                 pwr_fbif_transcfg_mem_type_virtual_f());
1764         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1765                 pwr_fbif_transcfg_mem_type_virtual_f());
1766         /* setup apertures - physical */
1767         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1768                 pwr_fbif_transcfg_mem_type_physical_f() |
1769                 pwr_fbif_transcfg_target_local_fb_f());
1770         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1771                 pwr_fbif_transcfg_mem_type_physical_f() |
1772                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1773         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1774                 pwr_fbif_transcfg_mem_type_physical_f() |
1775                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1776
1777         /* TBD: load pmu ucode */
1778         err = pmu_bootstrap(pmu);
1779         if (err)
1780                 return err;
1781
1782         return 0;
1783
1784 }
1785
1786 static int gk20a_aelpg_init(struct gk20a *g);
1787 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1788
1789 static void pmu_setup_hw_load_zbc(struct gk20a *g);
1790 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
1791
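/*
 * PG boot sequence driven by the pg_init work item below (each state is set
 * elsewhere in this file):
 *
 *   PMU_STATE_STARTING       - ucode bootstrapped, waiting for the init msg
 *   PMU_STATE_INIT_RECEIVED  - init msg parsed (pmu_process_init_msg())
 *   PMU_STATE_ELPG_BOOTING   - ELPG INIT/DISALLOW posted (pmu_init_powergating())
 *   PMU_STATE_ELPG_BOOTED    - DISALLOW acked (pmu_handle_pg_elpg_msg())
 *   PMU_STATE_LOADING_PG_BUF - FECS PG buffer load posted (gk20a_init_pmu_load_fecs())
 *   PMU_STATE_LOADING_ZBC    - ZBC seq buffer load posted (pmu_setup_hw_load_zbc())
 *   PMU_STATE_STARTED        - ELPG/AELPG enabled (pmu_setup_hw_enable_elpg())
 *
 * The message handlers schedule pg_init again to advance to the next step.
 */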
1792 static void pmu_setup_hw(struct work_struct *work)
1793 {
1794         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1795         struct gk20a *g = pmu->g;
1796
1797         switch (pmu->pmu_state) {
1798         case PMU_STATE_INIT_RECEIVED:
1799                 gk20a_dbg_pmu("pmu starting");
1800                 pmu_init_powergating(g);
1801                 break;
1802         case PMU_STATE_ELPG_BOOTED:
1803                 gk20a_dbg_pmu("elpg booted");
1804                 gk20a_init_pmu_load_fecs(g);
1805                 break;
1806         case PMU_STATE_LOADING_PG_BUF:
1807                 gk20a_dbg_pmu("loaded pg buf");
1808                 pmu_setup_hw_load_zbc(g);
1809                 break;
1810         case PMU_STATE_LOADING_ZBC:
1811                 gk20a_dbg_pmu("loaded zbc");
1812                 pmu_setup_hw_enable_elpg(g);
1813                 break;
1814         case PMU_STATE_STARTED:
1815                 gk20a_dbg_pmu("PMU booted");
1816                 break;
1817         default:
1818                 gk20a_dbg_pmu("invalid state");
1819                 break;
1820         }
1821 }
1822
1823 int gk20a_init_pmu_load_fecs(struct gk20a *g)
1824 {
1825         struct pmu_gk20a *pmu = &g->pmu;
1826         struct pmu_cmd cmd;
1827         u32 desc;
1828         int err = 0;
1829         gk20a_dbg_fn("");
1830
1831         memset(&cmd, 0, sizeof(struct pmu_cmd));
1832         cmd.hdr.unit_id = PMU_UNIT_PG;
1833         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1834         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1835         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1836         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1837         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
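        /*
         * The shift/mask below pass the PG buffer GPU VA to the PMU as a
         * 256-byte-aligned base (pmu_va >> 8) plus the remaining low byte as
         * an offset; the same encoding is used for the ZBC buffer in
         * pmu_setup_hw_load_zbc().
         */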
1838         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1839         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1840         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1841
1842         pmu->buf_loaded = false;
1843         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1844         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1845                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1846         pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
1847         return err;
1848 }
1849
1850 static void pmu_setup_hw_load_zbc(struct gk20a *g)
1851 {
1852         struct pmu_gk20a *pmu = &g->pmu;
1853         struct pmu_cmd cmd;
1854         u32 desc;
1855
1856         memset(&cmd, 0, sizeof(struct pmu_cmd));
1857         cmd.hdr.unit_id = PMU_UNIT_PG;
1858         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1859         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1860         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1861         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1862         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1863         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1864         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1865         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1866
1867         pmu->buf_loaded = false;
1868         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
1869         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1870                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1871         pmu->pmu_state = PMU_STATE_LOADING_ZBC;
1872 }
1873
1874 static void pmu_setup_hw_enable_elpg(struct gk20a *g)
1875 {
1876         struct pmu_gk20a *pmu = &g->pmu;
1877
1878         /*
1879          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1880          * 7. This prevents PMU stalling on Host register accesses. Once the
1881          * cause for this hang is discovered and fixed, this WAR should be
1882          * removed.
1883          */
1884         gk20a_writel(g, 0x10a164, 0x109ff);
1885
1886         pmu->initialized = true;
1887         pmu->pmu_state = PMU_STATE_STARTED;
1888
1889         pmu->zbc_ready = true;
1890         /* Save zbc table after PMU is initialized. */
1891         gr_gk20a_pmu_save_zbc(g, 0xf);
1892
1893         if (g->elpg_enabled)
1894                 gk20a_pmu_enable_elpg(g);
1895
1896         udelay(50);
1897
1898         /* Enable AELPG */
1899         if (g->aelpg_enabled) {
1900                 gk20a_aelpg_init(g);
1901                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
1902         }
1903
1904         wake_up(&g->pmu.boot_wq);
1905 }
1906
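/*
 * Top-level PMU bring-up: reset/enable the engine, then (on platforms with a
 * real PMU) set up SW state and bootstrap the falcon.  The remaining steps
 * (powergating init, PG/ZBC buffer loads, ELPG enable) run asynchronously
 * from the pg_init work item once the PMU has sent its init message (see
 * pmu_setup_hw() above).
 */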
1907 int gk20a_init_pmu_support(struct gk20a *g)
1908 {
1909         struct pmu_gk20a *pmu = &g->pmu;
1910         int err;
1911
1912         gk20a_dbg_fn("");
1913
1914         if (pmu->initialized)
1915                 return 0;
1916
1917         pmu->g = g;
1918
1919         err = gk20a_init_pmu_reset_enable_hw(g);
1920         if (err)
1921                 return err;
1922
1923         if (support_gk20a_pmu()) {
1924                 err = gk20a_init_pmu_setup_sw(g);
1925                 if (err)
1926                         return err;
1927
1928                 err = gk20a_init_pmu_setup_hw1(g);
1929                 if (err)
1930                         return err;
1931
1932                 pmu->pmu_state = PMU_STATE_STARTING;
1933         }
1934
1935         return err;
1936 }
1937
1938 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1939                         void *param, u32 handle, u32 status)
1940 {
1941         struct pmu_gk20a *pmu = param;
1942         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
1943
1944         gk20a_dbg_fn("");
1945
1946         if (status != 0) {
1947                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
1948                 /* TBD: disable ELPG */
1949                 return;
1950         }
1951
1952         switch (elpg_msg->msg) {
1953         case PMU_PG_ELPG_MSG_INIT_ACK:
1954                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
1955                 break;
1956         case PMU_PG_ELPG_MSG_ALLOW_ACK:
1957                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
1958                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
1959                 break;
1960         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
1961                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
1962                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
1963                 if (pmu->pmu_state == PMU_STATE_ELPG_BOOTING) {
1964                         pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
1965                         schedule_work(&pmu->pg_init);
1966                 }
1967                 break;
1968         default:
1969                 gk20a_err(dev_from_gk20a(g),
1970                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
1971         }
1972
1973         return;
1974 }
1975
1976 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
1977                         void *param, u32 handle, u32 status)
1978 {
1979         struct pmu_gk20a *pmu = param;
1980
1981         gk20a_dbg_fn("");
1982
1983         if (status != 0) {
1984                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
1985                 /* TBD: disable ELPG */
1986                 return;
1987         }
1988
1989         switch (msg->msg.pg.stat.sub_msg_id) {
1990         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
1991                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
1992                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
1993                 break;
1994         default:
1995                 break;
1996         }
1997 }
1998
1999 static int pmu_init_powergating(struct gk20a *g)
2000 {
2001         struct pmu_gk20a *pmu = &g->pmu;
2002         struct pmu_cmd cmd;
2003         u32 seq;
2004
2005         gk20a_dbg_fn("");
2006
2007         mutex_lock(&pmu->isr_mutex);
2008
2009         if (tegra_cpu_is_asim()) {
2010                 /* TBD: calculate threshold for silicon */
2011                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2012                                 PMU_PG_IDLE_THRESHOLD_SIM);
2013                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2014                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2015         } else {
2016                 /* TBD: calculate threshold for silicon */
2017                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2018                                 PMU_PG_IDLE_THRESHOLD);
2019                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2020                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2021         }
2022
2023         gk20a_gr_wait_initialized(g);
2024
2025         /* init ELPG */
2026         memset(&cmd, 0, sizeof(struct pmu_cmd));
2027         cmd.hdr.unit_id = PMU_UNIT_PG;
2028         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2029         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2030         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2031         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2032
2033         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
2034         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2035                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2036
2037         /* alloc dmem for powergating state log */
2038         pmu->stat_dmem_offset = 0;
2039         memset(&cmd, 0, sizeof(struct pmu_cmd));
2040         cmd.hdr.unit_id = PMU_UNIT_PG;
2041         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2042         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2043         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2044         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2045         cmd.cmd.pg.stat.data = 0;
2046
2047         gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
2048         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2049                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2050
2051         /* disallow ELPG initially
2052            PMU ucode requires a disallow cmd before allow cmd */
2053         pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
2054         memset(&cmd, 0, sizeof(struct pmu_cmd));
2055         cmd.hdr.unit_id = PMU_UNIT_PG;
2056         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2057         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2058         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2059         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2060
2061         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
2062         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2063                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2064
2065         /* start with elpg disabled until first enable call */
2066         pmu->elpg_refcnt = 0;
2067
2068         if (pmu->pmu_state == PMU_STATE_INIT_RECEIVED)
2069                 pmu->pmu_state = PMU_STATE_ELPG_BOOTING;
2070
2071         mutex_unlock(&pmu->isr_mutex);
2072
2073         return 0;
2074 }
2075
2076 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2077 {
2078         struct gk20a *g = pmu->g;
2079         struct pmu_v *pv = &g->ops.pmu_ver;
2080         struct pmu_cmd cmd;
2081         struct pmu_payload payload;
2082         u32 seq;
2083         u32 data;
2084         int err = 0;
2085
2086         gk20a_dbg_fn("");
2087
2088         pmu->perfmon_ready = 0;
2089
2090         /* use counter #3 for GR and CE2 busy cycles */
2091         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2092                 pwr_pmu_idle_mask_gr_enabled_f() |
2093                 pwr_pmu_idle_mask_ce_2_enabled_f());
2094
2095         /* disable idle filtering for counters 3 and 6 */
2096         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2097         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2098                         pwr_pmu_idle_ctrl_filter_m(),
2099                         pwr_pmu_idle_ctrl_value_busy_f() |
2100                         pwr_pmu_idle_ctrl_filter_disabled_f());
2101         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2102
2103         /* use counter #6 for total cycles */
2104         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2105         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2106                         pwr_pmu_idle_ctrl_filter_m(),
2107                         pwr_pmu_idle_ctrl_value_always_f() |
2108                         pwr_pmu_idle_ctrl_filter_disabled_f());
2109         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2110
2111         /*
2112          * We don't want to disturb counters #3 and #6, which are used by
2113          * perfmon, so we add wiring also to counters #1 and #2 for
2114          * exposing raw counter readings.
2115          */
2116         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2117                 pwr_pmu_idle_mask_gr_enabled_f() |
2118                 pwr_pmu_idle_mask_ce_2_enabled_f());
2119
2120         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2121         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2122                         pwr_pmu_idle_ctrl_filter_m(),
2123                         pwr_pmu_idle_ctrl_value_busy_f() |
2124                         pwr_pmu_idle_ctrl_filter_disabled_f());
2125         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2126
2127         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2128         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2129                         pwr_pmu_idle_ctrl_filter_m(),
2130                         pwr_pmu_idle_ctrl_value_always_f() |
2131                         pwr_pmu_idle_ctrl_filter_disabled_f());
2132         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2133
2134         if (!pmu->sample_buffer)
2135                 err = pmu->dmem.alloc(&pmu->dmem,
2136                                       &pmu->sample_buffer, 2 * sizeof(u16));
2137         if (err) {
2138                 gk20a_err(dev_from_gk20a(g),
2139                         "failed to allocate perfmon sample buffer");
2140                 return -ENOMEM;
2141         }
2142
2143         /* init PERFMON */
2144         memset(&cmd, 0, sizeof(struct pmu_cmd));
2145         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2146         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2147         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2148         /* buffer to save counter values for pmu perfmon */
2149         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2150                 (u16)pmu->sample_buffer);
2151         /* number of sample periods below lower threshold
2152            before pmu triggers perfmon decrease event
2153            TBD: = 15 */
2154         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2155         /* index of base counter, aka. always ticking counter */
2156         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2157         /* interval in microseconds between PMU polls of the perf counters */
2158         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2159         /* number of perfmon counters
2160            counter #3 (GR and CE2) for gk20a */
2161         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2162         /* moving average window for sample periods
2163            TBD: = 3000000 / sample_period_us = 17 */
2164         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2165
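        /*
         * The perfmon counter descriptor goes in as an "in" payload:
         * gk20a_pmu_cmd_post() is expected to allocate PMU DMEM for it and
         * patch the resulting allocation into the command at offset
         * COUNTER_ALLOC (pmu_validate_cmd() checks this size accounting).
         */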
2166         memset(&payload, 0, sizeof(struct pmu_payload));
2167         payload.in.buf = &pmu->perfmon_counter;
2168         payload.in.size = sizeof(struct pmu_perfmon_counter);
2169         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2170
2171         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
2172         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2173                         NULL, NULL, &seq, ~0);
2174
2175         return 0;
2176 }
2177
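/*
 * The first message the PMU sends after bootstrap is the INIT message.  It
 * describes the layout of the command/message queues (consumed per queue by
 * pmu_queue_init()) and the offset/size of the DMEM region left to the
 * driver, which seeds the pmu->dmem allocator used for command payloads.
 */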
2178 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2179                         struct pmu_msg *msg)
2180 {
2181         struct gk20a *g = pmu->g;
2182         struct pmu_v *pv = &g->ops.pmu_ver;
2183         union pmu_init_msg_pmu *init;
2184         struct pmu_sha1_gid_data gid_data;
2185         u32 i, tail = 0;
2186         gk20a_dbg_pmu("init received\n");
2187
2188         tail = pwr_pmu_msgq_tail_val_v(
2189                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2190
2191         pmu_copy_from_dmem(pmu, tail,
2192                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2193
2194         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2195                 gk20a_err(dev_from_gk20a(g),
2196                         "expecting init msg");
2197                 return -EINVAL;
2198         }
2199
2200         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2201                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2202
2203         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2204                 gk20a_err(dev_from_gk20a(g),
2205                         "expecting init msg");
2206                 return -EINVAL;
2207         }
2208
2209         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2210         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2211                 pwr_pmu_msgq_tail_val_f(tail));
2212
2213         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2214         if (!pmu->gid_info.valid) {
2215
2216                 pmu_copy_from_dmem(pmu,
2217                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2218                         (u8 *)&gid_data,
2219                         sizeof(struct pmu_sha1_gid_data), 0);
2220
2221                 pmu->gid_info.valid =
2222                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2223
2224                 if (pmu->gid_info.valid) {
2225
2226                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2227                                 sizeof(gid_data.gid));
2228
2229                         memcpy(pmu->gid_info.gid, gid_data.gid,
2230                                 sizeof(pmu->gid_info.gid));
2231                 }
2232         }
2233
2234         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2235                 pmu_queue_init(pmu, i, init);
2236
2237         if (!pmu->dmem.alloc)
2238                 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2239                                 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2240                                 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2241                                 PMU_DMEM_ALLOC_ALIGNMENT);
2242
2243         pmu->pmu_ready = true;
2244         pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
2245         schedule_work(&pmu->pg_init);
2246         gk20a_dbg_pmu("init received end\n");
2247
2248         return 0;
2249 }
2250
2251 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2252                         struct pmu_msg *msg, int *status)
2253 {
2254         struct gk20a *g = pmu->g;
2255         u32 read_size, bytes_read;
2256         int err;
2257
2258         *status = 0;
2259
2260         if (pmu_queue_is_empty(pmu, queue))
2261                 return false;
2262
2263         err = pmu_queue_open_read(pmu, queue);
2264         if (err) {
2265                 gk20a_err(dev_from_gk20a(g),
2266                         "fail to open queue %d for read", queue->id);
2267                 *status = err;
2268                 return false;
2269         }
2270
2271         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2272                         PMU_MSG_HDR_SIZE, &bytes_read);
2273         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2274                 gk20a_err(dev_from_gk20a(g),
2275                         "fail to read msg from queue %d", queue->id);
2276                 *status = err | -EINVAL;
2277                 goto clean_up;
2278         }
2279
2280         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2281                 pmu_queue_rewind(pmu, queue);
2282                 /* read again after rewind */
2283                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2284                                 PMU_MSG_HDR_SIZE, &bytes_read);
2285                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2286                         gk20a_err(dev_from_gk20a(g),
2287                                 "fail to read msg from queue %d", queue->id);
2288                         *status = err | -EINVAL;
2289                         goto clean_up;
2290                 }
2291         }
2292
2293         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2294                 gk20a_err(dev_from_gk20a(g),
2295                         "read invalid unit_id %d from queue %d",
2296                         msg->hdr.unit_id, queue->id);
2297                 *status = -EINVAL;
2298                 goto clean_up;
2299         }
2300
2301         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2302                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2303                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2304                         read_size, &bytes_read);
2305                 if (err || bytes_read != read_size) {
2306                         gk20a_err(dev_from_gk20a(g),
2307                                 "fail to read msg from queue %d", queue->id);
2308                         *status = err;
2309                         goto clean_up;
2310                 }
2311         }
2312
2313         err = pmu_queue_close(pmu, queue, true);
2314         if (err) {
2315                 gk20a_err(dev_from_gk20a(g),
2316                         "fail to close queue %d", queue->id);
2317                 *status = err;
2318                 return false;
2319         }
2320
2321         return true;
2322
2323 clean_up:
2324         err = pmu_queue_close(pmu, queue, false);
2325         if (err)
2326                 gk20a_err(dev_from_gk20a(g),
2327                         "fail to close queue %d", queue->id);
2328         return false;
2329 }
2330
2331 static int pmu_response_handle(struct pmu_gk20a *pmu,
2332                         struct pmu_msg *msg)
2333 {
2334         struct gk20a *g = pmu->g;
2335         struct pmu_sequence *seq;
2336         struct pmu_v *pv = &g->ops.pmu_ver;
2337         int ret = 0;
2338
2339         gk20a_dbg_fn("");
2340
2341         seq = &pmu->seq[msg->hdr.seq_id];
2342         if (seq->state != PMU_SEQ_STATE_USED &&
2343             seq->state != PMU_SEQ_STATE_CANCELLED) {
2344                 gk20a_err(dev_from_gk20a(g),
2345                         "msg for an unknown sequence %d", seq->id);
2346                 return -EINVAL;
2347         }
2348
2349         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2350             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2351                 gk20a_err(dev_from_gk20a(g),
2352                         "unhandled cmd: seq %d", seq->id);
2353         } else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2355                 if (seq->msg) {
2356                         if (seq->msg->hdr.size >= msg->hdr.size) {
2357                                 memcpy(seq->msg, msg, msg->hdr.size);
2358                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2359                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2360                                         pmu_copy_from_dmem(pmu,
2361                                         pv->pmu_allocation_get_dmem_offset(pmu,
2362                                         pv->get_pmu_seq_out_a_ptr(seq)),
2363                                         seq->out_payload,
2364                                         pv->pmu_allocation_get_dmem_size(pmu,
2365                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2366                                 }
2367                         } else {
2368                                 gk20a_err(dev_from_gk20a(g),
2369                                         "sequence %d msg buffer too small",
2370                                         seq->id);
2371                         }
2372                 }
2373         } else
2374                 seq->callback = NULL;
2375         if (pv->pmu_allocation_get_dmem_size(pmu,
2376                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2377                 pmu->dmem.free(&pmu->dmem,
2378                 pv->pmu_allocation_get_dmem_offset(pmu,
2379                 pv->get_pmu_seq_in_a_ptr(seq)),
2380                 pv->pmu_allocation_get_dmem_size(pmu,
2381                 pv->get_pmu_seq_in_a_ptr(seq)));
2382         if (pv->pmu_allocation_get_dmem_size(pmu,
2383                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2384                 pmu->dmem.free(&pmu->dmem,
2385                 pv->pmu_allocation_get_dmem_offset(pmu,
2386                 pv->get_pmu_seq_out_a_ptr(seq)),
2387                 pv->pmu_allocation_get_dmem_size(pmu,
2388                 pv->get_pmu_seq_out_a_ptr(seq)));
2389
2390         if (seq->callback)
2391                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2392
2393         pmu_seq_release(pmu, seq);
2394
2395         /* TBD: notify client waiting for available dmem */
2396
2397         gk20a_dbg_fn("done");
2398
2399         return 0;
2400 }
2401
2402 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2403                                  u32 *var, u32 val);
2404
2405 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2406                         void *param, u32 handle, u32 status)
2407 {
2408         struct pmu_gk20a *pmu = param;
2409         gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
2410         pmu->zbc_save_done = 1;
2411 }
2412
2413 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2414 {
2415         struct pmu_gk20a *pmu = &g->pmu;
2416         struct pmu_cmd cmd;
2417         u32 seq;
2418
2419         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2420                 return;
2421
2422         memset(&cmd, 0, sizeof(struct pmu_cmd));
2423         cmd.hdr.unit_id = PMU_UNIT_PG;
2424         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2425         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2426         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2427
2428         pmu->zbc_save_done = 0;
2429
2430         gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
2431         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2432                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2433         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2434                               &pmu->zbc_save_done, 1);
2435         if (!pmu->zbc_save_done)
2436                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2437 }
2438
2439 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2440 {
2441         struct gk20a *g = pmu->g;
2442         struct pmu_v *pv = &g->ops.pmu_ver;
2443         struct pmu_cmd cmd;
2444         struct pmu_payload payload;
2445         u32 current_rate = 0;
2446         u32 seq;
2447
2448         /* PERFMON Start */
2449         memset(&cmd, 0, sizeof(struct pmu_cmd));
2450         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2451         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2452         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2453                 PMU_PERFMON_CMD_ID_START);
2454         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2455                 PMU_DOMAIN_GROUP_PSTATE);
2456         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2457                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2458
2459         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2460         if (current_rate >= gpc_pll_params.max_freq)
2461                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2462                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2463         else if (current_rate <= gpc_pll_params.min_freq)
2464                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2465                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2466         else
2467                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2468                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2469                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2470
2471         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2472                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2473                 PMU_PERFMON_FLAG_CLEAR_PREV);
2474
2475         memset(&payload, 0, sizeof(struct pmu_payload));
2476
2477         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2478         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2479         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2480         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2481         pmu->perfmon_counter.valid = true;
2482
2483         payload.in.buf = &pmu->perfmon_counter;
2484         payload.in.size = sizeof(pmu->perfmon_counter);
2485         payload.in.offset =
2486                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2487
2488         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
2489         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2490                         NULL, NULL, &seq, ~0);
2491
2492         return 0;
2493 }
2494
2495 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2496 {
2497         struct gk20a *g = pmu->g;
2498         struct pmu_cmd cmd;
2499         u32 seq;
2500
2501         /* PERFMON Stop */
2502         memset(&cmd, 0, sizeof(struct pmu_cmd));
2503         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2504         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2505         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2506
2507         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
2508         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2509                         NULL, NULL, &seq, ~0);
2510         return 0;
2511 }
2512
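/*
 * Simple load-based DVFS: an INCREASE event raises the GPU clock to 6/5 of
 * the current rate (e.g. 500 MHz -> 600 MHz) and a DECREASE event drops it
 * to 7/10 (e.g. 500 MHz -> 350 MHz).  Sampling is then restarted (when
 * CONFIG_GK20A_PERFMON is enabled) so the PMU keeps evaluating load against
 * the thresholds configured in pmu_perfmon_start_sampling().
 */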
2513 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2514                         struct pmu_perfmon_msg *msg)
2515 {
2516         struct gk20a *g = pmu->g;
2517         u32 rate;
2518
2519         gk20a_dbg_fn("");
2520
2521         switch (msg->msg_type) {
2522         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2523                 gk20a_dbg_pmu("perfmon increase event: "
2524                         "state_id %d, group_id %d, pct %d",
2525                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2526                 /* increase gk20a clock freq by 20% */
2527                 rate = gk20a_clk_get_rate(g);
2528                 gk20a_clk_set_rate(g, rate * 6 / 5);
2529                 break;
2530         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2531                 gk20a_dbg_pmu("perfmon decrease event: "
2532                         "state_id %d, group_id %d, pct %d",
2533                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2534                 /* decrease gk20a clock freq to 70% of current rate */
2535                 rate = gk20a_clk_get_rate(g);
2536                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2537                 break;
2538         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2539                 pmu->perfmon_ready = 1;
2540                 gk20a_dbg_pmu("perfmon init event");
2541                 break;
2542         default:
2543                 break;
2544         }
2545
2546         /* restart sampling */
2547         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2548                 return pmu_perfmon_start_sampling(pmu);
2549         return 0;
2550 }
2551
2552
2553 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2554 {
2555         int err = 0;
2556
2557         gk20a_dbg_fn("");
2558
2559         switch (msg->hdr.unit_id) {
2560         case PMU_UNIT_PERFMON:
2561                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2562                 break;
2563         default:
2564                 break;
2565         }
2566
2567         return err;
2568 }
2569
2570 static int pmu_process_message(struct pmu_gk20a *pmu)
2571 {
2572         struct pmu_msg msg;
2573         int status;
2574
2575         if (unlikely(!pmu->pmu_ready)) {
2576                 pmu_process_init_msg(pmu, &msg);
2577                 pmu_init_perfmon(pmu);
2578                 return 0;
2579         }
2580
2581         while (pmu_read_message(pmu,
2582                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2583
2584                 gk20a_dbg_pmu("read msg hdr: "
2585                                 "unit_id = 0x%08x, size = 0x%08x, "
2586                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2587                                 msg.hdr.unit_id, msg.hdr.size,
2588                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2589
2590                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2591
2592                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2593                         pmu_handle_event(pmu, &msg);
2594                 } else {
2595                         pmu_response_handle(pmu, &msg);
2596                 }
2597         }
2598
2599         return 0;
2600 }
2601
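/*
 * Poll *var until it reaches val, servicing any pending PMU interrupt by
 * hand (presumably so the wait can make progress even when the interrupt has
 * not been delivered yet), with exponential backoff between
 * GR_IDLE_CHECK_DEFAULT and GR_IDLE_CHECK_MAX.  Note that on pre-silicon
 * platforms the loop effectively never times out.
 */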
2602 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2603                                  u32 *var, u32 val)
2604 {
2605         struct gk20a *g = pmu->g;
2606         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2607         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2608
2609         do {
2610                 if (*var == val)
2611                         return 0;
2612
2613                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2614                         gk20a_pmu_isr(g);
2615
2616                 usleep_range(delay, delay * 2);
2617                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2618         } while (time_before(jiffies, end_jiffies) ||
2619                         !tegra_platform_is_silicon());
2620
2621         return -ETIMEDOUT;
2622 }
2623
2624 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2625 {
2626         struct gk20a *g = pmu->g;
2627         struct pmu_pg_stats stats;
2628
2629         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2630                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2631
2632         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2633                 stats.pg_entry_start_timestamp);
2634         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2635                 stats.pg_exit_start_timestamp);
2636         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2637                 stats.pg_ingating_start_timestamp);
2638         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2639                 stats.pg_ungating_start_timestamp);
2640         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2641                 stats.pg_avg_entry_time_us);
2642         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2643                 stats.pg_avg_exit_time_us);
2644         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2645                 stats.pg_ingating_cnt);
2646         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2647                 stats.pg_ingating_time_us);
2648         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2649                 stats.pg_ungating_count);
2650         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2651                 stats.pg_ungating_time_us);
2652         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2653                 stats.pg_gating_cnt);
2654         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2655                 stats.pg_gating_deny_cnt);
2656
2657         /*
2658            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2659            in .nm file, e.g. 0x1000066c. use 0x66c.
2660         u32 i, val[20];
2661         pmu_copy_from_dmem(pmu, 0x66c,
2662                 (u8 *)val, sizeof(val), 0);
2663         gk20a_dbg_pmu("elpg log begin");
2664         for (i = 0; i < 20; i++)
2665                 gk20a_dbg_pmu("0x%08x", val[i]);
2666         gk20a_dbg_pmu("elpg log end");
2667         */
2668
2669         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2670                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2671         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2672                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2673         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2674                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2675         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2676                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2677         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2678                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2679
2680         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2681                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2682         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2683                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2684         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2685                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2686
2687         /*
2688          TBD: script can't generate those registers correctly
2689         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2690                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2691         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2692                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2693         */
2694 }
2695
2696 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2697 {
2698         struct gk20a *g = pmu->g;
2699         int i;
2700
2701         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2702                 gk20a_readl(g, pwr_falcon_os_r()));
2703         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2704                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2705         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2706                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2707         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2708                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2709         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2710                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2711         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2712                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2713         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2714                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2715         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2716                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2717         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2718                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2719
2720         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2721                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2722                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2723
2724         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2725                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2726                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2727
2728         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2729                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2730                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2731                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2732                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2733                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2734         }
2735
2736         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2737         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2738         if (i != 0) {
2739                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2740                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2741                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2742                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2743                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2744                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2745                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2746                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2747         }
2748
2749         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2750         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2751
2752         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2753         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2754         if (pwr_falcon_exterrstat_valid_v(i) ==
2755                         pwr_falcon_exterrstat_valid_true_v()) {
2756                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2757                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2758                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2759                         gk20a_readl(g, mc_enable_r()));
2760         }
2761
2762         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2763                 gk20a_readl(g, pwr_falcon_engctl_r()));
2764         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2765                 gk20a_readl(g, pwr_falcon_curctx_r()));
2766         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2767                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2768
2769         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2770                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2771                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2772         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2773                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2774
2775         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2776                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2777                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2778         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2779                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2780
2781         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2782                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2783                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2784         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2785                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2786
2787         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2788                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2789                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2790         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2791                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2792
2793         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2794                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2795                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2796         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2797                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2798
2799         for (i = 0; i < 4; i++) {
2800                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2801                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2802                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2803                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2804                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2805
2806                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2807                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2808                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2809                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2810                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2811         }
2812         gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
2813                         pmu->elpg_stat);
2814
2815         /* PMU may crash due to FECS crash. Dump FECS status */
2816         gk20a_fecs_dump_falcon_stats(g);
2817 }
2818
2819 void gk20a_pmu_isr(struct gk20a *g)
2820 {
2821         struct pmu_gk20a *pmu = &g->pmu;
2822         struct pmu_queue *queue;
2823         u32 intr, mask;
2824         bool recheck = false;
2825
2826         gk20a_dbg_fn("");
2827
2828         mutex_lock(&pmu->isr_enable_lock);
2829         if (!pmu->isr_enabled) {
2830                 mutex_unlock(&pmu->isr_enable_lock);
2831                 return;
2832         }
2833
2834         mutex_lock(&pmu->isr_mutex);
2835
2836         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2837                 gk20a_readl(g, pwr_falcon_irqdest_r());
2838
2839         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2840
2841         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2842
2843         if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
2844                 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2845                 mutex_unlock(&pmu->isr_mutex);
2846                 mutex_unlock(&pmu->isr_enable_lock);
2847                 return;
2848         }
2849
2850         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2851                 gk20a_err(dev_from_gk20a(g),
2852                         "pmu halt intr not implemented");
2853                 pmu_dump_falcon_stats(pmu);
2854         }
2855         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2856                 gk20a_err(dev_from_gk20a(g),
2857                         "pmu exterr intr not implemented. Clearing interrupt.");
2858                 pmu_dump_falcon_stats(pmu);
2859
2860                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2861                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2862                                 ~pwr_falcon_exterrstat_valid_m());
2863         }
2864         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2865                 pmu_process_message(pmu);
2866                 recheck = true;
2867         }
2868
2869         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2870
2871         if (recheck) {
2872                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2873                 if (!pmu_queue_is_empty(pmu, queue))
2874                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2875                                 pwr_falcon_irqsset_swgen0_set_f());
2876         }
2877
2878         mutex_unlock(&pmu->isr_mutex);
2879         mutex_unlock(&pmu->isr_enable_lock);
2880 }
2881
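/*
 * Sanity-check a command before it is queued: the target must be a SW
 * command queue, the command header must fit within half the queue size,
 * the unit id must be valid, and any in/out payload descriptors must be
 * consistent with the command size.
 */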
2882 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2883                         struct pmu_msg *msg, struct pmu_payload *payload,
2884                         u32 queue_id)
2885 {
2886         struct gk20a *g = pmu->g;
2887         struct pmu_queue *queue;
2888         u32 in_size, out_size;
2889
2890         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2891                 goto invalid_cmd;
2892
2893         queue = &pmu->queue[queue_id];
2894         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2895                 goto invalid_cmd;
2896
2897         if (cmd->hdr.size > (queue->size >> 1))
2898                 goto invalid_cmd;
2899
2900         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
2901                 goto invalid_cmd;
2902
2903         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
2904                 goto invalid_cmd;
2905
2906         if (payload == NULL)
2907                 return true;
2908
2909         if (payload->in.buf == NULL && payload->out.buf == NULL)
2910                 goto invalid_cmd;
2911
2912         if ((payload->in.buf != NULL && payload->in.size == 0) ||
2913             (payload->out.buf != NULL && payload->out.size == 0))
2914                 goto invalid_cmd;
2915
2916         in_size = PMU_CMD_HDR_SIZE;
2917         if (payload->in.buf) {
2918                 in_size += payload->in.offset;
2919                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2920         }
2921
2922         out_size = PMU_CMD_HDR_SIZE;
2923         if (payload->out.buf) {
2924                 out_size += payload->out.offset;
2925                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2926         }
2927
2928         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
2929                 goto invalid_cmd;
2930
2932         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
2933             (payload->out.offset != 0 && payload->out.buf == NULL))
2934                 goto invalid_cmd;
2935
2936         return true;
2937
2938 invalid_cmd:
2939         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
2940                 "queue_id=%d,\n"
2941                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
2942                 "payload in=%p, in_size=%d, in_offset=%d,\n"
2943                 "payload out=%p, out_size=%d, out_offset=%d",
2944                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
2945                 msg, msg ? msg->hdr.size : ~0,
2946                 &payload->in, payload->in.size, payload->in.offset,
2947                 &payload->out, payload->out.size, payload->out.offset);
2948
2949         return false;
2950 }
2951
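/*
 * Reserve space in the target queue (retrying on -EAGAIN until the timeout
 * expires), copy the command into it, and commit the queue write so the PMU
 * can consume the command.
 */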
2952 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2953                         u32 queue_id, unsigned long timeout)
2954 {
2955         struct gk20a *g = pmu->g;
2956         struct pmu_queue *queue;
2957         unsigned long end_jiffies = jiffies +
2958                 msecs_to_jiffies(timeout);
2959         int err;
2960
2961         gk20a_dbg_fn("");
2962
2963         queue = &pmu->queue[queue_id];
2964
2965         do {
2966                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
2967                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
2968                         usleep_range(1000, 2000);
2969                 else
2970                         break;
2971         } while (1);
2972
2973         if (err)
2974                 goto clean_up;
2975
2976         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
2977
2978         err = pmu_queue_close(pmu, queue, true);
2979
2980 clean_up:
2981         if (err)
2982                 gk20a_err(dev_from_gk20a(g),
2983                         "fail to write cmd to queue %d", queue_id);
2984         else
2985                 gk20a_dbg_fn("done");
2986
2987         return err;
2988 }
2989
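/*
 * Post a command to the PMU.  Acquires a sequence slot for the completion
 * callback, carves out DMEM for the in/out payloads (sharing one allocation
 * when both use the same buffer), copies the input payload into DMEM and
 * finally writes the command to the requested queue.
 */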
2990 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
2991                 struct pmu_msg *msg, struct pmu_payload *payload,
2992                 u32 queue_id, pmu_callback callback, void* cb_param,
2993                 u32 *seq_desc, unsigned long timeout)
2994 {
2995         struct pmu_gk20a *pmu = &g->pmu;
2996         struct pmu_v *pv = &g->ops.pmu_ver;
2997         struct pmu_sequence *seq;
2998         void *in = NULL, *out = NULL;
2999         int err;
3000
3001         gk20a_dbg_fn("");
3002
3003         BUG_ON(!cmd);
3004         BUG_ON(!seq_desc);
3005         BUG_ON(!pmu->pmu_ready);
3006
3007         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3008                 return -EINVAL;
3009
3010         err = pmu_seq_acquire(pmu, &seq);
3011         if (err)
3012                 return err;
3013
3014         cmd->hdr.seq_id = seq->id;
3015
3016         cmd->hdr.ctrl_flags = 0;
3017         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3018         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3019
3020         seq->callback = callback;
3021         seq->cb_params = cb_param;
3022         seq->msg = msg;
3023         seq->out_payload = NULL;
3024         seq->desc = pmu->next_seq_desc++;
3025
3026         if (payload)
3027                 seq->out_payload = payload->out.buf;
3028
3029         *seq_desc = seq->desc;
3030
3031         if (payload && payload->in.offset != 0) {
3032                 pv->set_pmu_allocation_ptr(pmu, &in,
3033                 ((u8 *)&cmd->cmd + payload->in.offset));
3034
3035                 if (payload->in.buf != payload->out.buf)
3036                         pv->pmu_allocation_set_dmem_size(pmu, in,
3037                         (u16)payload->in.size);
3038                 else
3039                         pv->pmu_allocation_set_dmem_size(pmu, in,
3040                         (u16)max(payload->in.size, payload->out.size));
3041
3042                 err = pmu->dmem.alloc(&pmu->dmem,
3043                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3044                 pv->pmu_allocation_get_dmem_size(pmu, in));
3045                 if (err)
3046                         goto clean_up;
3047
3048                 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3049                 in)),
3050                         payload->in.buf, payload->in.size, 0);
3051                 pv->pmu_allocation_set_dmem_size(pmu,
3052                 pv->get_pmu_seq_in_a_ptr(seq),
3053                 pv->pmu_allocation_get_dmem_size(pmu, in));
3054                 pv->pmu_allocation_set_dmem_offset(pmu,
3055                 pv->get_pmu_seq_in_a_ptr(seq),
3056                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3057         }
3058
3059         if (payload && payload->out.offset != 0) {
3060                 pv->set_pmu_allocation_ptr(pmu, &out,
3061                 ((u8 *)&cmd->cmd + payload->out.offset));
3062                 pv->pmu_allocation_set_dmem_size(pmu, out,
3063                 (u16)payload->out.size);
3064
3065                 if (payload->out.buf != payload->in.buf) {
3066                         err = pmu->dmem.alloc(&pmu->dmem,
3067                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3068                         pv->pmu_allocation_get_dmem_size(pmu, out));
3069                         if (err)
3070                                 goto clean_up;
3071                 } else {
3072                         BUG_ON(in == NULL);
3073                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3074                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3075                 }
3076
3077                 pv->pmu_allocation_set_dmem_size(pmu,
3078                 pv->get_pmu_seq_out_a_ptr(seq),
3079                 pv->pmu_allocation_get_dmem_size(pmu, out));
3080                 pv->pmu_allocation_set_dmem_offset(pmu,
3081                 pv->get_pmu_seq_out_a_ptr(seq),
3082                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3083         }
3084
3085         seq->state = PMU_SEQ_STATE_USED;
3086         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3087         if (err)
3088                 seq->state = PMU_SEQ_STATE_PENDING;
3089
3090         gk20a_dbg_fn("done");
3091
3092         return 0;
3093
3094 clean_up:
3095         gk20a_dbg_fn("fail");
3096         if (in)
3097                 pmu->dmem.free(&pmu->dmem,
3098                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3099                 pv->pmu_allocation_get_dmem_size(pmu, in));
3100         if (out)
3101                 pmu->dmem.free(&pmu->dmem,
3102                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3103                 pv->pmu_allocation_get_dmem_size(pmu, out));
3104
3105         pmu_seq_release(pmu, seq);
3106         return err;
3107 }
3108
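/*
 * Send PMU_PG_ELPG_CMD_ALLOW on the HP queue.  The ack is handled
 * asynchronously by pmu_handle_pg_elpg_msg(), so elpg_stat is only set to
 * ON_PENDING here.
 */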
3109 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3110 {
3111         struct pmu_gk20a *pmu = &g->pmu;
3112         struct pmu_cmd cmd;
3113         u32 seq, status;
3114
3115         gk20a_dbg_fn("");
3116
3117         memset(&cmd, 0, sizeof(struct pmu_cmd));
3118         cmd.hdr.unit_id = PMU_UNIT_PG;
3119         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3120         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3121         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3122         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3123
3124         /* no need to wait for the ack on ELPG enable, but mark it pending
3125            so a follow-up ELPG disable can synchronize with it */
3126         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3127
3128         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
3129         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3130                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3131
3132         BUG_ON(status != 0);
3133
3134         gk20a_dbg_fn("done");
3135         return 0;
3136 }
3137
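/*
 * Reference-counted ELPG enable.  ELPG is only allowed once the golden
 * context image exists and only when the current ELPG state is OFF;
 * otherwise the call just adjusts the refcount.
 */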
3138 int gk20a_pmu_enable_elpg(struct gk20a *g)
3139 {
3140         struct pmu_gk20a *pmu = &g->pmu;
3141         struct gr_gk20a *gr = &g->gr;
3142
3143         int ret = 0;
3144
3145         gk20a_dbg_fn("");
3146
3147         mutex_lock(&pmu->elpg_mutex);
3148
3149         pmu->elpg_refcnt++;
3150         if (pmu->elpg_refcnt <= 0)
3151                 goto exit_unlock;
3152
3153         /* something is not right if we end up in the following code path */
3154         if (unlikely(pmu->elpg_refcnt > 1)) {
3155                 gk20a_warn(dev_from_gk20a(g),
3156                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3157                 __func__, pmu->elpg_refcnt);
3158                 WARN_ON(1);
3159         }
3160
3161         /* do NOT enable elpg until golden ctx is created,
3162            which is related to the context that ELPG saves and restores. */
3163         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3164                 goto exit_unlock;
3165
3166         /* return if ELPG is already on or on_pending or off_on_pending */
3167         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3168                 goto exit_unlock;
3169
3170         ret = gk20a_pmu_enable_elpg_locked(g);
3171
3172 exit_unlock:
3173         mutex_unlock(&pmu->elpg_mutex);
3174         gk20a_dbg_fn("done");
3175         return ret;
3176 }
3177
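/*
 * Reference-counted ELPG disable.  Waits for a pending ALLOW ack if one is
 * outstanding, then sends PMU_PG_ELPG_CMD_DISALLOW and waits for the
 * DISALLOW ack before returning.
 */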
3178 int gk20a_pmu_disable_elpg(struct gk20a *g)
3179 {
3180         struct pmu_gk20a *pmu = &g->pmu;
3181         struct pmu_cmd cmd;
3182         u32 seq;
3183         int ret = 0;
3184
3185         gk20a_dbg_fn("");
3186
3187         mutex_lock(&pmu->elpg_mutex);
3188
3189         pmu->elpg_refcnt--;
3190         if (pmu->elpg_refcnt > 0) {
3191                 gk20a_warn(dev_from_gk20a(g),
3192                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3193                 __func__, pmu->elpg_refcnt);
3194                 WARN_ON(1);
3195                 ret = 0;
3196                 goto exit_unlock;
3197         }
3198
3199         /* cancel off_on_pending and return */
3200         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3201                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3202                 ret = 0;
3203                 goto exit_reschedule;
3204         }
3205         /* wait if on_pending */
3206         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3207
3208                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3209                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3210
3211                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3212                         gk20a_err(dev_from_gk20a(g),
3213                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3214                                 pmu->elpg_stat);
3215                         pmu_dump_elpg_stats(pmu);
3216                         pmu_dump_falcon_stats(pmu);
3217                         ret = -EBUSY;
3218                         goto exit_unlock;
3219                 }
3220         }
3221         /* return if ELPG is already off */
3222         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3223                 ret = 0;
3224                 goto exit_reschedule;
3225         }
3226
3227         memset(&cmd, 0, sizeof(struct pmu_cmd));
3228         cmd.hdr.unit_id = PMU_UNIT_PG;
3229         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3230         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3231         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3232         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3233
3234         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3235
3236         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
3237         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3238                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3239
3240         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3241                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3242         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3243                 gk20a_err(dev_from_gk20a(g),
3244                         "ELPG_DISALLOW_ACK failed");
3245                 pmu_dump_elpg_stats(pmu);
3246                 pmu_dump_falcon_stats(pmu);
3247                 ret = -EBUSY;
3248                 goto exit_unlock;
3249         }
3250
3251 exit_reschedule:
3252 exit_unlock:
3253         mutex_unlock(&pmu->elpg_mutex);
3254         gk20a_dbg_fn("done");
3255         return ret;
3256 }
3257
3258 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3259 {
3260         struct pmu_gk20a *pmu = &g->pmu;
3261         int err;
3262
3263         gk20a_dbg_fn("");
3264
3265         if (enable)
3266                 err = pmu_perfmon_start_sampling(pmu);
3267         else
3268                 err = pmu_perfmon_stop_sampling(pmu);
3269
3270         return err;
3271 }
3272
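/*
 * Tear down the PMU on power-off: flush the pending pg_init work, fold the
 * hardware ELPG residency counters into the software totals, disable ELPG
 * and the falcon, and mark the PMU state machine as OFF.
 */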
3273 int gk20a_pmu_destroy(struct gk20a *g)
3274 {
3275         struct pmu_gk20a *pmu = &g->pmu;
3276         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3277
3278         gk20a_dbg_fn("");
3279
3280         if (!support_gk20a_pmu())
3281                 return 0;
3282
3283         /* make sure the pending operations are finished before we continue */
3284         cancel_work_sync(&pmu->pg_init);
3285
3286         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3287                 &elpg_ungating_time, &gating_cnt);
3288
3289         gk20a_pmu_disable_elpg(g);
3290         pmu->initialized = false;
3291
3292         /* update the s/w ELPG residency counters */
3293         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3294         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3295         g->pg_gating_cnt += gating_cnt;
3296
3297         mutex_lock(&pmu->isr_enable_lock);
3298         pmu_enable(pmu, false);
3299         pmu->isr_enabled = false;
3300         mutex_unlock(&pmu->isr_enable_lock);
3301
3302         pmu->pmu_state = PMU_STATE_OFF;
3303         pmu->pmu_ready = false;
3304         pmu->perfmon_ready = false;
3305         pmu->zbc_ready = false;
3306
3307         gk20a_dbg_fn("done");
3308         return 0;
3309 }
3310
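/*
 * Report the normalized GPU load sampled by the PMU perfmon task.  Returns
 * a load of zero until perfmon is ready; otherwise the 16-bit DMEM sample
 * is scaled down by 10 before being reported.
 */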
3311 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3312 {
3313         struct pmu_gk20a *pmu = &g->pmu;
3314         u16 _load = 0;
3315
3316         if (!pmu->perfmon_ready) {
3317                 *load = 0;
3318                 return 0;
3319         }
3320
3321         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3322         *load = _load / 10;
3323
3324         return 0;
3325 }
3326
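/*
 * Snapshot the PMU idle counters: counter 1 holds the busy cycles and
 * counter 2 the total cycles.  Both are reported as zero when the GPU is
 * powered off.
 */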
3327 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3328                                  u32 *total_cycles)
3329 {
3330         if (!g->power_on) {
3331                 *busy_cycles = 0;
3332                 *total_cycles = 0;
3333                 return;
3334         }
3335
3336         gk20a_busy(g->dev);
3337         *busy_cycles = pwr_pmu_idle_count_value_v(
3338                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3339         rmb();
3340         *total_cycles = pwr_pmu_idle_count_value_v(
3341                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3342         gk20a_idle(g->dev);
3343 }
3344
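/* Reset both PMU idle counters so the next load measurement starts fresh. */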
3345 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3346 {
3347         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3348
3349         if (!g->power_on)
3350                 return;
3351
3352         gk20a_busy(g->dev);
3353         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3354         wmb();
3355         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3356         gk20a_idle(g->dev);
3357 }
3358
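/*
 * Fetch the ELPG gating statistics the PMU keeps in DMEM.  Returns zeros
 * until the PMU has been initialized.
 */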
3359 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3360                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3361 {
3362         struct pmu_gk20a *pmu = &g->pmu;
3363         struct pmu_pg_stats stats;
3364
3365         if (!pmu->initialized) {
3366                 *ingating_time = 0;
3367                 *ungating_time = 0;
3368                 *gating_cnt = 0;
3369                 return 0;
3370         }
3371
3372         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3373                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3374
3375         *ingating_time = stats.pg_ingating_time_us;
3376         *ungating_time = stats.pg_ungating_time_us;
3377         *gating_cnt = stats.pg_gating_cnt;
3378
3379         return 0;
3380 }
3381
3382 /* Send an Adaptive Power (AP) related command to PMU */
3383 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3384                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3385 {
3386         struct pmu_gk20a *pmu = &g->pmu;
3387         /* FIXME: where is the PG structure defined?? */
3388         u32 status = 0;
3389         struct pmu_cmd cmd;
3390         u32 seq;
3391         pmu_callback p_callback = NULL;
3392
3393         memset(&cmd, 0, sizeof(struct pmu_cmd));
3394
3395         /* Copy common members */
3396         cmd.hdr.unit_id = PMU_UNIT_PG;
3397         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3398
3399         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3400         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3401
3402         /* Copy other members of command */
3403         switch (p_ap_cmd->cmn.cmd_id) {
3404         case PMU_AP_CMD_ID_INIT:
3405                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
3406                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3407                         p_ap_cmd->init.pg_sampling_period_us;
3408                 p_callback = ap_callback_init_and_enable_ctrl;
3409                 break;
3410
3411         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3412                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
3413                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3414                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3415                 memcpy(
3416                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3417                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3418                         sizeof(struct pmu_ap_ctrl_init_params));
3419
3420                 p_callback = ap_callback_init_and_enable_ctrl;
3421                 break;
3422
3423         case PMU_AP_CMD_ID_ENABLE_CTRL:
3424                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
3425                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3426                         p_ap_cmd->enable_ctrl.ctrl_id;
3427                 break;
3428
3429         case PMU_AP_CMD_ID_DISABLE_CTRL:
3430                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
3431                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3432                         p_ap_cmd->disable_ctrl.ctrl_id;
3433                 break;
3434
3435         case PMU_AP_CMD_ID_KICK_CTRL:
3436                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
3437                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3438                         p_ap_cmd->kick_ctrl.ctrl_id;
3439                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3440                         p_ap_cmd->kick_ctrl.skip_count;
3441                 break;
3442
3443         default:
3444                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3445                         __func__, p_ap_cmd->cmn.cmd_id);
3446                 return 0x2f;
3447         }
3448
3449         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3450                         p_callback, pmu, &seq, ~0);
3451
3452         if (status) {
3453                 gk20a_dbg_pmu(
3454                         "%s: Unable to submit Adaptive Power Command %d\n",
3455                         __func__, p_ap_cmd->cmn.cmd_id);
3456                 goto err_return;
3457         }
3458
3459         /* TODO: Implement blocking calls (b_block) */
3460
3461 err_return:
3462         return status;
3463 }
3464
3465 static void ap_callback_init_and_enable_ctrl(
3466                 struct gk20a *g, struct pmu_msg *msg,
3467                 void *param, u32 seq_desc, u32 status)
3468 {
3469         /* Callback for AP init / init-and-enable-ctrl command acks */
3470         WARN_ON(!msg);
3471
3472         if (!status) {
3473                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3474                 case PMU_AP_MSG_ID_INIT_ACK:
3475                         gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
3476                         break;
3477
3478                 default:
3479                         gk20a_dbg_pmu(
3480                         "%s: Invalid Adaptive Power Message: %x\n",
3481                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3482                         break;
3483                 }
3484         }
3485 }
3486
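/* Initialize adaptive ELPG with the default power-gating sampling period. */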
3487 static int gk20a_aelpg_init(struct gk20a *g)
3488 {
3489         int status = 0;
3490
3491         /* Remove reliance on app_ctrl field. */
3492         union pmu_ap_cmd ap_cmd;
3493
3494         /* TODO: Check for elpg being ready? */
3495         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3496         ap_cmd.init.pg_sampling_period_us =
3497                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3498
3499         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3500         return status;
3501 }
3502
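/*
 * Initialize and enable one AELPG controller using the default idle-filter,
 * target-saving, break-even and cycles-per-sample parameters.
 */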
3503 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3504 {
3505         int status = 0;
3506         union pmu_ap_cmd ap_cmd;
3507
3508         /* TODO: Probably check if ELPG is ready? */
3509
3510         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3511         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3512         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3513                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3514         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3515                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3516         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3517                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3518         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3519                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3520
3521         switch (ctrl_id) {
3522         case PMU_AP_CTRL_ID_GRAPHICS:
3523                 break;
3524         default:
3525                 break;
3526         }
3527
3528         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3529         return status;
3530 }
3531
3532 #ifdef CONFIG_DEBUG_FS
3533 static int elpg_residency_show(struct seq_file *s, void *data)
3534 {
3535         struct gk20a *g = s->private;
3536         u32 ingating_time = 0;
3537         u32 ungating_time = 0;
3538         u32 gating_cnt;
3539         u64 total_ingating, total_ungating, residency, divisor, dividend;
3540
3541         /* Don't unnecessarily power on the device */
3542         if (g->power_on) {
3543                 gk20a_busy(g->dev);
3544                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3545                         &ungating_time, &gating_cnt);
3546                 gk20a_idle(g->dev);
3547         }
3548         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3549         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3550         divisor = total_ingating + total_ungating;
3551
3552         /* We compute the residency on a scale of 1000 */
3553         dividend = total_ingating * 1000;
3554
3555         if (divisor)
3556                 residency = div64_u64(dividend, divisor);
3557         else
3558                 residency = 0;
3559
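        /* e.g. residency == 750 means the GPU was power-gated 75.0% of the time */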
3560         seq_printf(s, "Time in ELPG: %llu us\n"
3561                         "Time out of ELPG: %llu us\n"
3562                         "ELPG residency ratio: %llu\n",
3563                         total_ingating, total_ungating, residency);
3564         return 0;
3565
3566 }
3567
3568 static int elpg_residency_open(struct inode *inode, struct file *file)
3569 {
3570         return single_open(file, elpg_residency_show, inode->i_private);
3571 }
3572
3573 static const struct file_operations elpg_residency_fops = {
3574         .open           = elpg_residency_open,
3575         .read           = seq_read,
3576         .llseek         = seq_lseek,
3577         .release        = single_release,
3578 };
3579
3580 static int elpg_transitions_show(struct seq_file *s, void *data)
3581 {
3582         struct gk20a *g = s->private;
3583         u32 ingating_time, ungating_time, total_gating_cnt;
3584         u32 gating_cnt = 0;
3585
3586         if (g->power_on) {
3587                 gk20a_busy(g->dev);
3588                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3589                         &ungating_time, &gating_cnt);
3590                 gk20a_idle(g->dev);
3591         }
3592         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3593
3594         seq_printf(s, "%u\n", total_gating_cnt);
3595         return 0;
3596
3597 }
3598
3599 static int elpg_transitions_open(struct inode *inode, struct file *file)
3600 {
3601         return single_open(file, elpg_transitions_show, inode->i_private);
3602 }
3603
3604 static const struct file_operations elpg_transitions_fops = {
3605         .open           = elpg_transitions_open,
3606         .read           = seq_read,
3607         .llseek         = seq_lseek,
3608         .release        = single_release,
3609 };
3610
3611 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3612 {
3613         struct dentry *d;
3614         struct gk20a_platform *platform = platform_get_drvdata(dev);
3615         struct gk20a *g = get_gk20a(dev);
3616
3617         d = debugfs_create_file(
3618                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3619                                                 &elpg_residency_fops);
3620         if (!d)
3621                 goto err_out;
3622
3623         d = debugfs_create_file(
3624                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3625                                                 &elpg_transitions_fops);
3626         if (!d)
3627                 goto err_out;
3628
3629         return 0;
3630
3631 err_out:
3632         pr_err("%s: Failed to make debugfs node\n", __func__);
3633         debugfs_remove_recursive(platform->debugfs);
3634         return -ENOMEM;
3635 }
3636 #endif