gpu: nvgpu: return error from mutex_acquire() if pmu not initialized
[linux-3.10.git] drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
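The change adds pmu->initialized checks to pmu_mutex_acquire() and pmu_mutex_release(): both now return -EINVAL instead of touching the PMU mutex registers before the PMU has been set up (see the guards at the top of both functions in the listing below).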
1 /*
2  * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "gr_gk20a.h"
31 #include "hw_mc_gk20a.h"
32 #include "hw_pwr_gk20a.h"
33 #include "hw_top_gk20a.h"
34
35 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
36
37 #define gk20a_dbg_pmu(fmt, arg...) \
38         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
39
40 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
41 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
42                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
43 static void pmu_setup_hw(struct work_struct *work);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
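/*
 * The *_v0/*_v1 helpers below paper over layout differences between the two
 * supported PMU firmware ABIs.  gk20a_init_pmu() installs one set or the
 * other into g->ops.pmu_ver based on the ucode descriptor's app_version, so
 * the rest of the driver never has to touch the versioned structs directly.
 */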
50 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
51 {
52         return sizeof(struct pmu_cmdline_args_v0);
53 }
54
55 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
56 {
57         return sizeof(struct pmu_cmdline_args_v1);
58 }
59
60 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
61 {
62         pmu->args_v1.cpu_freq_hz = freq;
63 }
64
65 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
66 {
67         pmu->args_v0.cpu_freq_hz = freq;
68 }
69
70 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
71 {
72         return (void *)(&pmu->args_v1);
73 }
74
75 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
76 {
77         return (void *)(&pmu->args_v0);
78 }
79
80 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
81 {
82         return sizeof(struct pmu_allocation_v1);
83 }
84
85 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
86 {
87         return sizeof(struct pmu_allocation_v0);
88 }
89
90 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
91         void **pmu_alloc_ptr, void *assign_ptr)
92 {
93         struct pmu_allocation_v1 **pmu_a_ptr =
94                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
95         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
96 }
97
98 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
99         void **pmu_alloc_ptr, void *assign_ptr)
100 {
101         struct pmu_allocation_v0 **pmu_a_ptr =
102                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
103         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
104 }
105
106 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
107         void *pmu_alloc_ptr, u16 size)
108 {
109         struct pmu_allocation_v1 *pmu_a_ptr =
110                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
111         pmu_a_ptr->alloc.dmem.size = size;
112 }
113
114 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
115         void *pmu_alloc_ptr, u16 size)
116 {
117         struct pmu_allocation_v0 *pmu_a_ptr =
118                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
119         pmu_a_ptr->alloc.dmem.size = size;
120 }
121
122 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
123         void *pmu_alloc_ptr)
124 {
125         struct pmu_allocation_v1 *pmu_a_ptr =
126                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
127         return pmu_a_ptr->alloc.dmem.size;
128 }
129
130 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
131         void *pmu_alloc_ptr)
132 {
133         struct pmu_allocation_v0 *pmu_a_ptr =
134                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
135         return pmu_a_ptr->alloc.dmem.size;
136 }
137
138 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
139         void *pmu_alloc_ptr)
140 {
141         struct pmu_allocation_v1 *pmu_a_ptr =
142                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
143         return pmu_a_ptr->alloc.dmem.offset;
144 }
145
146 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
147         void *pmu_alloc_ptr)
148 {
149         struct pmu_allocation_v0 *pmu_a_ptr =
150                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
151         return pmu_a_ptr->alloc.dmem.offset;
152 }
153
154 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
155         void *pmu_alloc_ptr)
156 {
157         struct pmu_allocation_v1 *pmu_a_ptr =
158                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
159         return &pmu_a_ptr->alloc.dmem.offset;
160 }
161
162 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
163         void *pmu_alloc_ptr)
164 {
165         struct pmu_allocation_v0 *pmu_a_ptr =
166                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
167         return &pmu_a_ptr->alloc.dmem.offset;
168 }
169
170 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
171         void *pmu_alloc_ptr, u32 offset)
172 {
173         struct pmu_allocation_v1 *pmu_a_ptr =
174                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
175         pmu_a_ptr->alloc.dmem.offset = offset;
176 }
177
178 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
179         void *pmu_alloc_ptr, u32 offset)
180 {
181         struct pmu_allocation_v0 *pmu_a_ptr =
182                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
183         pmu_a_ptr->alloc.dmem.offset = offset;
184 }
185
186 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
187 {
188         return (void *)(&(init->pmu_init_v1));
189 }
190
191 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
192 {
193         struct pmu_init_msg_pmu_v1 *init =
194                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
195         return init->sw_managed_area_offset;
196 }
197
198 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
199 {
200         struct pmu_init_msg_pmu_v1 *init =
201                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
202         return init->sw_managed_area_size;
203 }
204
205 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
206 {
207         return (void *)(&(init->pmu_init_v0));
208 }
209
210 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
211 {
212         struct pmu_init_msg_pmu_v0 *init =
213                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
214         return init->sw_managed_area_offset;
215 }
216
217 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
218 {
219         struct pmu_init_msg_pmu_v0 *init =
220                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
221         return init->sw_managed_area_size;
222 }
223
224 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
225 {
226         return sizeof(struct pmu_perfmon_cmd_start_v1);
227 }
228
229 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
230 {
231         return sizeof(struct pmu_perfmon_cmd_start_v0);
232 }
233
234 static int get_perfmon_cmd_start_offsetofvar_v1(
235         enum pmu_perfmon_cmd_start_fields field)
236 {
237         switch (field) {
238         case COUNTER_ALLOC:
239                 return offsetof(struct pmu_perfmon_cmd_start_v1,
240                 counter_alloc);
241         default:
242                 return -EINVAL;
243                 break;
244         }
245         return 0;
246 }
247
248 static int get_perfmon_cmd_start_offsetofvar_v0(
249         enum pmu_perfmon_cmd_start_fields field)
250 {
251         switch (field) {
252         case COUNTER_ALLOC:
253                 return offsetof(struct pmu_perfmon_cmd_start_v0,
254                 counter_alloc);
255         default:
256                 return -EINVAL;
257                 break;
258         }
259         return 0;
260 }
261
262 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
263 {
264         return sizeof(struct pmu_perfmon_cmd_init_v1);
265 }
266
267 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
268 {
269         return sizeof(struct pmu_perfmon_cmd_init_v0);
270 }
271
272 static int get_perfmon_cmd_init_offsetofvar_v1(
273         enum pmu_perfmon_cmd_start_fields field)
274 {
275         switch (field) {
276         case COUNTER_ALLOC:
277                 return offsetof(struct pmu_perfmon_cmd_init_v1,
278                 counter_alloc);
279         default:
280                 return -EINVAL;
281                 break;
282         }
283         return 0;
284 }
285
286 static int get_perfmon_cmd_init_offsetofvar_v0(
287         enum pmu_perfmon_cmd_start_fields field)
288 {
289         switch (field) {
290         case COUNTER_ALLOC:
291                 return offsetof(struct pmu_perfmon_cmd_init_v0,
292                 counter_alloc);
293         default:
294                 return -EINVAL;
295                 break;
296         }
297         return 0;
298 }
299
300 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
301 {
302         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
303         start->cmd_type = value;
304 }
305
306 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
307 {
308         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
309         start->cmd_type = value;
310 }
311
312 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
313 {
314         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
315         start->group_id = value;
316 }
317
318 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
319 {
320         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
321         start->group_id = value;
322 }
323
324 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
325 {
326         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
327         start->state_id = value;
328 }
329
330 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
331 {
332         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
333         start->state_id = value;
334 }
335
336 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
337 {
338         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
339         start->flags = value;
340 }
341
342 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
343 {
344         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
345         start->flags = value;
346 }
347
348 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
349 {
350         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
351         return start->flags;
352 }
353
354 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
355 {
356         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
357         return start->flags;
358 }
359
360 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
361         u16 value)
362 {
363         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
364         init->sample_buffer = value;
365 }
366
367 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
368         u16 value)
369 {
370         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
371         init->sample_buffer = value;
372 }
373
374 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
375         u8 value)
376 {
377         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
378         init->to_decrease_count = value;
379 }
380
381 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
382         u8 value)
383 {
384         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
385         init->to_decrease_count = value;
386 }
387
388 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
389         u8 value)
390 {
391         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
392         init->base_counter_id = value;
393 }
394
395 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
396         u8 value)
397 {
398         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
399         init->base_counter_id = value;
400 }
401
402 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
403         u32 value)
404 {
405         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
406         init->sample_period_us = value;
407 }
408
409 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
410         u32 value)
411 {
412         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
413         init->sample_period_us = value;
414 }
415
416 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
417         u8 value)
418 {
419         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
420         init->num_counters = value;
421 }
422
423 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
424         u8 value)
425 {
426         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
427         init->num_counters = value;
428 }
429
430 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
431         u8 value)
432 {
433         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
434         init->samples_in_moving_avg = value;
435 }
436
437 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
438         u8 value)
439 {
440         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
441         init->samples_in_moving_avg = value;
442 }
443
444 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
445         u32 id, void *pmu_init_msg)
446 {
447         struct pmu_init_msg_pmu_v0 *init =
448                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
449         queue->index    = init->queue_info[id].index;
450         queue->offset   = init->queue_info[id].offset;
451         queue->size = init->queue_info[id].size;
452 }
453
454 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
455         u32 id, void *pmu_init_msg)
456 {
457         struct pmu_init_msg_pmu_v1 *init =
458                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
459         queue->index    = init->queue_info[id].index;
460         queue->offset   = init->queue_info[id].offset;
461         queue->size = init->queue_info[id].size;
462 }
463
464 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
465 {
466         return (void *)(&seq->in_v1);
467 }
468
469 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
470 {
471         return (void *)(&seq->in_v0);
472 }
473
474 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
475 {
476         return (void *)(&seq->out_v1);
477 }
478
479 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
480 {
481         return (void *)(&seq->out_v0);
482 }
483
484 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
485 {
486         struct gk20a *g = pmu->g;
487         switch (pmu->desc->app_version) {
488         case APP_VERSION_1:
489                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
490                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
491                         pmu_cmdline_size_v1;
492                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
493                         set_pmu_cmdline_args_cpufreq_v1;
494                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
495                         get_pmu_cmdline_args_ptr_v1;
496                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
497                         get_pmu_allocation_size_v1;
498                 g->ops.pmu_ver.set_pmu_allocation_ptr =
499                         set_pmu_allocation_ptr_v1;
500                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
501                         pmu_allocation_set_dmem_size_v1;
502                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
503                         pmu_allocation_get_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
505                         pmu_allocation_get_dmem_offset_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
507                         pmu_allocation_get_dmem_offset_addr_v1;
508                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
509                         pmu_allocation_set_dmem_offset_v1;
510                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
511                         get_pmu_init_msg_pmu_queue_params_v1;
512                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
513                         get_pmu_msg_pmu_init_msg_ptr_v1;
514                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
515                         get_pmu_init_msg_pmu_sw_mg_off_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
517                         get_pmu_init_msg_pmu_sw_mg_size_v1;
518                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
519                         get_pmu_perfmon_cmd_start_size_v1;
520                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
521                         get_perfmon_cmd_start_offsetofvar_v1;
522                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
523                         perfmon_start_set_cmd_type_v1;
524                 g->ops.pmu_ver.perfmon_start_set_group_id =
525                         perfmon_start_set_group_id_v1;
526                 g->ops.pmu_ver.perfmon_start_set_state_id =
527                         perfmon_start_set_state_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_flags =
529                         perfmon_start_set_flags_v1;
530                 g->ops.pmu_ver.perfmon_start_get_flags =
531                         perfmon_start_get_flags_v1;
532                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
533                         get_pmu_perfmon_cmd_init_size_v1;
534                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
535                         get_perfmon_cmd_init_offsetofvar_v1;
536                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
537                         perfmon_cmd_init_set_sample_buffer_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
539                         perfmon_cmd_init_set_dec_cnt_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
541                         perfmon_cmd_init_set_base_cnt_id_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
543                         perfmon_cmd_init_set_samp_period_us_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
545                         perfmon_cmd_init_set_num_cnt_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
547                         perfmon_cmd_init_set_mov_avg_v1;
548                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
549                         get_pmu_sequence_in_alloc_ptr_v1;
550                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
551                         get_pmu_sequence_out_alloc_ptr_v1;
552                 break;
553         case APP_VERSION_0:
554                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
555                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
556                         pmu_cmdline_size_v0;
557                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
558                         set_pmu_cmdline_args_cpufreq_v0;
559                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
560                         get_pmu_cmdline_args_ptr_v0;
561                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
562                         get_pmu_allocation_size_v0;
563                 g->ops.pmu_ver.set_pmu_allocation_ptr =
564                         set_pmu_allocation_ptr_v0;
565                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
566                         pmu_allocation_set_dmem_size_v0;
567                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
568                         pmu_allocation_get_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
570                         pmu_allocation_get_dmem_offset_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
572                         pmu_allocation_get_dmem_offset_addr_v0;
573                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
574                         pmu_allocation_set_dmem_offset_v0;
575                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
576                         get_pmu_init_msg_pmu_queue_params_v0;
577                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
578                         get_pmu_msg_pmu_init_msg_ptr_v0;
579                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
580                         get_pmu_init_msg_pmu_sw_mg_off_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
582                         get_pmu_init_msg_pmu_sw_mg_size_v0;
583                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
584                         get_pmu_perfmon_cmd_start_size_v0;
585                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
586                         get_perfmon_cmd_start_offsetofvar_v0;
587                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
588                         perfmon_start_set_cmd_type_v0;
589                 g->ops.pmu_ver.perfmon_start_set_group_id =
590                         perfmon_start_set_group_id_v0;
591                 g->ops.pmu_ver.perfmon_start_set_state_id =
592                         perfmon_start_set_state_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_flags =
594                         perfmon_start_set_flags_v0;
595                 g->ops.pmu_ver.perfmon_start_get_flags =
596                         perfmon_start_get_flags_v0;
597                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
598                         get_pmu_perfmon_cmd_init_size_v0;
599                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
600                         get_perfmon_cmd_init_offsetofvar_v0;
601                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
602                         perfmon_cmd_init_set_sample_buffer_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
604                         perfmon_cmd_init_set_dec_cnt_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
606                         perfmon_cmd_init_set_base_cnt_id_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
608                         perfmon_cmd_init_set_samp_period_us_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
610                         perfmon_cmd_init_set_num_cnt_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
612                         perfmon_cmd_init_set_mov_avg_v0;
613                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
614                         get_pmu_sequence_in_alloc_ptr_v0;
615                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
616                         get_pmu_sequence_out_alloc_ptr_v0;
617                 break;
618         default:
619                 gk20a_err(dev_from_gk20a(pmu->g),
620                 "PMU code version not supported\n");
621                 return -EINVAL;
622                 break;
623         }
624         return 0;
625 }
626
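/*
 * DMEM is accessed through an auto-incrementing window: the target offset is
 * programmed into pwr_falcon_dmemc_r(port) and successive accesses to
 * pwr_falcon_dmemd_r(port) then move one 32-bit word at a time.  Both copy
 * helpers below require a 4-byte aligned DMEM address and serialize on
 * pmu_copy_lock, since the window registers are shared state.
 */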
627 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
628                 u32 src, u8 *dst, u32 size, u8 port)
629 {
630         struct gk20a *g = pmu->g;
631         u32 i, words, bytes;
632         u32 data, addr_mask;
633         u32 *dst_u32 = (u32*)dst;
634
635         if (size == 0) {
636                 gk20a_err(dev_from_gk20a(g),
637                         "size is zero");
638                 return;
639         }
640
641         if (src & 0x3) {
642                 gk20a_err(dev_from_gk20a(g),
643                         "src (0x%08x) not 4-byte aligned", src);
644                 return;
645         }
646
647         mutex_lock(&pmu->pmu_copy_lock);
648
649         words = size >> 2;
650         bytes = size & 0x3;
651
652         addr_mask = pwr_falcon_dmemc_offs_m() |
653                     pwr_falcon_dmemc_blk_m();
654
655         src &= addr_mask;
656
657         gk20a_writel(g, pwr_falcon_dmemc_r(port),
658                 src | pwr_falcon_dmemc_aincr_f(1));
659
660         for (i = 0; i < words; i++)
661                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
662
663         if (bytes > 0) {
664                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
665                 for (i = 0; i < bytes; i++) {
666                         dst[(words << 2) + i] = ((u8 *)&data)[i];
667                 }
668         }
669         mutex_unlock(&pmu->pmu_copy_lock);
670         return;
671 }
672
673 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
674                 u32 dst, u8 *src, u32 size, u8 port)
675 {
676         struct gk20a *g = pmu->g;
677         u32 i, words, bytes;
678         u32 data, addr_mask;
679         u32 *src_u32 = (u32*)src;
680
681         if (size == 0) {
682                 gk20a_err(dev_from_gk20a(g),
683                         "size is zero");
684                 return;
685         }
686
687         if (dst & 0x3) {
688                 gk20a_err(dev_from_gk20a(g),
689                         "dst (0x%08x) not 4-byte aligned", dst);
690                 return;
691         }
692
693         mutex_lock(&pmu->pmu_copy_lock);
694
695         words = size >> 2;
696         bytes = size & 0x3;
697
698         addr_mask = pwr_falcon_dmemc_offs_m() |
699                     pwr_falcon_dmemc_blk_m();
700
701         dst &= addr_mask;
702
703         gk20a_writel(g, pwr_falcon_dmemc_r(port),
704                 dst | pwr_falcon_dmemc_aincw_f(1));
705
706         for (i = 0; i < words; i++)
707                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
708
709         if (bytes > 0) {
710                 data = 0;
711                 for (i = 0; i < bytes; i++)
712                         ((u8 *)&data)[i] = src[(words << 2) + i];
713                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
714         }
715
716         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
717         size = ALIGN(size, 4);
718         if (data != dst + size) {
719                 gk20a_err(dev_from_gk20a(g),
720                         "copy failed. bytes written %d, expected %d",
721                         data - dst, size);
722         }
723         mutex_unlock(&pmu->pmu_copy_lock);
724         return;
725 }
726
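/*
 * Poll pwr_falcon_idlestate_r() until both the falcon core and the external
 * units report idle; give up with -EBUSY after roughly two seconds.
 */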
727 static int pmu_idle(struct pmu_gk20a *pmu)
728 {
729         struct gk20a *g = pmu->g;
730         unsigned long end_jiffies = jiffies +
731                 msecs_to_jiffies(2000);
732         u32 idle_stat;
733
734         /* wait for pmu idle */
735         do {
736                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
737
738                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
739                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
740                         break;
741                 }
742
743                 if (time_after_eq(jiffies, end_jiffies)) {
744                         gk20a_err(dev_from_gk20a(g),
745                                 "timeout waiting for pmu idle: 0x%08x",
746                                 idle_stat);
747                         return -EBUSY;
748                 }
749                 usleep_range(100, 200);
750         } while (1);
751
752         gk20a_dbg_fn("done");
753         return 0;
754 }
755
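/*
 * Interrupt plumbing: the PMU is first masked at the master controller and
 * all falcon interrupt sources are cleared (irqmclr).  When enabling, the
 * watchdog, halt and swgen0 sources are routed to the host on irq0 while the
 * general-purpose timer stays with the falcon, the selected sources are
 * unmasked (irqmset), and the PMU bit in mc_intr_mask_0 is restored.
 */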
756 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
757 {
758         struct gk20a *g = pmu->g;
759
760         gk20a_dbg_fn("");
761
762         gk20a_writel(g, mc_intr_mask_0_r(),
763                 gk20a_readl(g, mc_intr_mask_0_r()) &
764                 ~mc_intr_mask_0_pmu_enabled_f());
765         gk20a_writel(g, mc_intr_mask_1_r(),
766                 gk20a_readl(g, mc_intr_mask_1_r()) &
767                 ~mc_intr_mask_1_pmu_enabled_f());
768
769         gk20a_writel(g, pwr_falcon_irqmclr_r(),
770                 pwr_falcon_irqmclr_gptmr_f(1)  |
771                 pwr_falcon_irqmclr_wdtmr_f(1)  |
772                 pwr_falcon_irqmclr_mthd_f(1)   |
773                 pwr_falcon_irqmclr_ctxsw_f(1)  |
774                 pwr_falcon_irqmclr_halt_f(1)   |
775                 pwr_falcon_irqmclr_exterr_f(1) |
776                 pwr_falcon_irqmclr_swgen0_f(1) |
777                 pwr_falcon_irqmclr_swgen1_f(1) |
778                 pwr_falcon_irqmclr_ext_f(0xff));
779
780         if (enable) {
781                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
782                 gk20a_writel(g, pwr_falcon_irqdest_r(),
783                         pwr_falcon_irqdest_host_gptmr_f(0)    |
784                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
785                         pwr_falcon_irqdest_host_mthd_f(0)     |
786                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
787                         pwr_falcon_irqdest_host_halt_f(1)     |
788                         pwr_falcon_irqdest_host_exterr_f(0)   |
789                         pwr_falcon_irqdest_host_swgen0_f(1)   |
790                         pwr_falcon_irqdest_host_swgen1_f(0)   |
791                         pwr_falcon_irqdest_host_ext_f(0xff)   |
792                         pwr_falcon_irqdest_target_gptmr_f(1)  |
793                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
794                         pwr_falcon_irqdest_target_mthd_f(0)   |
795                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
796                         pwr_falcon_irqdest_target_halt_f(0)   |
797                         pwr_falcon_irqdest_target_exterr_f(0) |
798                         pwr_falcon_irqdest_target_swgen0_f(0) |
799                         pwr_falcon_irqdest_target_swgen1_f(0) |
800                         pwr_falcon_irqdest_target_ext_f(0xff));
801
802                 /* 0=disable, 1=enable */
803                 gk20a_writel(g, pwr_falcon_irqmset_r(),
804                         pwr_falcon_irqmset_gptmr_f(1)  |
805                         pwr_falcon_irqmset_wdtmr_f(1)  |
806                         pwr_falcon_irqmset_mthd_f(0)   |
807                         pwr_falcon_irqmset_ctxsw_f(0)  |
808                         pwr_falcon_irqmset_halt_f(1)   |
809                         pwr_falcon_irqmset_exterr_f(1) |
810                         pwr_falcon_irqmset_swgen0_f(1) |
811                         pwr_falcon_irqmset_swgen1_f(1));
812
813                 gk20a_writel(g, mc_intr_mask_0_r(),
814                         gk20a_readl(g, mc_intr_mask_0_r()) |
815                         mc_intr_mask_0_pmu_enabled_f());
816         }
817
818         gk20a_dbg_fn("done");
819 }
820
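/*
 * Power the PWR engine on or off at the master controller.  After enabling,
 * wait for the falcon to finish scrubbing IMEM/DMEM before reporting
 * success; on a scrub timeout the engine is switched back off and
 * -ETIMEDOUT is returned.
 */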
821 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
822 {
823         struct gk20a *g = pmu->g;
824
825         gk20a_dbg_fn("");
826
827         if (enable) {
828                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
829                 gk20a_enable(g, mc_enable_pwr_enabled_f());
830
831                 do {
832                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
833                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
834                                  pwr_falcon_dmactl_imem_scrubbing_m());
835
836                         if (!w) {
837                                 gk20a_dbg_fn("done");
838                                 return 0;
839                         }
840                         udelay(GR_IDLE_CHECK_DEFAULT);
841                 } while (--retries || !tegra_platform_is_silicon());
842
843                 gk20a_disable(g, mc_enable_pwr_enabled_f());
844                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
845
846                 return -ETIMEDOUT;
847         } else {
848                 gk20a_disable(g, mc_enable_pwr_enabled_f());
849                 return 0;
850         }
851 }
852
853 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
854 {
855         struct gk20a *g = pmu->g;
856         u32 pmc_enable;
857         int err;
858
859         gk20a_dbg_fn("");
860
861         if (!enable) {
862                 pmc_enable = gk20a_readl(g, mc_enable_r());
863                 if (mc_enable_pwr_v(pmc_enable) !=
864                     mc_enable_pwr_disabled_v()) {
865
866                         pmu_enable_irq(pmu, false);
867                         pmu_enable_hw(pmu, false);
868                 }
869         } else {
870                 err = pmu_enable_hw(pmu, true);
871                 if (err)
872                         return err;
873
874                 /* TBD: post reset */
875
876                 err = pmu_idle(pmu);
877                 if (err)
878                         return err;
879
880                 pmu_enable_irq(pmu, true);
881         }
882
883         gk20a_dbg_fn("done");
884         return 0;
885 }
886
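/* Quiesce the falcon, power-cycle it and re-enable its interrupts. */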
887 static int pmu_reset(struct pmu_gk20a *pmu)
888 {
889         int err;
890
891         err = pmu_idle(pmu);
892         if (err)
893                 return err;
894
895         /* TBD: release pmu hw mutex */
896
897         err = pmu_enable(pmu, false);
898         if (err)
899                 return err;
900
901         /* TBD: cancel all sequences */
902         /* TBD: init all sequences and state tables */
903         /* TBD: restore pre-init message handler */
904
905         err = pmu_enable(pmu, true);
906         if (err)
907                 return err;
908
909         return 0;
910 }
911
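/*
 * Bootstrap sequence: point the PMU at its instance block, place the command
 * line arguments at the top of DMEM, push the boot arguments (code/data
 * offsets and sizes plus the args address) through the DMEM port, DMA the
 * bootloader into IMEM one 256-byte block at a time, then set the boot
 * vector and start the falcon CPU.
 */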
912 static int pmu_bootstrap(struct pmu_gk20a *pmu)
913 {
914         struct gk20a *g = pmu->g;
915         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
916         struct mm_gk20a *mm = &g->mm;
917         struct pmu_ucode_desc *desc = pmu->desc;
918         u64 addr_code, addr_data, addr_load;
919         u32 i, blocks, addr_args;
920
921         gk20a_dbg_fn("");
922
923         gk20a_writel(g, pwr_falcon_itfen_r(),
924                 gk20a_readl(g, pwr_falcon_itfen_r()) |
925                 pwr_falcon_itfen_ctxen_enable_f());
926         gk20a_writel(g, pwr_pmu_new_instblk_r(),
927                 pwr_pmu_new_instblk_ptr_f(
928                         mm->pmu.inst_block.cpu_pa >> 12) |
929                 pwr_pmu_new_instblk_valid_f(1) |
930                 pwr_pmu_new_instblk_target_sys_coh_f());
931
932         /* TBD: load all other surfaces */
933
934         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
935                 clk_get_rate(platform->clk[1]));
936
937         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
938                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
939                         << GK20A_PMU_DMEM_BLKSIZE2) -
940                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
941
942         pmu_copy_to_dmem(pmu, addr_args,
943                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
944                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
945
946         gk20a_writel(g, pwr_falcon_dmemc_r(0),
947                 pwr_falcon_dmemc_offs_f(0) |
948                 pwr_falcon_dmemc_blk_f(0)  |
949                 pwr_falcon_dmemc_aincw_f(1));
950
951         addr_code = u64_lo32((pmu->ucode.pmu_va +
952                         desc->app_start_offset +
953                         desc->app_resident_code_offset) >> 8);
954         addr_data = u64_lo32((pmu->ucode.pmu_va +
955                         desc->app_start_offset +
956                         desc->app_resident_data_offset) >> 8);
957         addr_load = u64_lo32((pmu->ucode.pmu_va +
958                         desc->bootloader_start_offset) >> 8);
959
960         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
961         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
970
971         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
972                 addr_load - (desc->bootloader_imem_offset >> 8));
973
974         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
975
976         for (i = 0; i < blocks; i++) {
977                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
978                         desc->bootloader_imem_offset + (i << 8));
979                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
982                         pwr_falcon_dmatrfcmd_imem_f(1)  |
983                         pwr_falcon_dmatrfcmd_write_f(0) |
984                         pwr_falcon_dmatrfcmd_size_f(6)  |
985                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
986         }
987
988         gk20a_writel(g, pwr_falcon_bootvec_r(),
989                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
990
991         gk20a_writel(g, pwr_falcon_cpuctl_r(),
992                 pwr_falcon_cpuctl_startcpu_f(1));
993
994         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
995
996         return 0;
997 }
998
999 static void pmu_seq_init(struct pmu_gk20a *pmu)
1000 {
1001         u32 i;
1002
1003         memset(pmu->seq, 0,
1004                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1005         memset(pmu->pmu_seq_tbl, 0,
1006                 sizeof(pmu->pmu_seq_tbl));
1007
1008         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1009                 pmu->seq[i].id = i;
1010 }
1011
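/*
 * Sequences track in-flight PMU commands.  A free slot is claimed from the
 * pmu_seq_tbl bitmap under pmu_seq_lock here and handed back by
 * pmu_seq_release() once the corresponding message has been handled.
 */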
1012 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1013                         struct pmu_sequence **pseq)
1014 {
1015         struct gk20a *g = pmu->g;
1016         struct pmu_sequence *seq;
1017         u32 index;
1018
1019         mutex_lock(&pmu->pmu_seq_lock);
1020         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1021                                 sizeof(pmu->pmu_seq_tbl) * BITS_PER_BYTE);
1022         if (index >= sizeof(pmu->pmu_seq_tbl) * BITS_PER_BYTE) {
1023                 gk20a_err(dev_from_gk20a(g),
1024                         "no free sequence available");
1025                 mutex_unlock(&pmu->pmu_seq_lock);
1026                 return -EAGAIN;
1027         }
1028         set_bit(index, pmu->pmu_seq_tbl);
1029         mutex_unlock(&pmu->pmu_seq_lock);
1030
1031         seq = &pmu->seq[index];
1032         seq->state = PMU_SEQ_STATE_PENDING;
1033
1034         *pseq = seq;
1035         return 0;
1036 }
1037
1038 static void pmu_seq_release(struct pmu_gk20a *pmu,
1039                         struct pmu_sequence *seq)
1040 {
1041         struct gk20a *g = pmu->g;
1042         seq->state      = PMU_SEQ_STATE_FREE;
1043         seq->desc       = PMU_INVALID_SEQ_DESC;
1044         seq->callback   = NULL;
1045         seq->cb_params  = NULL;
1046         seq->msg        = NULL;
1047         seq->out_payload = NULL;
1048         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1049                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1052
1053         clear_bit(seq->id, pmu->pmu_seq_tbl);
1054 }
1055
1056 static int pmu_queue_init(struct pmu_gk20a *pmu,
1057                 u32 id, union pmu_init_msg_pmu *init)
1058 {
1059         struct gk20a *g = pmu->g;
1060         struct pmu_queue *queue = &pmu->queue[id];
1061         queue->id       = id;
1062         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1063
1064         queue->mutex_id = id;
1065         mutex_init(&queue->mutex);
1066
1067         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1068                 id, queue->index, queue->offset, queue->size);
1069
1070         return 0;
1071 }
1072
1073 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1074                         u32 *head, bool set)
1075 {
1076         struct gk20a *g = pmu->g;
1077
1078         BUG_ON(!head);
1079
1080         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1081
1082                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1083                         return -EINVAL;
1084
1085                 if (!set)
1086                         *head = pwr_pmu_queue_head_address_v(
1087                                 gk20a_readl(g,
1088                                         pwr_pmu_queue_head_r(queue->index)));
1089                 else
1090                         gk20a_writel(g,
1091                                 pwr_pmu_queue_head_r(queue->index),
1092                                 pwr_pmu_queue_head_address_f(*head));
1093         } else {
1094                 if (!set)
1095                         *head = pwr_pmu_msgq_head_val_v(
1096                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1097                 else
1098                         gk20a_writel(g,
1099                                 pwr_pmu_msgq_head_r(),
1100                                 pwr_pmu_msgq_head_val_f(*head));
1101         }
1102
1103         return 0;
1104 }
1105
1106 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1107                         u32 *tail, bool set)
1108 {
1109         struct gk20a *g = pmu->g;
1110
1111         BUG_ON(!tail);
1112
1113         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1114
1115                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1116                         return -EINVAL;
1117
1118                 if (!set)
1119                         *tail = pwr_pmu_queue_tail_address_v(
1120                                 gk20a_readl(g,
1121                                         pwr_pmu_queue_tail_r(queue->index)));
1122                 else
1123                         gk20a_writel(g,
1124                                 pwr_pmu_queue_tail_r(queue->index),
1125                                 pwr_pmu_queue_tail_address_f(*tail));
1126         } else {
1127                 if (!set)
1128                         *tail = pwr_pmu_msgq_tail_val_v(
1129                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1130                 else
1131                         gk20a_writel(g,
1132                                 pwr_pmu_msgq_tail_r(),
1133                                 pwr_pmu_msgq_tail_val_f(*tail));
1134         }
1135
1136         return 0;
1137 }
1138
1139 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1140                         u32 offset, u8 *dst, u32 size)
1141 {
1142         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1143 }
1144
1145 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1146                         u32 offset, u8 *src, u32 size)
1147 {
1148         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1149 }
1150
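/*
 * PMU hardware mutexes arbitrate between the CPU and the PMU ucode.  A fresh
 * token is read from pwr_pmu_mutex_id_r(), written into the per-mutex
 * register and read back to confirm ownership; on contention the token is
 * handed back through the release register and the acquire is retried (up to
 * 40 times, 20-40 us apart).  If the PMU has not finished initialization,
 * both acquire and release return -EINVAL without touching the hardware.
 *
 * A sketch of the expected calling pattern (the mutex ID is illustrative;
 * any valid PMU_MUTEX_ID_* works):
 *
 *        u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 *
 *        if (!pmu_mutex_acquire(pmu, PMU_MUTEX_ID_FIFO, &token)) {
 *                ... critical section protected against the PMU ucode ...
 *                pmu_mutex_release(pmu, PMU_MUTEX_ID_FIFO, &token);
 *        }
 */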
1151 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1152 {
1153         struct gk20a *g = pmu->g;
1154         struct pmu_mutex *mutex;
1155         u32 data, owner, max_retry;
1156
1157         if (!pmu->initialized)
1158                 return -EINVAL;
1159
1160         BUG_ON(!token);
1161         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1162         BUG_ON(id >= pmu->mutex_cnt);
1163
1164         mutex = &pmu->mutex[id];
1165
1166         owner = pwr_pmu_mutex_value_v(
1167                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1168
1169         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1170                 BUG_ON(mutex->ref_cnt == 0);
1171                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1172                 mutex->ref_cnt++;
1173                 return 0;
1174         }
1175
1176         max_retry = 40;
1177         do {
1178                 data = pwr_pmu_mutex_id_value_v(
1179                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1180                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1181                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1182                         gk20a_warn(dev_from_gk20a(g),
1183                                 "failed to generate mutex token: val 0x%08x",
1184                                 data);
1185                         usleep_range(20, 40);
1186                         continue;
1187                 }
1188
1189                 owner = data;
1190                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1191                         pwr_pmu_mutex_value_f(owner));
1192
1193                 data = pwr_pmu_mutex_value_v(
1194                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1195
1196                 if (owner == data) {
1197                         mutex->ref_cnt = 1;
1198                         *token = owner;
1199                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1200                                 mutex->index, *token);
1201                         return 0;
1202                 } else {
1203                         gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1204                                 mutex->index);
1205
1206                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1207                         data = set_field(data,
1208                                 pwr_pmu_mutex_id_release_value_m(),
1209                                 pwr_pmu_mutex_id_release_value_f(owner));
1210                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1211
1212                         usleep_range(20, 40);
1213                         continue;
1214                 }
1215         } while (max_retry-- > 0);
1216
1217         return -EBUSY;
1218 }
1219
1220 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1221 {
1222         struct gk20a *g = pmu->g;
1223         struct pmu_mutex *mutex;
1224         u32 owner, data;
1225
1226         if (!pmu->initialized)
1227                 return -EINVAL;
1228
1229         BUG_ON(!token);
1230         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1231         BUG_ON(id >= pmu->mutex_cnt);
1232
1233         mutex = &pmu->mutex[id];
1234
1235         owner = pwr_pmu_mutex_value_v(
1236                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1237
1238         if (*token != owner) {
1239                 gk20a_err(dev_from_gk20a(g),
1240                         "requester 0x%08x does not match owner 0x%08x",
1241                         *token, owner);
1242                 return -EINVAL;
1243         }
1244
1245         if (--mutex->ref_cnt == 0) {
1246                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1247                         pwr_pmu_mutex_value_initial_lock_f());
1248
1249                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1250                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1251                         pwr_pmu_mutex_id_release_value_f(owner));
1252                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1253
1254                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1255                         mutex->index, *token);
1256         }
1257
1258         return 0;
1259 }
1260
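/*
 * Locking strategy per queue type: the message queue is only read from the
 * pmu_read_message() path and takes no lock, software command queues use a
 * kernel mutex, and hardware command queues take the PMU hardware mutex
 * associated with the queue.
 */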
1261 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1262                         struct pmu_queue *queue)
1263 {
1264         int err;
1265
1266         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1267                 return 0;
1268
1269         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1270                 mutex_lock(&queue->mutex);
1271                 return 0;
1272         }
1273
1274         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1275         return err;
1276 }
1277
1278 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1279                         struct pmu_queue *queue)
1280 {
1281         int err;
1282
1283         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1284                 return 0;
1285
1286         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1287                 mutex_unlock(&queue->mutex);
1288                 return 0;
1289         }
1290
1291         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1292         return err;
1293 }
1294
1295 /* called by pmu_read_message, no lock */
1296 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1297                         struct pmu_queue *queue)
1298 {
1299         u32 head, tail;
1300
1301         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1302         if (queue->opened && queue->oflag == OFLAG_READ)
1303                 tail = queue->position;
1304         else
1305                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1306
1307         return head == tail;
1308 }
1309
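/*
 * Command queues are ring buffers in DMEM.  pmu_queue_has_room() computes
 * the free space between head and tail (reserving one command header at the
 * end of the buffer) and tells the caller whether the writer must first
 * rewind to the start of the queue; pmu_queue_rewind() then emits a
 * PMU_UNIT_REWIND header so the PMU knows to wrap around as well.
 */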
1310 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1311                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1312 {
1313         u32 head, tail, free;
1314         bool rewind = false;
1315
1316         size = ALIGN(size, QUEUE_ALIGNMENT);
1317
1318         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1319         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1320
1321         if (head >= tail) {
1322                 free = queue->offset + queue->size - head;
1323                 free -= PMU_CMD_HDR_SIZE;
1324
1325                 if (size > free) {
1326                         rewind = true;
1327                         head = queue->offset;
1328                 }
1329         }
1330
1331         if (head < tail)
1332                 free = tail - head - 1;
1333
1334         if (need_rewind)
1335                 *need_rewind = rewind;
1336
1337         return size <= free;
1338 }
1339
1340 static int pmu_queue_push(struct pmu_gk20a *pmu,
1341                         struct pmu_queue *queue, void *data, u32 size)
1342 {
1343         gk20a_dbg_fn("");
1344
1345         if (!(queue->opened && queue->oflag == OFLAG_WRITE)) {
1346                 gk20a_err(dev_from_gk20a(pmu->g),
1347                         "queue not opened for write");
1348                 return -EINVAL;
1349         }
1350
1351         pmu_queue_write(pmu, queue->position, data, size);
1352         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1353         return 0;
1354 }
1355
1356 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1357                         struct pmu_queue *queue, void *data, u32 size,
1358                         u32 *bytes_read)
1359 {
1360         u32 head, tail, used;
1361
1362         *bytes_read = 0;
1363
1364         if (!(queue->opened && queue->oflag == OFLAG_READ)) {
1365                 gk20a_err(dev_from_gk20a(pmu->g),
1366                         "queue not opened for read");
1367                 return -EINVAL;
1368         }
1369
1370         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1371         tail = queue->position;
1372
1373         if (head == tail)
1374                 return 0;
1375
1376         if (head > tail)
1377                 used = head - tail;
1378         else
1379                 used = queue->offset + queue->size - tail;
1380
1381         if (size > used) {
1382                 gk20a_warn(dev_from_gk20a(pmu->g),
1383                         "queue holds less data than the requested read",
1384                 size = used;
1385         }
1386
1387         pmu_queue_read(pmu, tail, data, size);
1388         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1389         *bytes_read = size;
1390         return 0;
1391 }
1392
1393 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1394                         struct pmu_queue *queue)
1395 {
1396         struct pmu_cmd cmd;
1397
1398         gk20a_dbg_fn("");
1399
1400         if (!queue->opened) {
1401                 gk20a_err(dev_from_gk20a(pmu->g),
1402                         "queue not opened");
1403                 return;
1404         }
1405
1406         if (queue->oflag == OFLAG_WRITE) {
1407                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1408                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1409                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1410                 gk20a_dbg_pmu("queue %d rewound", queue->id);
1411         }
1412
1413         queue->position = queue->offset;
1414         return;
1415 }
1416
1417 /* open for read and lock the queue */
1418 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1419                         struct pmu_queue *queue)
1420 {
1421         int err;
1422
1423         err = pmu_queue_lock(pmu, queue);
1424         if (err)
1425                 return err;
1426
1427         if (queue->opened)
1428                 BUG();
1429
1430         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1431         queue->oflag = OFLAG_READ;
1432         queue->opened = true;
1433
1434         return 0;
1435 }
1436
1437 /* open for write and lock the queue
1438    make sure there's enough free space for the write */
1439 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1440                         struct pmu_queue *queue, u32 size)
1441 {
1442         bool rewind = false;
1443         int err;
1444
1445         err = pmu_queue_lock(pmu, queue);
1446         if (err)
1447                 return err;
1448
1449         if (queue->opened)
1450                 BUG();
1451
1452         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1453                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1454                 return -EAGAIN;
1455         }
1456
1457         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1458         queue->oflag = OFLAG_WRITE;
1459         queue->opened = true;
1460
1461         if (rewind)
1462                 pmu_queue_rewind(pmu, queue);
1463
1464         return 0;
1465 }
1466
1467 /* close and unlock the queue */
1468 static int pmu_queue_close(struct pmu_gk20a *pmu,
1469                         struct pmu_queue *queue, bool commit)
1470 {
1471         if (!queue->opened)
1472                 return 0;
1473
1474         if (commit) {
1475                 if (queue->oflag == OFLAG_READ) {
1476                         pmu_queue_tail(pmu, queue,
1477                                 &queue->position, QUEUE_SET);
1478                 }
1479                 else {
1480                         pmu_queue_head(pmu, queue,
1481                                 &queue->position, QUEUE_SET);
1482                 }
1483         }
1484
1485         queue->opened = false;
1486
1487         pmu_queue_unlock(pmu, queue);
1488
1489         return 0;
1490 }
1491
1492 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1493 {
1494         gk20a_dbg_fn("");
1495
1496         gk20a_allocator_destroy(&pmu->dmem);
1497 }
1498
1499 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1500 {
1501         struct pmu_gk20a *pmu = &g->pmu;
1502
1503         gk20a_dbg_fn("");
1504
1505         pmu_enable_hw(pmu, true);
1506
1507         return 0;
1508 }
1509
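/*
 * One-time software setup: allocate the mutex and sequence tables, load the
 * PMU firmware, carve out DMA buffers for the ucode image and the sequence
 * buffer, and map them into the PMU's own virtual address space.  On warm
 * re-entry (pmu->sw_ready already set) only the mutex and sequence state is
 * re-initialized.
 */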
1510 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1511 {
1512         struct pmu_gk20a *pmu = &g->pmu;
1513         struct mm_gk20a *mm = &g->mm;
1514         struct vm_gk20a *vm = &mm->pmu.vm;
1515         struct device *d = dev_from_gk20a(g);
1516         int i, err = 0;
1517         u8 *ptr;
1518         void *ucode_ptr;
1519         struct sg_table *sgt_pmu_ucode;
1520         struct sg_table *sgt_seq_buf;
1521         DEFINE_DMA_ATTRS(attrs);
1522         dma_addr_t iova;
1523
1524         gk20a_dbg_fn("");
1525
1526         if (pmu->sw_ready) {
1527                 for (i = 0; i < pmu->mutex_cnt; i++) {
1528                         pmu->mutex[i].id    = i;
1529                         pmu->mutex[i].index = i;
1530                 }
1531                 pmu_seq_init(pmu);
1532
1533                 gk20a_dbg_fn("skip init");
1534                 goto skip_init;
1535         }
1536
1537         /* no infoRom script from vbios? */
1538
1539         /* TBD: sysmon subtask */
1540
1541         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1542         pmu->mutex = kzalloc(pmu->mutex_cnt *
1543                 sizeof(struct pmu_mutex), GFP_KERNEL);
1544         if (!pmu->mutex) {
1545                 err = -ENOMEM;
1546                 goto err;
1547         }
1548
1549         for (i = 0; i < pmu->mutex_cnt; i++) {
1550                 pmu->mutex[i].id    = i;
1551                 pmu->mutex[i].index = i;
1552         }
1553
1554         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1555                 sizeof(struct pmu_sequence), GFP_KERNEL);
1556         if (!pmu->seq) {
1557                 err = -ENOMEM;
1558                 goto err_free_mutex;
1559         }
1560
1561         pmu_seq_init(pmu);
1562
1563         if (!g->pmu_fw) {
1564                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1565                 if (!g->pmu_fw) {
1566                         gk20a_err(d, "failed to load pmu ucode!!");
1567                         err = -ENOENT;
1568                         goto err_free_seq;
1569                 }
1570         }
1571
1572         gk20a_dbg_fn("firmware loaded");
1573
1574         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1575         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1576                         pmu->desc->descriptor_size);
1577
1578         INIT_WORK(&pmu->pg_init, pmu_setup_hw);
1579
1580         gk20a_init_pmu_vm(mm);
1581
1582         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1583         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1584                                         &iova,
1585                                         GFP_KERNEL,
1586                                         &attrs);
1587         if (!pmu->ucode.cpuva) {
1588                 gk20a_err(d, "failed to allocate memory\n");
1589                 err = -ENOMEM;
1590                 goto err_release_fw;
1591         }
1592
1593         pmu->ucode.iova = iova;
1594         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1595                                         &iova,
1596                                         GFP_KERNEL);
1597         if (!pmu->seq_buf.cpuva) {
1598                 gk20a_err(d, "failed to allocate memory\n");
1599                 err = -ENOMEM;
1600                 goto err_free_pmu_ucode;
1601         }
1602
1603         pmu->seq_buf.iova = iova;
1604
1605         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1606                                 pmu->ucode.cpuva,
1607                                 pmu->ucode.iova,
1608                                 GK20A_PMU_UCODE_SIZE_MAX);
1609         if (err) {
1610                 gk20a_err(d, "failed to allocate sg table\n");
1611                 goto err_free_seq_buf;
1612         }
1613
1614         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1615                                         GK20A_PMU_UCODE_SIZE_MAX,
1616                                         0, /* flags */
1617                                         gk20a_mem_flag_read_only);
1618         if (!pmu->ucode.pmu_va) {
1619                 gk20a_err(d, "failed to map pmu ucode memory!!");
                err = -ENOMEM;
1620                 goto err_free_ucode_sgt;
1621         }
1622
1623         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1624                                 pmu->seq_buf.cpuva,
1625                                 pmu->seq_buf.iova,
1626                                 GK20A_PMU_SEQ_BUF_SIZE);
1627         if (err) {
1628                 gk20a_err(d, "failed to allocate sg table\n");
1629                 goto err_unmap_ucode;
1630         }
1631
1632         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1633                                         GK20A_PMU_SEQ_BUF_SIZE,
1634                                         0, /* flags */
1635                                         gk20a_mem_flag_none);
1636         if (!pmu->seq_buf.pmu_va) {
1637                 gk20a_err(d, "failed to map pmu seq buffer memory!!");
                err = -ENOMEM;
1638                 goto err_free_seq_buf_sgt;
1639         }
1640
1641         ptr = (u8 *)pmu->seq_buf.cpuva;
1642         if (!ptr) {
1643                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                err = -ENOMEM;
1644                 goto err_unmap_seq_buf;
1645         }
1646
1647         /* TBD: remove this if ZBC save/restore is handled by PMU;
1648          * send an empty ZBC sequence for now */
1649         ptr[0] = 0x16; /* opcode EXIT */
1650         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1651         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1652
1653         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1654
1655         ucode_ptr = pmu->ucode.cpuva;
1656
1657         for (i = 0; i < (pmu->desc->app_start_offset +
1658                         pmu->desc->app_size) >> 2; i++)
1659                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1660
1661         gk20a_free_sgtable(&sgt_pmu_ucode);
1662         gk20a_free_sgtable(&sgt_seq_buf);
1663
1664         pmu->sw_ready = true;
1665
1666 skip_init:
1667         mutex_init(&pmu->elpg_mutex);
1668         mutex_init(&pmu->isr_mutex);
1669         mutex_init(&pmu->isr_enable_lock);
1670         mutex_init(&pmu->pmu_copy_lock);
1671         mutex_init(&pmu->pmu_seq_lock);
1672
1673         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1674         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1675
1676         pmu->remove_support = gk20a_remove_pmu_support;
1677         err = gk20a_init_pmu(pmu);
1678         if (err) {
1679                 gk20a_err(d, "failed to set function pointers\n");
1680                 return err;
1681         }
1682
1683         gk20a_dbg_fn("done");
1684         return 0;
1685
1686  err_unmap_seq_buf:
1687         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1688                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1689  err_free_seq_buf_sgt:
1690         gk20a_free_sgtable(&sgt_seq_buf);
1691  err_unmap_ucode:
1692         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1693                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1694  err_free_ucode_sgt:
1695         gk20a_free_sgtable(&sgt_pmu_ucode);
1696  err_free_seq_buf:
1697         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1698                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1699         pmu->seq_buf.cpuva = NULL;
1700         pmu->seq_buf.iova = 0;
1701  err_free_pmu_ucode:
1702         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1703                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1704         pmu->ucode.cpuva = NULL;
1705         pmu->ucode.iova = 0;
1706  err_release_fw:
1707         release_firmware(g->pmu_fw);
1708  err_free_seq:
1709         kfree(pmu->seq);
1710  err_free_mutex:
1711         kfree(pmu->mutex);
1712  err:
1713         gk20a_dbg_fn("fail");
1714         return err;
1715 }
1716
1717 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1718                         void *param, u32 handle, u32 status);
1719
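/*
 * Completion callback for PMU_PG_CMD_ID_ENG_BUF_LOAD: record whether the
 * engine buffer was loaded successfully and kick the pg_init work so the
 * PMU state machine can advance to its next step.
 */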
1720 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1721                         void *param, u32 handle, u32 status)
1722 {
1723         struct pmu_gk20a *pmu = param;
1724         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1725
1726         gk20a_dbg_fn("");
1727
1728         gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1729         if (status != 0) {
1730                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1731                 /* TBD: disable ELPG */
1732                 return;
1733         }
1734
1735         if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1736                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1737         }
1738
1739         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1740         schedule_work(&pmu->pg_init);
1741 }
1742
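/*
 * First hardware setup stage: reset the PMU falcon with the ISR enabled,
 * program the FBIF apertures (virtual and physical DMA indexes) and
 * bootstrap the PMU ucode.
 */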
1743 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1744 {
1745         struct pmu_gk20a *pmu = &g->pmu;
1746         int err;
1747
1748         gk20a_dbg_fn("");
1749
1750         mutex_lock(&pmu->isr_enable_lock);
1751         pmu_reset(pmu);
1752         pmu->isr_enabled = true;
1753         mutex_unlock(&pmu->isr_enable_lock);
1754
1755         /* setup apertures - virtual */
1756         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1757                 pwr_fbif_transcfg_mem_type_virtual_f());
1758         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1759                 pwr_fbif_transcfg_mem_type_virtual_f());
1760         /* setup apertures - physical */
1761         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1762                 pwr_fbif_transcfg_mem_type_physical_f() |
1763                 pwr_fbif_transcfg_target_local_fb_f());
1764         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1765                 pwr_fbif_transcfg_mem_type_physical_f() |
1766                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1767         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1768                 pwr_fbif_transcfg_mem_type_physical_f() |
1769                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1770
1771         /* TBD: load pmu ucode */
1772         err = pmu_bootstrap(pmu);
1773         if (err)
1774                 return err;
1775
1776         return 0;
1777
1778 }
1779
1780 static int gk20a_aelpg_init(struct gk20a *g);
1781 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1782
1783 static void pmu_setup_hw_load_zbc(struct gk20a *g);
1784 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
1785
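/*
 * pg_init work handler: drives the post-boot PMU state machine.
 * ELPG_BOOTED -> bind the FECS PG buffer, LOADING_PG_BUF -> load the ZBC
 * buffer, LOADING_ZBC -> enable ELPG and mark the PMU started.
 */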
1786 static void pmu_setup_hw(struct work_struct *work)
1787 {
1788         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1789         struct gk20a *g = pmu->g;
1790
1791         switch (pmu->pmu_state) {
1792         case PMU_STATE_ELPG_BOOTED:
1793                 gk20a_dbg_pmu("elpg booted");
1794                 gk20a_init_pmu_bind_fecs(g);
1795                 break;
1796         case PMU_STATE_LOADING_PG_BUF:
1797                 gk20a_dbg_pmu("loaded pg buf");
1798                 pmu_setup_hw_load_zbc(g);
1799                 break;
1800         case PMU_STATE_LOADING_ZBC:
1801                 gk20a_dbg_pmu("loaded zbc");
1802                 pmu_setup_hw_enable_elpg(g);
1803                 break;
1804         case PMU_STATE_STARTED:
1805                 gk20a_dbg_pmu("PMU booted");
1806                 break;
1807         default:
1808                 gk20a_dbg_pmu("invalid state");
1809                 break;
1810         }
1811 }
1812
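/*
 * Allocate and map the FECS powergating buffer (size queried from FECS),
 * bind the PMU instance block to GR, hand the buffer's PMU VA to FECS and
 * post an ENG_BUF_LOAD command for the FECS buffer index.
 */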
1813 int gk20a_init_pmu_bind_fecs(struct gk20a *g)
1814 {
1815         struct pmu_gk20a *pmu = &g->pmu;
1816         struct mm_gk20a *mm = &g->mm;
1817         struct vm_gk20a *vm = &mm->pmu.vm;
1818         struct device *d = dev_from_gk20a(g);
1819         struct pmu_cmd cmd;
1820         u32 desc;
1821         int err;
1822         u32 size;
1823         struct sg_table *sgt_pg_buf;
1824         dma_addr_t iova;
1825
1826         gk20a_dbg_fn("");
1827
1828         size = 0;
1829         gk20a_gr_wait_initialized(g);
1830         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1831         if (err) {
1832                 gk20a_err(dev_from_gk20a(g),
1833                         "fail to query fecs pg buffer size");
1834                 return err;
1835         }
1836
1837         if (!pmu->pg_buf.cpuva) {
1838                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1839                                                 &iova,
1840                                                 GFP_KERNEL);
1841                 if (!pmu->pg_buf.cpuva) {
1842                         gk20a_err(d, "failed to allocate memory\n");
1843                         return -ENOMEM;
1844                 }
1845
1846                 pmu->pg_buf.iova = iova;
1847                 pmu->pg_buf.size = size;
1848
1849                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1850                                         pmu->pg_buf.cpuva,
1851                                         pmu->pg_buf.iova,
1852                                         size);
1853                 if (err) {
1854                         gk20a_err(d, "failed to create sg table\n");
1855                         goto err_free_pg_buf;
1856                 }
1857
1858                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1859                                         &sgt_pg_buf,
1860                                         size,
1861                                         0, /* flags */
1862                                         gk20a_mem_flag_none);
1863                 if (!pmu->pg_buf.pmu_va) {
1864                         gk20a_err(d, "failed to map fecs pg buffer");
1865                         err = -ENOMEM;
1866                         goto err_free_sgtable;
1867                 }
1868
1869                 gk20a_free_sgtable(&sgt_pg_buf);
1870         }
1871
1872         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1873         if (err) {
1874                 gk20a_err(dev_from_gk20a(g),
1875                         "fail to bind pmu inst to gr");
1876                 return err;
1877         }
1878
1879         err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
1880         if (err) {
1881                 gk20a_err(dev_from_gk20a(g),
1882                         "fail to set pg buffer pmu va");
1883                 return err;
1884         }
1885
1886         memset(&cmd, 0, sizeof(struct pmu_cmd));
1887         cmd.hdr.unit_id = PMU_UNIT_PG;
1888         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1889         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1890         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1891         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1892         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
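        /* the buffer's PMU VA is split for the DMA descriptor: everything
         * above the low byte goes into dma_base (VA >> 8), the low 8 bits
         * into dma_offset */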
1893         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1894         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1895         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1896
1897         pmu->buf_loaded = false;
1898         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1899         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1900                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1901         pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
1902         return err;
1903
1904 err_free_sgtable:
1905         gk20a_free_sgtable(&sgt_pg_buf);
1906 err_free_pg_buf:
1907         dma_free_coherent(d, size,
1908                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
1909         pmu->pg_buf.cpuva = NULL;
1910         pmu->pg_buf.iova = 0;
1911         return err;
1912 }
1913
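/*
 * Mirror of the FECS buffer load above, but for the ZBC sequence buffer
 * (PMU_PGENG_GR_BUFFER_IDX_ZBC); once it completes, the state machine
 * moves on to enabling ELPG.
 */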
1914 static void pmu_setup_hw_load_zbc(struct gk20a *g)
1915 {
1916         struct pmu_gk20a *pmu = &g->pmu;
1917         struct pmu_cmd cmd;
1918         u32 desc;
1919
1920         memset(&cmd, 0, sizeof(struct pmu_cmd));
1921         cmd.hdr.unit_id = PMU_UNIT_PG;
1922         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1923         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1924         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1925         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1926         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1927         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1928         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1929         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1930
1931         pmu->buf_loaded = false;
1932         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
1933         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1934                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1935         pmu->pmu_state = PMU_STATE_LOADING_ZBC;
1936 }
1937
1938 static void pmu_setup_hw_enable_elpg(struct gk20a *g)
1939 {
1940         struct pmu_gk20a *pmu = &g->pmu;
1941
1942         /*
1943          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1944          * 7. This prevents PMU stalling on Host register accesses. Once the
1945          * cause for this hang is discovered and fixed, this WAR should be
1946          * removed.
1947          */
1948         gk20a_writel(g, 0x10a164, 0x109ff);
1949
1950         pmu->initialized = true;
1951         pmu->pmu_state = PMU_STATE_STARTED;
1952
1953         pmu->zbc_ready = true;
1954         /* Save zbc table after PMU is initialized. */
1955         gr_gk20a_pmu_save_zbc(g, 0xf);
1956
1957         if (g->elpg_enabled)
1958                 gk20a_pmu_enable_elpg(g);
1959
1960         udelay(50);
1961
1962         /* Enable AELPG */
1963         if (g->aelpg_enabled) {
1964                 gk20a_aelpg_init(g);
1965                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
1966         }
1967 }
1968
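/*
 * Top-level PMU init entry point: bring the PMU out of reset, then run
 * the software and first-stage hardware setup when PMU support is
 * enabled.
 */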
1969 int gk20a_init_pmu_support(struct gk20a *g)
1970 {
1971         struct pmu_gk20a *pmu = &g->pmu;
1972         int err;
1973
1974         gk20a_dbg_fn("");
1975
1976         if (pmu->initialized)
1977                 return 0;
1978
1979         pmu->g = g;
1980
1981         err = gk20a_init_pmu_reset_enable_hw(g);
1982         if (err)
1983                 return err;
1984
1985         if (support_gk20a_pmu()) {
1986                 err = gk20a_init_pmu_setup_sw(g);
1987                 if (err)
1988                         return err;
1989
1990                 err = gk20a_init_pmu_setup_hw1(g);
1991                 if (err)
1992                         return err;
1993         }
1994
1995         return err;
1996 }
1997
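/*
 * Handle INIT/ALLOW/DISALLOW acknowledgements from the PMU ELPG unit. The
 * DISALLOW ack received during boot moves the state machine from STARTING
 * to ELPG_BOOTED and schedules the pg_init work.
 */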
1998 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1999                         void *param, u32 handle, u32 status)
2000 {
2001         struct pmu_gk20a *pmu = param;
2002         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2003
2004         gk20a_dbg_fn("");
2005
2006         if (status != 0) {
2007                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2008                 /* TBD: disable ELPG */
2009                 return;
2010         }
2011
2012         switch (elpg_msg->msg) {
2013         case PMU_PG_ELPG_MSG_INIT_ACK:
2014                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2015                 break;
2016         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2017                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2018                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2019                 break;
2020         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2021                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2022                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2023                 if (pmu->pmu_state == PMU_STATE_STARTING)
2024                         pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
2025                 schedule_work(&pmu->pg_init);
2026                 break;
2027         default:
2028                 gk20a_err(dev_from_gk20a(g),
2029                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2030         }
2031
2032         return;
2033 }
2034
2035 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2036                         void *param, u32 handle, u32 status)
2037 {
2038         struct pmu_gk20a *pmu = param;
2039
2040         gk20a_dbg_fn("");
2041
2042         if (status != 0) {
2043                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2044                 /* TBD: disable ELPG */
2045                 return;
2046         }
2047
2048         switch (msg->msg.pg.stat.sub_msg_id) {
2049         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2050                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2051                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2052                 break;
2053         default:
2054                 break;
2055         }
2056 }
2057
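/*
 * Program the GR idle thresholds and queue the initial powergating
 * commands: ELPG INIT, a PG_STAT ALLOC_DMEM request for the statistics
 * log, and an ELPG DISALLOW (the ucode expects a disallow before the
 * first allow).
 */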
2058 static int pmu_init_powergating(struct pmu_gk20a *pmu)
2059 {
2060         struct gk20a *g = pmu->g;
2061         struct pmu_cmd cmd;
2062         u32 seq;
2063
2064         gk20a_dbg_fn("");
2065
2066         if (tegra_cpu_is_asim()) {
2067                 /* TBD: calculate threshold for silicon */
2068                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2069                                 PMU_PG_IDLE_THRESHOLD_SIM);
2070                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2071                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2072         } else {
2073                 /* TBD: calculate threshold for silicon */
2074                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2075                                 PMU_PG_IDLE_THRESHOLD);
2076                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2077                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2078         }
2079
2080         /* init ELPG */
2081         memset(&cmd, 0, sizeof(struct pmu_cmd));
2082         cmd.hdr.unit_id = PMU_UNIT_PG;
2083         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2084         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2085         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2086         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2087
2088         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
2089         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2090                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2091
2092         /* alloc dmem for powergating state log */
2093         pmu->stat_dmem_offset = 0;
2094         memset(&cmd, 0, sizeof(struct pmu_cmd));
2095         cmd.hdr.unit_id = PMU_UNIT_PG;
2096         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2097         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2098         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2099         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2100         cmd.cmd.pg.stat.data = 0;
2101
2102         gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
2103         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2104                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2105
2106         /* disallow ELPG initially
2107            PMU ucode requires a disallow cmd before allow cmd */
2108         pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
2109         memset(&cmd, 0, sizeof(struct pmu_cmd));
2110         cmd.hdr.unit_id = PMU_UNIT_PG;
2111         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2112         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2113         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2114         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2115
2116         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
2117         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2118                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2119
2120         /* start with elpg disabled until first enable call */
2121         pmu->elpg_refcnt = 0;
2122
2123         pmu->pmu_state = PMU_STATE_STARTING;
2124
2125         return 0;
2126 }
2127
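/*
 * Configure the PMU idle counters (#3 busy on GR/CE2, #6 total cycles,
 * plus #1/#2 for raw readings), allocate the DMEM sample buffer and post
 * the PERFMON INIT command with the sampling parameters.
 */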
2128 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2129 {
2130         struct gk20a *g = pmu->g;
2131         struct pmu_v *pv = &g->ops.pmu_ver;
2132         struct pmu_cmd cmd;
2133         struct pmu_payload payload;
2134         u32 seq;
2135         u32 data;
2136         int err = 0;
2137
2138         gk20a_dbg_fn("");
2139
2140         pmu->perfmon_ready = 0;
2141
2142         /* use counter #3 for GR && CE2 busy cycles */
2143         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2144                 pwr_pmu_idle_mask_gr_enabled_f() |
2145                 pwr_pmu_idle_mask_ce_2_enabled_f());
2146
2147         /* disable idle filtering for counters 3 and 6 */
2148         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2149         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2150                         pwr_pmu_idle_ctrl_filter_m(),
2151                         pwr_pmu_idle_ctrl_value_busy_f() |
2152                         pwr_pmu_idle_ctrl_filter_disabled_f());
2153         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2154
2155         /* use counter #6 for total cycles */
2156         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2157         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2158                         pwr_pmu_idle_ctrl_filter_m(),
2159                         pwr_pmu_idle_ctrl_value_always_f() |
2160                         pwr_pmu_idle_ctrl_filter_disabled_f());
2161         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2162
2163         /*
2164          * We don't want to disturb counters #3 and #6, which are used by
2165          * perfmon, so we add wiring also to counters #1 and #2 for
2166          * exposing raw counter readings.
2167          */
2168         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2169                 pwr_pmu_idle_mask_gr_enabled_f() |
2170                 pwr_pmu_idle_mask_ce_2_enabled_f());
2171
2172         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2173         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2174                         pwr_pmu_idle_ctrl_filter_m(),
2175                         pwr_pmu_idle_ctrl_value_busy_f() |
2176                         pwr_pmu_idle_ctrl_filter_disabled_f());
2177         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2178
2179         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2180         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2181                         pwr_pmu_idle_ctrl_filter_m(),
2182                         pwr_pmu_idle_ctrl_value_always_f() |
2183                         pwr_pmu_idle_ctrl_filter_disabled_f());
2184         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2185
2186         if (!pmu->sample_buffer)
2187                 err = pmu->dmem.alloc(&pmu->dmem,
2188                                       &pmu->sample_buffer, 2 * sizeof(u16));
2189         if (err) {
2190                 gk20a_err(dev_from_gk20a(g),
2191                         "failed to allocate perfmon sample buffer");
2192                 return -ENOMEM;
2193         }
2194
2195         /* init PERFMON */
2196         memset(&cmd, 0, sizeof(struct pmu_cmd));
2197         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2198         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2199         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2200         /* buffer to save counter values for pmu perfmon */
2201         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2202                 (u16)pmu->sample_buffer);
2203         /* number of sample periods below lower threshold
2204            before pmu triggers perfmon decrease event
2205            TBD: = 15 */
2206         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2207         /* index of base counter, aka. always ticking counter */
2208         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2209         /* microseconds interval between pmu polls perf counters */
2210         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2211         /* number of perfmon counters
2212            counter #3 (GR and CE2) for gk20a */
2213         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2214         /* moving average window for sample periods
2215            TBD: = 3000000 / sample_period_us = 17 */
2216         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2217
2218         memset(&payload, 0, sizeof(struct pmu_payload));
2219         payload.in.buf = &pmu->perfmon_counter;
2220         payload.in.size = sizeof(struct pmu_perfmon_counter);
2221         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2222
2223         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
2224         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2225                         NULL, NULL, &seq, ~0);
2226
2227         return 0;
2228 }
2229
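/*
 * Read the PMU INIT message directly from DMEM (the message queue is not
 * usable yet), capture the ucode gid info if its signature matches, then
 * initialize the command/message queues and the DMEM allocator it
 * describes.
 */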
2230 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2231                         struct pmu_msg *msg)
2232 {
2233         struct gk20a *g = pmu->g;
2234         struct pmu_v *pv = &g->ops.pmu_ver;
2235         union pmu_init_msg_pmu *init;
2236         struct pmu_sha1_gid_data gid_data;
2237         u32 i, tail = 0;
2238
2239         tail = pwr_pmu_msgq_tail_val_v(
2240                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2241
2242         pmu_copy_from_dmem(pmu, tail,
2243                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2244
2245         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2246                 gk20a_err(dev_from_gk20a(g),
2247                         "expecting init msg");
2248                 return -EINVAL;
2249         }
2250
2251         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2252                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2253
2254         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2255                 gk20a_err(dev_from_gk20a(g),
2256                         "expecting init msg");
2257                 return -EINVAL;
2258         }
2259
2260         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2261         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2262                 pwr_pmu_msgq_tail_val_f(tail));
2263
2264         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2265         if (!pmu->gid_info.valid) {
2266
2267                 pmu_copy_from_dmem(pmu,
2268                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2269                         (u8 *)&gid_data,
2270                         sizeof(struct pmu_sha1_gid_data), 0);
2271
2272                 pmu->gid_info.valid =
2273                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2274
2275                 if (pmu->gid_info.valid) {
2276
2277                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2278                                 sizeof(gid_data.gid));
2279
2280                         memcpy(pmu->gid_info.gid, gid_data.gid,
2281                                 sizeof(pmu->gid_info.gid));
2282                 }
2283         }
2284
2285         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2286                 pmu_queue_init(pmu, i, init);
2287
2288         if (!pmu->dmem.alloc)
2289                 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2290                                 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2291                                 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2292                                 PMU_DMEM_ALLOC_ALIGNMENT);
2293
2294         pmu->pmu_ready = true;
2295
2296         return 0;
2297 }
2298
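/*
 * Pop one message from the given queue. A REWIND header causes the queue
 * to wrap and the header to be re-read. Returns true when a message was
 * read; returns false when the queue is empty or on error, with *status
 * carrying the error code.
 */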
2299 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2300                         struct pmu_msg *msg, int *status)
2301 {
2302         struct gk20a *g = pmu->g;
2303         u32 read_size, bytes_read;
2304         int err;
2305
2306         *status = 0;
2307
2308         if (pmu_queue_is_empty(pmu, queue))
2309                 return false;
2310
2311         err = pmu_queue_open_read(pmu, queue);
2312         if (err) {
2313                 gk20a_err(dev_from_gk20a(g),
2314                         "fail to open queue %d for read", queue->id);
2315                 *status = err;
2316                 return false;
2317         }
2318
2319         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2320                         PMU_MSG_HDR_SIZE, &bytes_read);
2321         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2322                 gk20a_err(dev_from_gk20a(g),
2323                         "fail to read msg from queue %d", queue->id);
2324                 *status = err | -EINVAL;
2325                 goto clean_up;
2326         }
2327
2328         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2329                 pmu_queue_rewind(pmu, queue);
2330                 /* read again after rewind */
2331                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2332                                 PMU_MSG_HDR_SIZE, &bytes_read);
2333                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2334                         gk20a_err(dev_from_gk20a(g),
2335                                 "fail to read msg from queue %d", queue->id);
2336                         *status = err | -EINVAL;
2337                         goto clean_up;
2338                 }
2339         }
2340
2341         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2342                 gk20a_err(dev_from_gk20a(g),
2343                         "read invalid unit_id %d from queue %d",
2344                         msg->hdr.unit_id, queue->id);
2345                 *status = -EINVAL;
2346                 goto clean_up;
2347         }
2348
2349         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2350                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2351                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2352                         read_size, &bytes_read);
2353                 if (err || bytes_read != read_size) {
2354                         gk20a_err(dev_from_gk20a(g),
2355                                 "fail to read msg from queue %d", queue->id);
2356                         *status = err;
2357                         goto clean_up;
2358                 }
2359         }
2360
2361         err = pmu_queue_close(pmu, queue, true);
2362         if (err) {
2363                 gk20a_err(dev_from_gk20a(g),
2364                         "fail to close queue %d", queue->id);
2365                 *status = err;
2366                 return false;
2367         }
2368
2369         return true;
2370
2371 clean_up:
2372         err = pmu_queue_close(pmu, queue, false);
2373         if (err)
2374                 gk20a_err(dev_from_gk20a(g),
2375                         "fail to close queue %d", queue->id);
2376         return false;
2377 }
2378
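/*
 * Match a response message to its pending sequence, copy any DMEM output
 * payload back to the caller's buffer, free the sequence's DMEM
 * allocations, invoke the completion callback and release the sequence.
 */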
2379 static int pmu_response_handle(struct pmu_gk20a *pmu,
2380                         struct pmu_msg *msg)
2381 {
2382         struct gk20a *g = pmu->g;
2383         struct pmu_sequence *seq;
2384         struct pmu_v *pv = &g->ops.pmu_ver;
2385         int ret = 0;
2386
2387         gk20a_dbg_fn("");
2388
2389         seq = &pmu->seq[msg->hdr.seq_id];
2390         if (seq->state != PMU_SEQ_STATE_USED &&
2391             seq->state != PMU_SEQ_STATE_CANCELLED) {
2392                 gk20a_err(dev_from_gk20a(g),
2393                         "msg for an unknown sequence %d", seq->id);
2394                 return -EINVAL;
2395         }
2396
2397         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2398             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2399                 gk20a_err(dev_from_gk20a(g),
2400                         "unhandled cmd: seq %d", seq->id);
2401         }
2402         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2403                 if (seq->msg) {
2404                         if (seq->msg->hdr.size >= msg->hdr.size) {
2405                                 memcpy(seq->msg, msg, msg->hdr.size);
2406                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2407                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2408                                         pmu_copy_from_dmem(pmu,
2409                                         pv->pmu_allocation_get_dmem_offset(pmu,
2410                                         pv->get_pmu_seq_out_a_ptr(seq)),
2411                                         seq->out_payload,
2412                                         pv->pmu_allocation_get_dmem_size(pmu,
2413                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2414                                 }
2415                         } else {
2416                                 gk20a_err(dev_from_gk20a(g),
2417                                         "sequence %d msg buffer too small",
2418                                         seq->id);
2419                         }
2420                 }
2421         } else
2422                 seq->callback = NULL;
2423         if (pv->pmu_allocation_get_dmem_size(pmu,
2424                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2425                 pmu->dmem.free(&pmu->dmem,
2426                 pv->pmu_allocation_get_dmem_offset(pmu,
2427                 pv->get_pmu_seq_in_a_ptr(seq)),
2428                 pv->pmu_allocation_get_dmem_size(pmu,
2429                 pv->get_pmu_seq_in_a_ptr(seq)));
2430         if (pv->pmu_allocation_get_dmem_size(pmu,
2431                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2432                 pmu->dmem.free(&pmu->dmem,
2433                 pv->pmu_allocation_get_dmem_offset(pmu,
2434                 pv->get_pmu_seq_out_a_ptr(seq)),
2435                 pv->pmu_allocation_get_dmem_size(pmu,
2436                 pv->get_pmu_seq_out_a_ptr(seq)));
2437
2438         if (seq->callback)
2439                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2440
2441         pmu_seq_release(pmu, seq);
2442
2443         /* TBD: notify client waiting for available dmem */
2444
2445         gk20a_dbg_fn("done");
2446
2447         return 0;
2448 }
2449
2450 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2451                                  u32 *var, u32 val);
2452
2453 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2454                         void *param, u32 handle, u32 status)
2455 {
2456         struct pmu_gk20a *pmu = param;
2457         gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
2458         pmu->zbc_save_done = 1;
2459 }
2460
2461 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2462 {
2463         struct pmu_gk20a *pmu = &g->pmu;
2464         struct pmu_cmd cmd;
2465         u32 seq;
2466
2467         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2468                 return;
2469
2470         memset(&cmd, 0, sizeof(struct pmu_cmd));
2471         cmd.hdr.unit_id = PMU_UNIT_PG;
2472         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2473         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2474         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2475
2476         pmu->zbc_save_done = 0;
2477
2478         gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
2479         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2480                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2481         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2482                               &pmu->zbc_save_done, 1);
2483         if (!pmu->zbc_save_done)
2484                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2485 }
2486
2487 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2488 {
2489         struct gk20a *g = pmu->g;
2490         struct pmu_v *pv = &g->ops.pmu_ver;
2491         struct pmu_cmd cmd;
2492         struct pmu_payload payload;
2493         u32 current_rate = 0;
2494         u32 seq;
2495
2496         /* PERFMON Start */
2497         memset(&cmd, 0, sizeof(struct pmu_cmd));
2498         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2499         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2500         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2501                 PMU_PERFMON_CMD_ID_START);
2502         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2503                 PMU_DOMAIN_GROUP_PSTATE);
2504         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2505                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2506
2507         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2508         if (current_rate >= gpc_pll_params.max_freq)
2509                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2510                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2511         else if (current_rate <= gpc_pll_params.min_freq)
2512                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2513                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2514         else
2515                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2516                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2517                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2518
2519         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2520                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2521                 PMU_PERFMON_FLAG_CLEAR_PREV);
2522
2523         memset(&payload, 0, sizeof(struct pmu_payload));
2524
2525         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2526         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2527         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2528         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2529         pmu->perfmon_counter.valid = true;
2530
2531         payload.in.buf = &pmu->perfmon_counter;
2532         payload.in.size = sizeof(pmu->perfmon_counter);
2533         payload.in.offset =
2534                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2535
2536         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
2537         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2538                         NULL, NULL, &seq, ~0);
2539
2540         return 0;
2541 }
2542
2543 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2544 {
2545         struct gk20a *g = pmu->g;
2546         struct pmu_cmd cmd;
2547         u32 seq;
2548
2549         /* PERFMON Stop */
2550         memset(&cmd, 0, sizeof(struct pmu_cmd));
2551         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2552         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2553         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2554
2555         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
2556         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2557                         NULL, NULL, &seq, ~0);
2558         return 0;
2559 }
2560
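/*
 * Simple load-based DVFS response: on an INCREASE event raise the GPU
 * clock to 6/5 of the current rate, on a DECREASE event drop it to 7/10,
 * then restart sampling when perfmon support is enabled.
 */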
2561 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2562                         struct pmu_perfmon_msg *msg)
2563 {
2564         struct gk20a *g = pmu->g;
2565         u32 rate;
2566
2567         gk20a_dbg_fn("");
2568
2569         switch (msg->msg_type) {
2570         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2571                 gk20a_dbg_pmu("perfmon increase event: "
2572                         "state_id %d, group_id %d, pct %d",
2573                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2574                 /* increase gk20a clock freq by 20% */
2575                 rate = gk20a_clk_get_rate(g);
2576                 gk20a_clk_set_rate(g, rate * 6 / 5);
2577                 break;
2578         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2579                 gk20a_dbg_pmu("perfmon decrease event: "
2580                         "state_id %d, group_id %d, pct %d",
2581                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2582                 /* decrease gk20a clock freq to 70% of current rate */
2583                 rate = gk20a_clk_get_rate(g);
2584                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2585                 break;
2586         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2587                 pmu->perfmon_ready = 1;
2588                 gk20a_dbg_pmu("perfmon init event");
2589                 break;
2590         default:
2591                 break;
2592         }
2593
2594         /* restart sampling */
2595         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2596                 return pmu_perfmon_start_sampling(pmu);
2597         return 0;
2598 }
2599
2600
2601 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2602 {
2603         int err = 0;
2604
2605         gk20a_dbg_fn("");
2606
2607         switch (msg->hdr.unit_id) {
2608         case PMU_UNIT_PERFMON:
2609                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2610                 break;
2611         default:
2612                 break;
2613         }
2614
2615         return err;
2616 }
2617
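/*
 * Called from the ISR on swgen0: the first message is the INIT message,
 * which also triggers powergating and perfmon init; afterwards drain the
 * message queue, dispatching events and command responses.
 */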
2618 static int pmu_process_message(struct pmu_gk20a *pmu)
2619 {
2620         struct pmu_msg msg;
2621         int status;
2622
2623         if (unlikely(!pmu->pmu_ready)) {
2624                 pmu_process_init_msg(pmu, &msg);
2625                 pmu_init_powergating(pmu);
2626                 pmu_init_perfmon(pmu);
2627                 return 0;
2628         }
2629
2630         while (pmu_read_message(pmu,
2631                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2632
2633                 gk20a_dbg_pmu("read msg hdr: "
2634                                 "unit_id = 0x%08x, size = 0x%08x, "
2635                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2636                                 msg.hdr.unit_id, msg.hdr.size,
2637                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2638
2639                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2640
2641                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2642                         pmu_handle_event(pmu, &msg);
2643                 } else {
2644                         pmu_response_handle(pmu, &msg);
2645                 }
2646         }
2647
2648         return 0;
2649 }
2650
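/*
 * Poll *var until it reaches val, servicing any pending PMU interrupt
 * inline and backing off exponentially between checks; returns -ETIMEDOUT
 * once the timeout expires on silicon.
 */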
2651 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2652                                  u32 *var, u32 val)
2653 {
2654         struct gk20a *g = pmu->g;
2655         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2656         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2657
2658         do {
2659                 if (*var == val)
2660                         return 0;
2661
2662                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2663                         gk20a_pmu_isr(g);
2664
2665                 usleep_range(delay, delay * 2);
2666                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2667         } while (time_before(jiffies, end_jiffies) ||
2668                         !tegra_platform_is_silicon());
2669
2670         return -ETIMEDOUT;
2671 }
2672
2673 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2674 {
2675         struct gk20a *g = pmu->g;
2676         struct pmu_pg_stats stats;
2677
2678         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2679                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2680
2681         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2682                 stats.pg_entry_start_timestamp);
2683         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2684                 stats.pg_exit_start_timestamp);
2685         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2686                 stats.pg_ingating_start_timestamp);
2687         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2688                 stats.pg_ungating_start_timestamp);
2689         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2690                 stats.pg_avg_entry_time_us);
2691         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2692                 stats.pg_avg_exit_time_us);
2693         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2694                 stats.pg_ingating_cnt);
2695         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2696                 stats.pg_ingating_time_us);
2697         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2698                 stats.pg_ungating_count);
2699         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2700                 stats.pg_ungating_time_us);
2701         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2702                 stats.pg_gating_cnt);
2703         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2704                 stats.pg_gating_deny_cnt);
2705
2706         /*
2707            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2708            in .nm file, e.g. 0x1000066c. use 0x66c.
2709         u32 i, val[20];
2710         pmu_copy_from_dmem(pmu, 0x66c,
2711                 (u8 *)val, sizeof(val), 0);
2712         gk20a_dbg_pmu("elpg log begin");
2713         for (i = 0; i < 20; i++)
2714                 gk20a_dbg_pmu("0x%08x", val[i]);
2715         gk20a_dbg_pmu("elpg log end");
2716         */
2717
2718         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2719                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2720         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2721                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2722         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2723                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2724         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2725                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2726         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2727                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2728
2729         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2730                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2731         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2732                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2733         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2734                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2735
2736         /*
2737          TBD: script can't generate those registers correctly
2738         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2739                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2740         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2741                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2742         */
2743 }
2744
2745 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2746 {
2747         struct gk20a *g = pmu->g;
2748         int i;
2749
2750         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2751                 gk20a_readl(g, pwr_falcon_os_r()));
2752         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2753                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2754         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2755                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2756         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2757                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2758         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2759                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2760         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2761                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2762         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2763                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2764         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2765                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2766         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2767                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2768
2769         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2770                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2771                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2772
2773         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2774                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2775                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2776
2777         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2778                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2779                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2780                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2781                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2782                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2783         }
2784
2785         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2786         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2787         if (i != 0) {
2788                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2789                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2790                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2791                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2792                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2793                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2794                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2795                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2796         }
2797
2798         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2799         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2800
2801         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2802         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2803         if (pwr_falcon_exterrstat_valid_v(i) ==
2804                         pwr_falcon_exterrstat_valid_true_v()) {
2805                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2806                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2807                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2808                         gk20a_readl(g, mc_enable_r()));
2809         }
2810
2811         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2812                 gk20a_readl(g, pwr_falcon_engctl_r()));
2813         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2814                 gk20a_readl(g, pwr_falcon_curctx_r()));
2815         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2816                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2817
2818         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2819                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2820                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2821         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2822                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2823
2824         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2825                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2826                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2827         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2828                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2829
2830         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2831                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2832                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2833         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2834                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2835
2836         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2837                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2838                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2839         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2840                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2841
2842         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2843                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2844                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2845         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2846                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2847
2848         for (i = 0; i < 4; i++) {
2849                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2850                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2851                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2852                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2853                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2854
2855                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2856                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2857                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2858                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2859                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2860         }
2861         gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
2862                         pmu->elpg_stat);
2863
2864         /* PMU may crash due to FECS crash. Dump FECS status */
2865         gk20a_fecs_dump_falcon_stats(g);
2866 }
2867
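/*
 * gk20a_pmu_isr() - PMU falcon interrupt handler.
 *
 * Returns early if PMU interrupts have been disabled.  Otherwise reads the
 * masked interrupt status, dumps falcon state on halt/exterr interrupts,
 * processes pending PMU messages on swgen0, clears the handled bits and,
 * if the message queue is still non-empty, re-raises swgen0 so the rest of
 * the messages are handled by a follow-up interrupt.
 */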
2868 void gk20a_pmu_isr(struct gk20a *g)
2869 {
2870         struct pmu_gk20a *pmu = &g->pmu;
2871         struct pmu_queue *queue;
2872         u32 intr, mask;
2873         bool recheck = false;
2874
2875         gk20a_dbg_fn("");
2876
2877         mutex_lock(&pmu->isr_enable_lock);
2878         if (!pmu->isr_enabled) {
2879                 mutex_unlock(&pmu->isr_enable_lock);
2880                 return;
2881         }
2882
2883         mutex_lock(&pmu->isr_mutex);
2884
2885         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2886                 gk20a_readl(g, pwr_falcon_irqdest_r());
2887
2888         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2889
2890         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2891
2892         if (!intr) {
2893                 mutex_unlock(&pmu->isr_mutex);
2894                 mutex_unlock(&pmu->isr_enable_lock);
2895                 return;
2896         }
2897
2898         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2899                 gk20a_err(dev_from_gk20a(g),
2900                         "pmu halt intr not implemented");
2901                 pmu_dump_falcon_stats(pmu);
2902         }
2903         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2904                 gk20a_err(dev_from_gk20a(g),
2905                         "pmu exterr intr not implemented. Clearing interrupt.");
2906                 pmu_dump_falcon_stats(pmu);
2907
2908                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2909                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2910                                 ~pwr_falcon_exterrstat_valid_m());
2911         }
2912         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2913                 pmu_process_message(pmu);
2914                 recheck = true;
2915         }
2916
2917         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2918
2919         if (recheck) {
2920                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2921                 if (!pmu_queue_is_empty(pmu, queue))
2922                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2923                                 pwr_falcon_irqsset_swgen0_set_f());
2924         }
2925
2926         mutex_unlock(&pmu->isr_mutex);
2927         mutex_unlock(&pmu->isr_enable_lock);
2928 }
2929
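/*
 * pmu_validate_cmd() - sanity-check a command before it is queued.
 *
 * Verifies that the target queue is a software command queue, that the
 * command (and optional message) headers are plausibly sized, that the
 * unit id is valid and that any payload buffers, sizes and offsets are
 * consistent with the command size.  Returns true if the command may be
 * posted.
 */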
2930 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2931                         struct pmu_msg *msg, struct pmu_payload *payload,
2932                         u32 queue_id)
2933 {
2934         struct gk20a *g = pmu->g;
2935         struct pmu_queue *queue;
2936         u32 in_size, out_size;
2937
2938         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2939                 goto invalid_cmd;
2940
2941         queue = &pmu->queue[queue_id];
2942         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2943                 goto invalid_cmd;
2944
2945         if (cmd->hdr.size > (queue->size >> 1))
2946                 goto invalid_cmd;
2947
2948         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
2949                 goto invalid_cmd;
2950
2951         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
2952                 goto invalid_cmd;
2953
2954         if (payload == NULL)
2955                 return true;
2956
2957         if (payload->in.buf == NULL && payload->out.buf == NULL)
2958                 goto invalid_cmd;
2959
2960         if ((payload->in.buf != NULL && payload->in.size == 0) ||
2961             (payload->out.buf != NULL && payload->out.size == 0))
2962                 goto invalid_cmd;
2963
2964         in_size = PMU_CMD_HDR_SIZE;
2965         if (payload->in.buf) {
2966                 in_size += payload->in.offset;
2967                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2968         }
2969
2970         out_size = PMU_CMD_HDR_SIZE;
2971         if (payload->out.buf) {
2972                 out_size += payload->out.offset;
2973                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2974         }
2975
2976         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
2977                 goto invalid_cmd;
2978
2979
2980         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
2981             (payload->out.offset != 0 && payload->out.buf == NULL))
2982                 goto invalid_cmd;
2983
2984         return true;
2985
2986 invalid_cmd:
2987         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
2988                 "queue_id=%d,\n"
2989                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
2990                 "payload in=%p, in_size=%d, in_offset=%d,\n"
2991                 "payload out=%p, out_size=%d, out_offset=%d",
2992                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
2993                 msg, msg ? msg->hdr.size : ~0,
2994                 &payload->in, payload->in.size, payload->in.offset,
2995                 &payload->out, payload->out.size, payload->out.offset);
2996
2997         return false;
2998 }
2999
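/*
 * pmu_write_cmd() - copy a validated command into the selected PMU queue.
 *
 * Retries pmu_queue_open_write() for up to @timeout ms while the queue is
 * full (-EAGAIN), then pushes the command and commits the queue write.
 * Returns 0 on success or the queue error code.
 */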
3000 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3001                         u32 queue_id, unsigned long timeout)
3002 {
3003         struct gk20a *g = pmu->g;
3004         struct pmu_queue *queue;
3005         unsigned long end_jiffies = jiffies +
3006                 msecs_to_jiffies(timeout);
3007         int err;
3008
3009         gk20a_dbg_fn("");
3010
3011         queue = &pmu->queue[queue_id];
3012
3013         do {
3014                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3015                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3016                         usleep_range(1000, 2000);
3017                 else
3018                         break;
3019         } while (1);
3020
3021         if (err)
3022                 goto clean_up;
3023
3024         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3025
3026         err = pmu_queue_close(pmu, queue, true);
3027
3028 clean_up:
3029         if (err)
3030                 gk20a_err(dev_from_gk20a(g),
3031                         "fail to write cmd to queue %d", queue_id);
3032         else
3033                 gk20a_dbg_fn("done");
3034
3035         return err;
3036 }
3037
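/*
 * gk20a_pmu_cmd_post() - post a command to the PMU.
 *
 * Validates the command, acquires a sequence slot, allocates PMU DMEM for
 * the in/out payloads (copying the input payload into DMEM) and writes the
 * command to the requested queue.  @callback is invoked from
 * pmu_process_message() when the PMU acknowledges the command; *seq_desc
 * identifies the sequence so the reply can be matched.  Returns -EINVAL for
 * a malformed command, a negative error code if no sequence slot or DMEM
 * space is available, and 0 otherwise.
 *
 * Illustrative use (cf. gk20a_pmu_enable_elpg_locked() below):
 *
 *	struct pmu_cmd cmd;
 *	u32 seq;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.hdr.unit_id = PMU_UNIT_PG;
 *	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
 *	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
 *	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
 *	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
 *	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 *			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
 */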
3038 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3039                 struct pmu_msg *msg, struct pmu_payload *payload,
3040                 u32 queue_id, pmu_callback callback, void* cb_param,
3041                 u32 *seq_desc, unsigned long timeout)
3042 {
3043         struct pmu_gk20a *pmu = &g->pmu;
3044         struct pmu_v *pv = &g->ops.pmu_ver;
3045         struct pmu_sequence *seq;
3046         void *in = NULL, *out = NULL;
3047         int err;
3048
3049         gk20a_dbg_fn("");
3050
3051         BUG_ON(!cmd);
3052         BUG_ON(!seq_desc);
3053         BUG_ON(!pmu->pmu_ready);
3054
3055         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3056                 return -EINVAL;
3057
3058         err = pmu_seq_acquire(pmu, &seq);
3059         if (err)
3060                 return err;
3061
3062         cmd->hdr.seq_id = seq->id;
3063
3064         cmd->hdr.ctrl_flags = 0;
3065         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3066         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3067
3068         seq->callback = callback;
3069         seq->cb_params = cb_param;
3070         seq->msg = msg;
3071         seq->out_payload = NULL;
3072         seq->desc = pmu->next_seq_desc++;
3073
3074         if (payload)
3075                 seq->out_payload = payload->out.buf;
3076
3077         *seq_desc = seq->desc;
3078
3079         if (payload && payload->in.offset != 0) {
3080                 pv->set_pmu_allocation_ptr(pmu, &in,
3081                 ((u8 *)&cmd->cmd + payload->in.offset));
3082
3083                 if (payload->in.buf != payload->out.buf)
3084                         pv->pmu_allocation_set_dmem_size(pmu, in,
3085                         (u16)payload->in.size);
3086                 else
3087                         pv->pmu_allocation_set_dmem_size(pmu, in,
3088                         (u16)max(payload->in.size, payload->out.size));
3089
3090                 err = pmu->dmem.alloc(&pmu->dmem,
3091                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3092                 pv->pmu_allocation_get_dmem_size(pmu, in));
3093                 if (err)
3094                         goto clean_up;
3095
3096                 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3097                 in)),
3098                         payload->in.buf, payload->in.size, 0);
3099                 pv->pmu_allocation_set_dmem_size(pmu,
3100                 pv->get_pmu_seq_in_a_ptr(seq),
3101                 pv->pmu_allocation_get_dmem_size(pmu, in));
3102                 pv->pmu_allocation_set_dmem_offset(pmu,
3103                 pv->get_pmu_seq_in_a_ptr(seq),
3104                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3105         }
3106
3107         if (payload && payload->out.offset != 0) {
3108                 pv->set_pmu_allocation_ptr(pmu, &out,
3109                 ((u8 *)&cmd->cmd + payload->out.offset));
3110                 pv->pmu_allocation_set_dmem_size(pmu, out,
3111                 (u16)payload->out.size);
3112
3113                 if (payload->out.buf != payload->in.buf) {
3114                         err = pmu->dmem.alloc(&pmu->dmem,
3115                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3116                         pv->pmu_allocation_get_dmem_size(pmu, out));
3117                         if (err)
3118                                 goto clean_up;
3119                 } else {
3120                         BUG_ON(in == NULL);
3121                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3122                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3123                 }
3124
3125                 pv->pmu_allocation_set_dmem_size(pmu,
3126                 pv->get_pmu_seq_out_a_ptr(seq),
3127                 pv->pmu_allocation_get_dmem_size(pmu, out));
3128                 pv->pmu_allocation_set_dmem_offset(pmu,
3129                 pv->get_pmu_seq_out_a_ptr(seq),
3130                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3131         }
3132
3133         seq->state = PMU_SEQ_STATE_USED;
3134         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3135         if (err)
3136                 seq->state = PMU_SEQ_STATE_PENDING;
3137
3138         gk20a_dbg_fn("done");
3139
3140         return 0;
3141
3142 clean_up:
3143         gk20a_dbg_fn("fail");
3144         if (in)
3145                 pmu->dmem.free(&pmu->dmem,
3146                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3147                 pv->pmu_allocation_get_dmem_size(pmu, in));
3148         if (out)
3149                 pmu->dmem.free(&pmu->dmem,
3150                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3151                 pv->pmu_allocation_get_dmem_size(pmu, out));
3152
3153         pmu_seq_release(pmu, seq);
3154         return err;
3155 }
3156
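/*
 * gk20a_pmu_enable_elpg_locked() - post PMU_PG_ELPG_CMD_ALLOW to the PMU.
 *
 * Caller must hold pmu->elpg_mutex.  elpg_stat is set to ON_PENDING before
 * the command is posted; the ack is handled asynchronously by
 * pmu_handle_pg_elpg_msg().
 */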
3157 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3158 {
3159         struct pmu_gk20a *pmu = &g->pmu;
3160         struct pmu_cmd cmd;
3161         u32 seq, status;
3162
3163         gk20a_dbg_fn("");
3164
3165         memset(&cmd, 0, sizeof(struct pmu_cmd));
3166         cmd.hdr.unit_id = PMU_UNIT_PG;
3167         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3168         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3169         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3170         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3171
3172         /* no need to wait for an ack on ELPG enable, but set pending to
3173            sync with a follow-up ELPG disable */
3174         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3175
3176         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
3177         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3178                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3179
3180         BUG_ON(status != 0);
3181
3182         gk20a_dbg_fn("done");
3183         return 0;
3184 }
3185
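/*
 * gk20a_pmu_enable_elpg() - reference-counted ELPG enable.
 *
 * Increments elpg_refcnt and posts the ALLOW command only when the golden
 * context image has been initialized and ELPG is currently off.
 */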
3186 int gk20a_pmu_enable_elpg(struct gk20a *g)
3187 {
3188         struct pmu_gk20a *pmu = &g->pmu;
3189         struct gr_gk20a *gr = &g->gr;
3190
3191         int ret = 0;
3192
3193         gk20a_dbg_fn("");
3194
3195         mutex_lock(&pmu->elpg_mutex);
3196
3197         pmu->elpg_refcnt++;
3198         if (pmu->elpg_refcnt <= 0)
3199                 goto exit_unlock;
3200
3201         /* something is not right if we end up in the following code path */
3202         if (unlikely(pmu->elpg_refcnt > 1)) {
3203                 gk20a_warn(dev_from_gk20a(g),
3204                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3205                 __func__, pmu->elpg_refcnt);
3206                 WARN_ON(1);
3207         }
3208
3209         /* do NOT enable elpg until the golden ctx is created,
3210            i.e. the ctx that ELPG saves and restores. */
3211         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3212                 goto exit_unlock;
3213
3214         /* return if ELPG is already on or on_pending or off_on_pending */
3215         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3216                 goto exit_unlock;
3217
3218         ret = gk20a_pmu_enable_elpg_locked(g);
3219
3220 exit_unlock:
3221         mutex_unlock(&pmu->elpg_mutex);
3222         gk20a_dbg_fn("done");
3223         return ret;
3224 }
3225
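/*
 * gk20a_pmu_disable_elpg() - reference-counted ELPG disable.
 *
 * Drops elpg_refcnt; when it reaches zero, waits for any pending ALLOW ack,
 * posts PMU_PG_ELPG_CMD_DISALLOW and waits for the DISALLOW ack.  Returns
 * -EBUSY if either ack does not arrive within the GR idle timeout.
 */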
3226 int gk20a_pmu_disable_elpg(struct gk20a *g)
3227 {
3228         struct pmu_gk20a *pmu = &g->pmu;
3229         struct pmu_cmd cmd;
3230         u32 seq;
3231         int ret = 0;
3232
3233         gk20a_dbg_fn("");
3234
3235         mutex_lock(&pmu->elpg_mutex);
3236
3237         pmu->elpg_refcnt--;
3238         if (pmu->elpg_refcnt > 0) {
3239                 gk20a_warn(dev_from_gk20a(g),
3240                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3241                 __func__, pmu->elpg_refcnt);
3242                 WARN_ON(1);
3243                 ret = 0;
3244                 goto exit_unlock;
3245         }
3246
3247         /* cancel off_on_pending and return */
3248         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3249                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3250                 ret = 0;
3251                 goto exit_reschedule;
3252         }
3253         /* wait if on_pending */
3254         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3255
3256                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3257                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3258
3259                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3260                         gk20a_err(dev_from_gk20a(g),
3261                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3262                                 pmu->elpg_stat);
3263                         pmu_dump_elpg_stats(pmu);
3264                         pmu_dump_falcon_stats(pmu);
3265                         ret = -EBUSY;
3266                         goto exit_unlock;
3267                 }
3268         }
3269         /* return if ELPG is already off */
3270         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3271                 ret = 0;
3272                 goto exit_reschedule;
3273         }
3274
3275         memset(&cmd, 0, sizeof(struct pmu_cmd));
3276         cmd.hdr.unit_id = PMU_UNIT_PG;
3277         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3278         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3279         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3280         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3281
3282         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3283
3284         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
3285         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3286                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3287
3288         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3289                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3290         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3291                 gk20a_err(dev_from_gk20a(g),
3292                         "ELPG_DISALLOW_ACK failed");
3293                 pmu_dump_elpg_stats(pmu);
3294                 pmu_dump_falcon_stats(pmu);
3295                 ret = -EBUSY;
3296                 goto exit_unlock;
3297         }
3298
3299 exit_reschedule:
3300 exit_unlock:
3301         mutex_unlock(&pmu->elpg_mutex);
3302         gk20a_dbg_fn("done");
3303         return ret;
3304 }
3305
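/* Start or stop PMU perfmon sampling. */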
3306 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3307 {
3308         struct pmu_gk20a *pmu = &g->pmu;
3309         int err;
3310
3311         gk20a_dbg_fn("");
3312
3313         if (enable)
3314                 err = pmu_perfmon_start_sampling(pmu);
3315         else
3316                 err = pmu_perfmon_stop_sampling(pmu);
3317
3318         return err;
3319 }
3320
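/*
 * gk20a_pmu_destroy() - tear down PMU state on power-off.
 *
 * Cancels the pending pg_init work, folds the current hardware ELPG
 * residency counters into the software totals, disables ELPG, disables the
 * PMU engine with its interrupts masked and resets the PMU state/ready
 * flags.
 */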
3321 int gk20a_pmu_destroy(struct gk20a *g)
3322 {
3323         struct pmu_gk20a *pmu = &g->pmu;
3324         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3325
3326         gk20a_dbg_fn("");
3327
3328         if (!support_gk20a_pmu())
3329                 return 0;
3330
3331         /* make sure the pending operations are finished before we continue */
3332         cancel_work_sync(&pmu->pg_init);
3333
3334         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3335                 &elpg_ungating_time, &gating_cnt);
3336
3337         gk20a_pmu_disable_elpg(g);
3338         pmu->initialized = false;
3339
3340         /* update the s/w ELPG residency counters */
3341         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3342         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3343         g->pg_gating_cnt += gating_cnt;
3344
3345         mutex_lock(&pmu->isr_enable_lock);
3346         pmu_enable(pmu, false);
3347         pmu->isr_enabled = false;
3348         mutex_unlock(&pmu->isr_enable_lock);
3349
3350         pmu->pmu_state = PMU_STATE_OFF;
3351         pmu->pmu_ready = false;
3352         pmu->perfmon_ready = false;
3353         pmu->zbc_ready = false;
3354
3355         gk20a_dbg_fn("done");
3356         return 0;
3357 }
3358
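/*
 * gk20a_pmu_load_norm() - read the PMU perfmon load sample.
 *
 * Copies a 16-bit sample from pmu->sample_buffer in DMEM and returns it
 * scaled down by a factor of 10.  Reports a load of 0 until perfmon
 * sampling is ready.
 */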
3359 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3360 {
3361         struct pmu_gk20a *pmu = &g->pmu;
3362         u16 _load = 0;
3363
3364         if (!pmu->perfmon_ready) {
3365                 *load = 0;
3366                 return 0;
3367         }
3368
3369         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3370         *load = _load / 10;
3371
3372         return 0;
3373 }
3374
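/*
 * Read the PMU idle counters used for load estimation: counter 1 is
 * reported as busy cycles and counter 2 as total cycles.  Both read back
 * as 0 while the GPU is powered off; otherwise a busy reference is held
 * around the register reads.
 */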
3375 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3376                                  u32 *total_cycles)
3377 {
3378         if (!g->power_on) {
3379                 *busy_cycles = 0;
3380                 *total_cycles = 0;
3381                 return;
3382         }
3383
3384         gk20a_busy(g->dev);
3385         *busy_cycles = pwr_pmu_idle_count_value_v(
3386                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3387         rmb();
3388         *total_cycles = pwr_pmu_idle_count_value_v(
3389                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3390         gk20a_idle(g->dev);
3391 }
3392
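/* Reset the idle counters (total first, then busy) while the GPU is on. */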
3393 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3394 {
3395         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3396
3397         if (!g->power_on)
3398                 return;
3399
3400         gk20a_busy(g->dev);
3401         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3402         wmb();
3403         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3404         gk20a_idle(g->dev);
3405 }
3406
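/*
 * Copy the PG statistics block out of PMU DMEM and report the ELPG
 * in-gating time, un-gating time (both in microseconds) and gating count.
 * All three read back as 0 while the PMU is not initialized.
 */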
3407 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3408                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3409 {
3410         struct pmu_gk20a *pmu = &g->pmu;
3411         struct pmu_pg_stats stats;
3412
3413         if (!pmu->initialized) {
3414                 *ingating_time = 0;
3415                 *ungating_time = 0;
3416                 *gating_cnt = 0;
3417                 return 0;
3418         }
3419
3420         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3421                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3422
3423         *ingating_time = stats.pg_ingating_time_us;
3424         *ungating_time = stats.pg_ungating_time_us;
3425         *gating_cnt = stats.pg_gating_cnt;
3426
3427         return 0;
3428 }
3429
3430 /* Send an Adaptive Power (AP) related command to PMU */
3431 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3432                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3433 {
3434         struct pmu_gk20a *pmu = &g->pmu;
3435         /* FIXME: where is the PG structure defined?? */
3436         u32 status = 0;
3437         struct pmu_cmd cmd;
3438         u32 seq;
3439         pmu_callback p_callback = NULL;
3440
3441         memset(&cmd, 0, sizeof(struct pmu_cmd));
3442
3443         /* Copy common members */
3444         cmd.hdr.unit_id = PMU_UNIT_PG;
3445         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3446
3447         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3448         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3449
3450         /* Copy other members of command */
3451         switch (p_ap_cmd->cmn.cmd_id) {
3452         case PMU_AP_CMD_ID_INIT:
3453                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
3454                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3455                         p_ap_cmd->init.pg_sampling_period_us;
3456                 p_callback = ap_callback_init_and_enable_ctrl;
3457                 break;
3458
3459         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3460                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
3461                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3462                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3463                 memcpy(
3464                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3465                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3466                         sizeof(struct pmu_ap_ctrl_init_params));
3467
3468                 p_callback = ap_callback_init_and_enable_ctrl;
3469                 break;
3470
3471         case PMU_AP_CMD_ID_ENABLE_CTRL:
3472                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
3473                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3474                         p_ap_cmd->enable_ctrl.ctrl_id;
3475                 break;
3476
3477         case PMU_AP_CMD_ID_DISABLE_CTRL:
3478                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
3479                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3480                         p_ap_cmd->disable_ctrl.ctrl_id;
3481                 break;
3482
3483         case PMU_AP_CMD_ID_KICK_CTRL:
3484                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
3485                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3486                         p_ap_cmd->kick_ctrl.ctrl_id;
3487                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3488                         p_ap_cmd->kick_ctrl.skip_count;
3489                 break;
3490
3491         default:
3492                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3493                         __func__, p_ap_cmd->cmn.cmd_id);
3494                 return 0x2f;
3495         }
3496
3497         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3498                         p_callback, pmu, &seq, ~0);
3499
3500         if (status) {
3501                 gk20a_dbg_pmu(
3502                         "%s: Unable to submit Adaptive Power Command %d\n",
3503                         __func__, p_ap_cmd->cmn.cmd_id);
3504                 goto err_return;
3505         }
3506
3507         /* TODO: Implement blocking calls (b_block) */
3508
3509 err_return:
3510         return status;
3511 }
3512
3513 static void ap_callback_init_and_enable_ctrl(
3514                 struct gk20a *g, struct pmu_msg *msg,
3515                 void *param, u32 seq_desc, u32 status)
3516 {
3517         /* Handle acknowledgements sent by the PMU for Adaptive Power commands */
3518         WARN_ON(!msg);
3519
3520         if (!status) {
3521                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3522                 case PMU_AP_MSG_ID_INIT_ACK:
3523                         gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
3524                         break;
3525
3526                 default:
3527                         gk20a_dbg_pmu(
3528                         "%s: Invalid Adaptive Power Message: %x\n",
3529                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3530                         break;
3531                 }
3532         }
3533 }
3534
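/*
 * Post PMU_AP_CMD_ID_INIT with the default sampling period
 * (APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US) to initialize adaptive ELPG.
 */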
3535 static int gk20a_aelpg_init(struct gk20a *g)
3536 {
3537         int status = 0;
3538
3539         /* Remove reliance on app_ctrl field. */
3540         union pmu_ap_cmd ap_cmd;
3541
3542         /* TODO: Check for elpg being ready? */
3543         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3544         ap_cmd.init.pg_sampling_period_us =
3545                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3546
3547         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3548         return status;
3549 }
3550
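/*
 * Post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL for @ctrl_id using the default
 * APCTRL_* idle-filter, target-saving, break-even and cycles-per-sample
 * parameters.
 */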
3551 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3552 {
3553         int status = 0;
3554         union pmu_ap_cmd ap_cmd;
3555
3556         /* TODO: Probably check if ELPG is ready? */
3557
3558         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3559         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3560         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3561                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3562         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3563                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3564         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3565                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3566         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3567                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3568
3569         switch (ctrl_id) {
3570         case PMU_AP_CTRL_ID_GRAPHICS:
3571                 break;
3572         default:
3573                 break;
3574         }
3575
3576         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3577         return status;
3578 }
3579
3580 #ifdef CONFIG_DEBUG_FS
3581 static int elpg_residency_show(struct seq_file *s, void *data)
3582 {
3583         struct gk20a *g = s->private;
3584         u32 ingating_time = 0;
3585         u32 ungating_time = 0;
3586         u32 gating_cnt;
3587         u64 total_ingating, total_ungating, residency, divisor, dividend;
3588
3589         /* Don't unnecessarily power on the device */
3590         if (g->power_on) {
3591                 gk20a_busy(g->dev);
3592                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3593                         &ungating_time, &gating_cnt);
3594                 gk20a_idle(g->dev);
3595         }
3596         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3597         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3598         divisor = total_ingating + total_ungating;
3599
3600         /* We compute the residency on a scale of 1000 */
3601         dividend = total_ingating * 1000;
3602
3603         if (divisor)
3604                 residency = div64_u64(dividend, divisor);
3605         else
3606                 residency = 0;
3607
3608         seq_printf(s, "Time in ELPG: %llu us\n"
3609                         "Time out of ELPG: %llu us\n"
3610                         "ELPG residency ratio: %llu\n",
3611                         total_ingating, total_ungating, residency);
3612         return 0;
3613
3614 }
3615
3616 static int elpg_residency_open(struct inode *inode, struct file *file)
3617 {
3618         return single_open(file, elpg_residency_show, inode->i_private);
3619 }
3620
3621 static const struct file_operations elpg_residency_fops = {
3622         .open           = elpg_residency_open,
3623         .read           = seq_read,
3624         .llseek         = seq_lseek,
3625         .release        = single_release,
3626 };
3627
3628 static int elpg_transitions_show(struct seq_file *s, void *data)
3629 {
3630         struct gk20a *g = s->private;
3631         u32 ingating_time, ungating_time, total_gating_cnt;
3632         u32 gating_cnt = 0;
3633
3634         if (g->power_on) {
3635                 gk20a_busy(g->dev);
3636                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3637                         &ungating_time, &gating_cnt);
3638                 gk20a_idle(g->dev);
3639         }
3640         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3641
3642         seq_printf(s, "%u\n", total_gating_cnt);
3643         return 0;
3644
3645 }
3646
3647 static int elpg_transitions_open(struct inode *inode, struct file *file)
3648 {
3649         return single_open(file, elpg_transitions_show, inode->i_private);
3650 }
3651
3652 static const struct file_operations elpg_transitions_fops = {
3653         .open           = elpg_transitions_open,
3654         .read           = seq_read,
3655         .llseek         = seq_lseek,
3656         .release        = single_release,
3657 };
3658
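/*
 * Create the "elpg_residency" and "elpg_transitions" debugfs nodes under
 * the platform's debugfs directory.
 */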
3659 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3660 {
3661         struct dentry *d;
3662         struct gk20a_platform *platform = platform_get_drvdata(dev);
3663         struct gk20a *g = get_gk20a(dev);
3664
3665         d = debugfs_create_file(
3666                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3667                                                 &elpg_residency_fops);
3668         if (!d)
3669                 goto err_out;
3670
3671         d = debugfs_create_file(
3672                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3673                                                 &elpg_transitions_fops);
3674         if (!d)
3675                 goto err_out;
3676
3677         return 0;
3678
3679 err_out:
3680         pr_err("%s: Failed to make debugfs node\n", __func__);
3681         debugfs_remove_recursive(platform->debugfs);
3682         return -ENOMEM;
3683 }
3684 #endif