1 /*
2  * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "gr_gk20a.h"
31 #include "hw_mc_gk20a.h"
32 #include "hw_pwr_gk20a.h"
33 #include "hw_top_gk20a.h"
34
35 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
36
37 #define gk20a_dbg_pmu(fmt, arg...) \
38         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
39
40 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
41 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
42                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
43 static void pmu_setup_hw(struct work_struct *work);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
50 static int pmu_init_powergating(struct gk20a *g);
51
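/*
 * The PMU firmware interface differs between ucode application
 * versions.  The *_v0/*_v1 helpers below hide those layout
 * differences (command line args, DMEM allocation descriptors, the
 * init message, perfmon commands and sequence payloads); the
 * matching set is hooked into g->ops.pmu_ver by gk20a_init_pmu()
 * based on the app_version field of the loaded ucode descriptor.
 */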
52 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
53 {
54         return sizeof(struct pmu_cmdline_args_v0);
55 }
56
57 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
58 {
59         return sizeof(struct pmu_cmdline_args_v1);
60 }
61
62 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
63 {
64         pmu->args_v1.cpu_freq_hz = freq;
65 }
66
67 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
68 {
69         pmu->args_v0.cpu_freq_hz = freq;
70 }
71
72 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
73 {
74         return (void *)(&pmu->args_v1);
75 }
76
77 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
78 {
79         return (void *)(&pmu->args_v0);
80 }
81
82 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
83 {
84         return sizeof(struct pmu_allocation_v1);
85 }
86
87 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
88 {
89         return sizeof(struct pmu_allocation_v0);
90 }
91
92 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
93         void **pmu_alloc_ptr, void *assign_ptr)
94 {
95         struct pmu_allocation_v1 **pmu_a_ptr =
96                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
97         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
98 }
99
100 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
101         void **pmu_alloc_ptr, void *assign_ptr)
102 {
103         struct pmu_allocation_v0 **pmu_a_ptr =
104                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
105         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
106 }
107
108 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
109         void *pmu_alloc_ptr, u16 size)
110 {
111         struct pmu_allocation_v1 *pmu_a_ptr =
112                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
113         pmu_a_ptr->alloc.dmem.size = size;
114 }
115
116 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
117         void *pmu_alloc_ptr, u16 size)
118 {
119         struct pmu_allocation_v0 *pmu_a_ptr =
120                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
121         pmu_a_ptr->alloc.dmem.size = size;
122 }
123
124 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
125         void *pmu_alloc_ptr)
126 {
127         struct pmu_allocation_v1 *pmu_a_ptr =
128                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
129         return pmu_a_ptr->alloc.dmem.size;
130 }
131
132 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
133         void *pmu_alloc_ptr)
134 {
135         struct pmu_allocation_v0 *pmu_a_ptr =
136                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
137         return pmu_a_ptr->alloc.dmem.size;
138 }
139
140 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
141         void *pmu_alloc_ptr)
142 {
143         struct pmu_allocation_v1 *pmu_a_ptr =
144                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
145         return pmu_a_ptr->alloc.dmem.offset;
146 }
147
148 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
149         void *pmu_alloc_ptr)
150 {
151         struct pmu_allocation_v0 *pmu_a_ptr =
152                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
153         return pmu_a_ptr->alloc.dmem.offset;
154 }
155
156 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
157         void *pmu_alloc_ptr)
158 {
159         struct pmu_allocation_v1 *pmu_a_ptr =
160                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
161         return &pmu_a_ptr->alloc.dmem.offset;
162 }
163
164 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
165         void *pmu_alloc_ptr)
166 {
167         struct pmu_allocation_v0 *pmu_a_ptr =
168                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
169         return &pmu_a_ptr->alloc.dmem.offset;
170 }
171
172 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
173         void *pmu_alloc_ptr, u32 offset)
174 {
175         struct pmu_allocation_v1 *pmu_a_ptr =
176                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
177         pmu_a_ptr->alloc.dmem.offset = offset;
178 }
179
180 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
181         void *pmu_alloc_ptr, u32 offset)
182 {
183         struct pmu_allocation_v0 *pmu_a_ptr =
184                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
185         pmu_a_ptr->alloc.dmem.offset = offset;
186 }
187
188 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
189 {
190         return (void *)(&(init->pmu_init_v1));
191 }
192
193 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
194 {
195         struct pmu_init_msg_pmu_v1 *init =
196                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
197         return init->sw_managed_area_offset;
198 }
199
200 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
201 {
202         struct pmu_init_msg_pmu_v1 *init =
203                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
204         return init->sw_managed_area_size;
205 }
206
207 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
208 {
209         return (void *)(&(init->pmu_init_v0));
210 }
211
212 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
213 {
214         struct pmu_init_msg_pmu_v0 *init =
215                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
216         return init->sw_managed_area_offset;
217 }
218
219 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
220 {
221         struct pmu_init_msg_pmu_v0 *init =
222                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
223         return init->sw_managed_area_size;
224 }
225
226 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
227 {
228         return sizeof(struct pmu_perfmon_cmd_start_v1);
229 }
230
231 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
232 {
233         return sizeof(struct pmu_perfmon_cmd_start_v0);
234 }
235
236 static int get_perfmon_cmd_start_offsetofvar_v1(
237         enum pmu_perfmon_cmd_start_fields field)
238 {
239         switch (field) {
240         case COUNTER_ALLOC:
241                 return offsetof(struct pmu_perfmon_cmd_start_v1,
242                 counter_alloc);
243         default:
244                 return -EINVAL;
245                 break;
246         }
247         return 0;
248 }
249
250 static int get_perfmon_cmd_start_offsetofvar_v0(
251         enum pmu_perfmon_cmd_start_fields field)
252 {
253         switch (field) {
254         case COUNTER_ALLOC:
255                 return offsetof(struct pmu_perfmon_cmd_start_v0,
256                 counter_alloc);
257         default:
258                 return -EINVAL;
259                 break;
260         }
261         return 0;
262 }
263
264 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
265 {
266         return sizeof(struct pmu_perfmon_cmd_init_v1);
267 }
268
269 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
270 {
271         return sizeof(struct pmu_perfmon_cmd_init_v0);
272 }
273
274 static int get_perfmon_cmd_init_offsetofvar_v1(
275         enum pmu_perfmon_cmd_start_fields field)
276 {
277         switch (field) {
278         case COUNTER_ALLOC:
279                 return offsetof(struct pmu_perfmon_cmd_init_v1,
280                 counter_alloc);
281         default:
282                 return -EINVAL;
283                 break;
284         }
285         return 0;
286 }
287
288 static int get_perfmon_cmd_init_offsetofvar_v0(
289         enum pmu_perfmon_cmd_start_fields field)
290 {
291         switch (field) {
292         case COUNTER_ALLOC:
293                 return offsetof(struct pmu_perfmon_cmd_init_v0,
294                 counter_alloc);
295         default:
296                 return -EINVAL;
297                 break;
298         }
299         return 0;
300 }
301
302 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
303 {
304         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
305         start->cmd_type = value;
306 }
307
308 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
309 {
310         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
311         start->cmd_type = value;
312 }
313
314 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
315 {
316         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
317         start->group_id = value;
318 }
319
320 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
321 {
322         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
323         start->group_id = value;
324 }
325
326 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
327 {
328         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
329         start->state_id = value;
330 }
331
332 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
333 {
334         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
335         start->state_id = value;
336 }
337
338 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
339 {
340         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
341         start->flags = value;
342 }
343
344 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
345 {
346         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
347         start->flags = value;
348 }
349
350 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
351 {
352         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
353         return start->flags;
354 }
355
356 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
357 {
358         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
359         return start->flags;
360 }
361
362 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
363         u16 value)
364 {
365         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
366         init->sample_buffer = value;
367 }
368
369 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
370         u16 value)
371 {
372         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
373         init->sample_buffer = value;
374 }
375
376 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
377         u8 value)
378 {
379         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
380         init->to_decrease_count = value;
381 }
382
383 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
384         u8 value)
385 {
386         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
387         init->to_decrease_count = value;
388 }
389
390 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
391         u8 value)
392 {
393         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
394         init->base_counter_id = value;
395 }
396
397 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
398         u8 value)
399 {
400         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
401         init->base_counter_id = value;
402 }
403
404 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
405         u32 value)
406 {
407         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
408         init->sample_period_us = value;
409 }
410
411 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
412         u32 value)
413 {
414         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
415         init->sample_period_us = value;
416 }
417
418 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
419         u8 value)
420 {
421         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
422         init->num_counters = value;
423 }
424
425 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
426         u8 value)
427 {
428         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
429         init->num_counters = value;
430 }
431
432 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
433         u8 value)
434 {
435         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
436         init->samples_in_moving_avg = value;
437 }
438
439 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
440         u8 value)
441 {
442         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
443         init->samples_in_moving_avg = value;
444 }
445
446 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
447         u32 id, void *pmu_init_msg)
448 {
449         struct pmu_init_msg_pmu_v0 *init =
450                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
451         queue->index    = init->queue_info[id].index;
452         queue->offset   = init->queue_info[id].offset;
453         queue->size = init->queue_info[id].size;
454 }
455
456 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
457         u32 id, void *pmu_init_msg)
458 {
459         struct pmu_init_msg_pmu_v1 *init =
460                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
461         queue->index    = init->queue_info[id].index;
462         queue->offset   = init->queue_info[id].offset;
463         queue->size = init->queue_info[id].size;
464 }
465
466 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
467 {
468         return (void *)(&seq->in_v1);
469 }
470
471 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
472 {
473         return (void *)(&seq->in_v0);
474 }
475
476 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
477 {
478         return (void *)(&seq->out_v1);
479 }
480
481 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
482 {
483         return (void *)(&seq->out_v0);
484 }
485
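/*
 * Bind the version-specific helpers above into g->ops.pmu_ver
 * according to the ucode's app_version.  Note that the ZBC table
 * update command id also differs between the two interfaces
 * (16 for APP_VERSION_1, 14 for APP_VERSION_0).
 */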
486 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
487 {
488         struct gk20a *g = pmu->g;
489         switch (pmu->desc->app_version) {
490         case APP_VERSION_1:
491                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
492                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
493                         pmu_cmdline_size_v1;
494                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
495                         set_pmu_cmdline_args_cpufreq_v1;
496                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
497                         get_pmu_cmdline_args_ptr_v1;
498                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
499                         get_pmu_allocation_size_v1;
500                 g->ops.pmu_ver.set_pmu_allocation_ptr =
501                         set_pmu_allocation_ptr_v1;
502                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
503                         pmu_allocation_set_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
505                         pmu_allocation_get_dmem_size_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
507                         pmu_allocation_get_dmem_offset_v1;
508                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
509                         pmu_allocation_get_dmem_offset_addr_v1;
510                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
511                         pmu_allocation_set_dmem_offset_v1;
512                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
513                         get_pmu_init_msg_pmu_queue_params_v1;
514                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
515                         get_pmu_msg_pmu_init_msg_ptr_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
517                         get_pmu_init_msg_pmu_sw_mg_off_v1;
518                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
519                         get_pmu_init_msg_pmu_sw_mg_size_v1;
520                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
521                         get_pmu_perfmon_cmd_start_size_v1;
522                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
523                         get_perfmon_cmd_start_offsetofvar_v1;
524                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
525                         perfmon_start_set_cmd_type_v1;
526                 g->ops.pmu_ver.perfmon_start_set_group_id =
527                         perfmon_start_set_group_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_state_id =
529                         perfmon_start_set_state_id_v1;
530                 g->ops.pmu_ver.perfmon_start_set_flags =
531                         perfmon_start_set_flags_v1;
532                 g->ops.pmu_ver.perfmon_start_get_flags =
533                         perfmon_start_get_flags_v1;
534                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
535                         get_pmu_perfmon_cmd_init_size_v1;
536                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
537                         get_perfmon_cmd_init_offsetofvar_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
539                         perfmon_cmd_init_set_sample_buffer_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
541                         perfmon_cmd_init_set_dec_cnt_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
543                         perfmon_cmd_init_set_base_cnt_id_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
545                         perfmon_cmd_init_set_samp_period_us_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
547                         perfmon_cmd_init_set_num_cnt_v1;
548                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
549                         perfmon_cmd_init_set_mov_avg_v1;
550                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
551                         get_pmu_sequence_in_alloc_ptr_v1;
552                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
553                         get_pmu_sequence_out_alloc_ptr_v1;
554                 break;
555         case APP_VERSION_0:
556                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
557                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
558                         pmu_cmdline_size_v0;
559                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
560                         set_pmu_cmdline_args_cpufreq_v0;
561                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
562                         get_pmu_cmdline_args_ptr_v0;
563                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
564                         get_pmu_allocation_size_v0;
565                 g->ops.pmu_ver.set_pmu_allocation_ptr =
566                         set_pmu_allocation_ptr_v0;
567                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
568                         pmu_allocation_set_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
570                         pmu_allocation_get_dmem_size_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
572                         pmu_allocation_get_dmem_offset_v0;
573                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
574                         pmu_allocation_get_dmem_offset_addr_v0;
575                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
576                         pmu_allocation_set_dmem_offset_v0;
577                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
578                         get_pmu_init_msg_pmu_queue_params_v0;
579                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
580                         get_pmu_msg_pmu_init_msg_ptr_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
582                         get_pmu_init_msg_pmu_sw_mg_off_v0;
583                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
584                         get_pmu_init_msg_pmu_sw_mg_size_v0;
585                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
586                         get_pmu_perfmon_cmd_start_size_v0;
587                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
588                         get_perfmon_cmd_start_offsetofvar_v0;
589                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
590                         perfmon_start_set_cmd_type_v0;
591                 g->ops.pmu_ver.perfmon_start_set_group_id =
592                         perfmon_start_set_group_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_state_id =
594                         perfmon_start_set_state_id_v0;
595                 g->ops.pmu_ver.perfmon_start_set_flags =
596                         perfmon_start_set_flags_v0;
597                 g->ops.pmu_ver.perfmon_start_get_flags =
598                         perfmon_start_get_flags_v0;
599                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
600                         get_pmu_perfmon_cmd_init_size_v0;
601                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
602                         get_perfmon_cmd_init_offsetofvar_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
604                         perfmon_cmd_init_set_sample_buffer_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
606                         perfmon_cmd_init_set_dec_cnt_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
608                         perfmon_cmd_init_set_base_cnt_id_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
610                         perfmon_cmd_init_set_samp_period_us_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
612                         perfmon_cmd_init_set_num_cnt_v0;
613                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
614                         perfmon_cmd_init_set_mov_avg_v0;
615                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
616                         get_pmu_sequence_in_alloc_ptr_v0;
617                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
618                         get_pmu_sequence_out_alloc_ptr_v0;
619                 break;
620         default:
621                 gk20a_err(dev_from_gk20a(pmu->g),
622                 "PMU code version not supported\n");
623                 return -EINVAL;
624                 break;
625         }
626         return 0;
627 }
628
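/*
 * Read 'size' bytes from PMU DMEM at offset 'src' into 'dst' using
 * the auto-incrementing DMEMC/DMEMD port registers.  'src' must be
 * 4-byte aligned; a trailing partial word is copied byte-wise.
 * Serialized against pmu_copy_to_dmem() by pmu->pmu_copy_lock.
 */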
629 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
630                 u32 src, u8 *dst, u32 size, u8 port)
631 {
632         struct gk20a *g = pmu->g;
633         u32 i, words, bytes;
634         u32 data, addr_mask;
635         u32 *dst_u32 = (u32*)dst;
636
637         if (size == 0) {
638                 gk20a_err(dev_from_gk20a(g),
639                         "size is zero");
640                 return;
641         }
642
643         if (src & 0x3) {
644                 gk20a_err(dev_from_gk20a(g),
645                         "src (0x%08x) not 4-byte aligned", src);
646                 return;
647         }
648
649         mutex_lock(&pmu->pmu_copy_lock);
650
651         words = size >> 2;
652         bytes = size & 0x3;
653
654         addr_mask = pwr_falcon_dmemc_offs_m() |
655                     pwr_falcon_dmemc_blk_m();
656
657         src &= addr_mask;
658
659         gk20a_writel(g, pwr_falcon_dmemc_r(port),
660                 src | pwr_falcon_dmemc_aincr_f(1));
661
662         for (i = 0; i < words; i++)
663                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
664
665         if (bytes > 0) {
666                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
667                 for (i = 0; i < bytes; i++) {
668                         dst[(words << 2) + i] = ((u8 *)&data)[i];
669                 }
670         }
671         mutex_unlock(&pmu->pmu_copy_lock);
672         return;
673 }
674
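/*
 * Write 'size' bytes from 'src' into PMU DMEM at offset 'dst'
 * through the same auto-incrementing port, then read DMEMC back to
 * verify that the expected number of words was written.
 */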
675 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
676                 u32 dst, u8 *src, u32 size, u8 port)
677 {
678         struct gk20a *g = pmu->g;
679         u32 i, words, bytes;
680         u32 data, addr_mask;
681         u32 *src_u32 = (u32*)src;
682
683         if (size == 0) {
684                 gk20a_err(dev_from_gk20a(g),
685                         "size is zero");
686                 return;
687         }
688
689         if (dst & 0x3) {
690                 gk20a_err(dev_from_gk20a(g),
691                         "dst (0x%08x) not 4-byte aligned", dst);
692                 return;
693         }
694
695         mutex_lock(&pmu->pmu_copy_lock);
696
697         words = size >> 2;
698         bytes = size & 0x3;
699
700         addr_mask = pwr_falcon_dmemc_offs_m() |
701                     pwr_falcon_dmemc_blk_m();
702
703         dst &= addr_mask;
704
705         gk20a_writel(g, pwr_falcon_dmemc_r(port),
706                 dst | pwr_falcon_dmemc_aincw_f(1));
707
708         for (i = 0; i < words; i++)
709                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
710
711         if (bytes > 0) {
712                 data = 0;
713                 for (i = 0; i < bytes; i++)
714                         ((u8 *)&data)[i] = src[(words << 2) + i];
715                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
716         }
717
718         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
719         size = ALIGN(size, 4);
720         if (data != dst + size) {
721                 gk20a_err(dev_from_gk20a(g),
722                         "copy failed. bytes written %d, expected %d",
723                         data - dst, size);
724         }
725         mutex_unlock(&pmu->pmu_copy_lock);
726         return;
727 }
728
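/*
 * Poll the falcon idle state register until both the falcon core
 * and its external units report idle, or give up after ~2 seconds.
 */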
729 static int pmu_idle(struct pmu_gk20a *pmu)
730 {
731         struct gk20a *g = pmu->g;
732         unsigned long end_jiffies = jiffies +
733                 msecs_to_jiffies(2000);
734         u32 idle_stat;
735
736         /* wait for pmu idle */
737         do {
738                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
739
740                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
741                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
742                         break;
743                 }
744
745                 if (time_after_eq(jiffies, end_jiffies)) {
746                         gk20a_err(dev_from_gk20a(g),
747                                 "timeout waiting for pmu idle: 0x%08x",
748                                 idle_stat);
749                         return -EBUSY;
750                 }
751                 usleep_range(100, 200);
752         } while (1);
753
754         gk20a_dbg_fn("done");
755         return 0;
756 }
757
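/*
 * Mask all PMU interrupts at both the MC and falcon level, then, if
 * enabling, route watchdog/halt/swgen0 interrupts to the host (the
 * rest stay with the falcon), set the falcon interrupt mask and
 * re-enable the PMU bit in the MC interrupt mask.
 */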
758 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
759 {
760         struct gk20a *g = pmu->g;
761
762         gk20a_dbg_fn("");
763
764         gk20a_writel(g, mc_intr_mask_0_r(),
765                 gk20a_readl(g, mc_intr_mask_0_r()) &
766                 ~mc_intr_mask_0_pmu_enabled_f());
767         gk20a_writel(g, mc_intr_mask_1_r(),
768                 gk20a_readl(g, mc_intr_mask_1_r()) &
769                 ~mc_intr_mask_1_pmu_enabled_f());
770
771         gk20a_writel(g, pwr_falcon_irqmclr_r(),
772                 pwr_falcon_irqmclr_gptmr_f(1)  |
773                 pwr_falcon_irqmclr_wdtmr_f(1)  |
774                 pwr_falcon_irqmclr_mthd_f(1)   |
775                 pwr_falcon_irqmclr_ctxsw_f(1)  |
776                 pwr_falcon_irqmclr_halt_f(1)   |
777                 pwr_falcon_irqmclr_exterr_f(1) |
778                 pwr_falcon_irqmclr_swgen0_f(1) |
779                 pwr_falcon_irqmclr_swgen1_f(1) |
780                 pwr_falcon_irqmclr_ext_f(0xff));
781
782         if (enable) {
783                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
784                 gk20a_writel(g, pwr_falcon_irqdest_r(),
785                         pwr_falcon_irqdest_host_gptmr_f(0)    |
786                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
787                         pwr_falcon_irqdest_host_mthd_f(0)     |
788                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
789                         pwr_falcon_irqdest_host_halt_f(1)     |
790                         pwr_falcon_irqdest_host_exterr_f(0)   |
791                         pwr_falcon_irqdest_host_swgen0_f(1)   |
792                         pwr_falcon_irqdest_host_swgen1_f(0)   |
793                         pwr_falcon_irqdest_host_ext_f(0xff)   |
794                         pwr_falcon_irqdest_target_gptmr_f(1)  |
795                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
796                         pwr_falcon_irqdest_target_mthd_f(0)   |
797                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
798                         pwr_falcon_irqdest_target_halt_f(0)   |
799                         pwr_falcon_irqdest_target_exterr_f(0) |
800                         pwr_falcon_irqdest_target_swgen0_f(0) |
801                         pwr_falcon_irqdest_target_swgen1_f(0) |
802                         pwr_falcon_irqdest_target_ext_f(0xff));
803
804                 /* 0=disable, 1=enable */
805                 gk20a_writel(g, pwr_falcon_irqmset_r(),
806                         pwr_falcon_irqmset_gptmr_f(1)  |
807                         pwr_falcon_irqmset_wdtmr_f(1)  |
808                         pwr_falcon_irqmset_mthd_f(0)   |
809                         pwr_falcon_irqmset_ctxsw_f(0)  |
810                         pwr_falcon_irqmset_halt_f(1)   |
811                         pwr_falcon_irqmset_exterr_f(1) |
812                         pwr_falcon_irqmset_swgen0_f(1) |
813                         pwr_falcon_irqmset_swgen1_f(1));
814
815                 gk20a_writel(g, mc_intr_mask_0_r(),
816                         gk20a_readl(g, mc_intr_mask_0_r()) |
817                         mc_intr_mask_0_pmu_enabled_f());
818         }
819
820         gk20a_dbg_fn("done");
821 }
822
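/*
 * Gate or ungate the PMU in mc_enable.  When enabling, wait for the
 * falcon IMEM/DMEM scrubbing started by the reset to finish before
 * declaring the engine usable.
 */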
823 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
824 {
825         struct gk20a *g = pmu->g;
826
827         gk20a_dbg_fn("");
828
829         if (enable) {
830                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
831                 gk20a_enable(g, mc_enable_pwr_enabled_f());
832
833                 do {
834                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
835                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
836                                  pwr_falcon_dmactl_imem_scrubbing_m());
837
838                         if (!w) {
839                                 gk20a_dbg_fn("done");
840                                 return 0;
841                         }
842                         udelay(GR_IDLE_CHECK_DEFAULT);
843                 } while (--retries || !tegra_platform_is_silicon());
844
845                 gk20a_disable(g, mc_enable_pwr_enabled_f());
846                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
847
848                 return -ETIMEDOUT;
849         } else {
850                 gk20a_disable(g, mc_enable_pwr_enabled_f());
851                 return 0;
852         }
853 }
854
855 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
856 {
857         struct gk20a *g = pmu->g;
858         u32 pmc_enable;
859         int err;
860
861         gk20a_dbg_fn("");
862
863         if (!enable) {
864                 pmc_enable = gk20a_readl(g, mc_enable_r());
865                 if (mc_enable_pwr_v(pmc_enable) !=
866                     mc_enable_pwr_disabled_v()) {
867
868                         pmu_enable_irq(pmu, false);
869                         pmu_enable_hw(pmu, false);
870                 }
871         } else {
872                 err = pmu_enable_hw(pmu, true);
873                 if (err)
874                         return err;
875
876                 /* TBD: post reset */
877
878                 err = pmu_idle(pmu);
879                 if (err)
880                         return err;
881
882                 pmu_enable_irq(pmu, true);
883         }
884
885         gk20a_dbg_fn("done");
886         return 0;
887 }
888
889 static int pmu_reset(struct pmu_gk20a *pmu)
890 {
891         int err;
892
893         err = pmu_idle(pmu);
894         if (err)
895                 return err;
896
897         /* TBD: release pmu hw mutex */
898
899         err = pmu_enable(pmu, false);
900         if (err)
901                 return err;
902
903         /* TBD: cancel all sequences */
904         /* TBD: init all sequences and state tables */
905         /* TBD: restore pre-init message handler */
906
907         err = pmu_enable(pmu, true);
908         if (err)
909                 return err;
910
911         return 0;
912 }
913
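/*
 * Bootstrap the PMU falcon: point it at the PMU instance block, copy
 * the command line arguments to the top of DMEM, push the boot
 * arguments (code/data offsets and sizes) through the DMEM port, DMA
 * the bootloader into IMEM in 256-byte blocks, then set the boot
 * vector and start the falcon CPU.
 */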
914 static int pmu_bootstrap(struct pmu_gk20a *pmu)
915 {
916         struct gk20a *g = pmu->g;
917         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
918         struct mm_gk20a *mm = &g->mm;
919         struct pmu_ucode_desc *desc = pmu->desc;
920         u64 addr_code, addr_data, addr_load;
921         u32 i, blocks, addr_args;
922
923         gk20a_dbg_fn("");
924
925         gk20a_writel(g, pwr_falcon_itfen_r(),
926                 gk20a_readl(g, pwr_falcon_itfen_r()) |
927                 pwr_falcon_itfen_ctxen_enable_f());
928         gk20a_writel(g, pwr_pmu_new_instblk_r(),
929                 pwr_pmu_new_instblk_ptr_f(
930                         mm->pmu.inst_block.cpu_pa >> 12) |
931                 pwr_pmu_new_instblk_valid_f(1) |
932                 pwr_pmu_new_instblk_target_sys_coh_f());
933
934         /* TBD: load all other surfaces */
935
936         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
937                 clk_get_rate(platform->clk[1]));
938
939         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
940                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
941                         << GK20A_PMU_DMEM_BLKSIZE2) -
942                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
943
944         pmu_copy_to_dmem(pmu, addr_args,
945                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
946                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
947
948         gk20a_writel(g, pwr_falcon_dmemc_r(0),
949                 pwr_falcon_dmemc_offs_f(0) |
950                 pwr_falcon_dmemc_blk_f(0)  |
951                 pwr_falcon_dmemc_aincw_f(1));
952
953         addr_code = u64_lo32((pmu->ucode.pmu_va +
954                         desc->app_start_offset +
955                         desc->app_resident_code_offset) >> 8);
956         addr_data = u64_lo32((pmu->ucode.pmu_va +
957                         desc->app_start_offset +
958                         desc->app_resident_data_offset) >> 8);
959         addr_load = u64_lo32((pmu->ucode.pmu_va +
960                         desc->bootloader_start_offset) >> 8);
961
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
970         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
971         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
972
973         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
974                 addr_load - (desc->bootloader_imem_offset >> 8));
975
976         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
977
978         for (i = 0; i < blocks; i++) {
979                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
982                         desc->bootloader_imem_offset + (i << 8));
983                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
984                         pwr_falcon_dmatrfcmd_imem_f(1)  |
985                         pwr_falcon_dmatrfcmd_write_f(0) |
986                         pwr_falcon_dmatrfcmd_size_f(6)  |
987                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
988         }
989
990         gk20a_writel(g, pwr_falcon_bootvec_r(),
991                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
992
993         gk20a_writel(g, pwr_falcon_cpuctl_r(),
994                 pwr_falcon_cpuctl_startcpu_f(1));
995
996         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
997
998         return 0;
999 }
1000
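/*
 * Sequences track in-flight PMU commands: pmu_seq_acquire() hands
 * out a free slot from the pmu_seq_tbl bitmap and pmu_seq_release()
 * returns it once the corresponding message has been handled.
 */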
1001 static void pmu_seq_init(struct pmu_gk20a *pmu)
1002 {
1003         u32 i;
1004
1005         memset(pmu->seq, 0,
1006                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1007         memset(pmu->pmu_seq_tbl, 0,
1008                 sizeof(pmu->pmu_seq_tbl));
1009
1010         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1011                 pmu->seq[i].id = i;
1012 }
1013
1014 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1015                         struct pmu_sequence **pseq)
1016 {
1017         struct gk20a *g = pmu->g;
1018         struct pmu_sequence *seq;
1019         u32 index;
1020
1021         mutex_lock(&pmu->pmu_seq_lock);
1022         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1023                                 PMU_MAX_NUM_SEQUENCES);
1024         if (index >= PMU_MAX_NUM_SEQUENCES) {
1025                 gk20a_err(dev_from_gk20a(g),
1026                         "no free sequence available");
1027                 mutex_unlock(&pmu->pmu_seq_lock);
1028                 return -EAGAIN;
1029         }
1030         set_bit(index, pmu->pmu_seq_tbl);
1031         mutex_unlock(&pmu->pmu_seq_lock);
1032
1033         seq = &pmu->seq[index];
1034         seq->state = PMU_SEQ_STATE_PENDING;
1035
1036         *pseq = seq;
1037         return 0;
1038 }
1039
1040 static void pmu_seq_release(struct pmu_gk20a *pmu,
1041                         struct pmu_sequence *seq)
1042 {
1043         struct gk20a *g = pmu->g;
1044         seq->state      = PMU_SEQ_STATE_FREE;
1045         seq->desc       = PMU_INVALID_SEQ_DESC;
1046         seq->callback   = NULL;
1047         seq->cb_params  = NULL;
1048         seq->msg        = NULL;
1049         seq->out_payload = NULL;
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1052         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1053                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1054
1055         clear_bit(seq->id, pmu->pmu_seq_tbl);
1056 }
1057
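/*
 * Per-queue setup and head/tail accessors.  Command queues use the
 * per-queue head/tail registers indexed by queue->index; the single
 * message queue uses the dedicated msgq head/tail registers.
 */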
1058 static int pmu_queue_init(struct pmu_gk20a *pmu,
1059                 u32 id, union pmu_init_msg_pmu *init)
1060 {
1061         struct gk20a *g = pmu->g;
1062         struct pmu_queue *queue = &pmu->queue[id];
1063         queue->id       = id;
1064         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1065
1066         queue->mutex_id = id;
1067         mutex_init(&queue->mutex);
1068
1069         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1070                 id, queue->index, queue->offset, queue->size);
1071
1072         return 0;
1073 }
1074
1075 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1076                         u32 *head, bool set)
1077 {
1078         struct gk20a *g = pmu->g;
1079
1080         BUG_ON(!head);
1081
1082         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1083
1084                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1085                         return -EINVAL;
1086
1087                 if (!set)
1088                         *head = pwr_pmu_queue_head_address_v(
1089                                 gk20a_readl(g,
1090                                         pwr_pmu_queue_head_r(queue->index)));
1091                 else
1092                         gk20a_writel(g,
1093                                 pwr_pmu_queue_head_r(queue->index),
1094                                 pwr_pmu_queue_head_address_f(*head));
1095         } else {
1096                 if (!set)
1097                         *head = pwr_pmu_msgq_head_val_v(
1098                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1099                 else
1100                         gk20a_writel(g,
1101                                 pwr_pmu_msgq_head_r(),
1102                                 pwr_pmu_msgq_head_val_f(*head));
1103         }
1104
1105         return 0;
1106 }
1107
1108 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1109                         u32 *tail, bool set)
1110 {
1111         struct gk20a *g = pmu->g;
1112
1113         BUG_ON(!tail);
1114
1115         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1116
1117                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1118                         return -EINVAL;
1119
1120                 if (!set)
1121                         *tail = pwr_pmu_queue_tail_address_v(
1122                                 gk20a_readl(g,
1123                                         pwr_pmu_queue_tail_r(queue->index)));
1124                 else
1125                         gk20a_writel(g,
1126                                 pwr_pmu_queue_tail_r(queue->index),
1127                                 pwr_pmu_queue_tail_address_f(*tail));
1128         } else {
1129                 if (!set)
1130                         *tail = pwr_pmu_msgq_tail_val_v(
1131                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1132                 else
1133                         gk20a_writel(g,
1134                                 pwr_pmu_msgq_tail_r(),
1135                                 pwr_pmu_msgq_tail_val_f(*tail));
1136         }
1137
1138         return 0;
1139 }
1140
1141 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1142                         u32 offset, u8 *dst, u32 size)
1143 {
1144         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1145 }
1146
1147 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1148                         u32 offset, u8 *src, u32 size)
1149 {
1150         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1151 }
1152
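/*
 * Hardware mutex protocol shared with the PMU firmware: obtain a
 * token from the mutex id register, write it into the mutex register
 * and read it back; ownership is only taken if the readback matches.
 * On failure the token is returned via the release register and the
 * acquire is retried.
 */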
1153 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1154 {
1155         struct gk20a *g = pmu->g;
1156         struct pmu_mutex *mutex;
1157         u32 data, owner, max_retry;
1158
1159         if (!pmu->initialized)
1160                 return -EINVAL;
1161
1162         BUG_ON(!token);
1163         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1164         BUG_ON(id > pmu->mutex_cnt);
1165
1166         mutex = &pmu->mutex[id];
1167
1168         owner = pwr_pmu_mutex_value_v(
1169                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1170
1171         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1172                 BUG_ON(mutex->ref_cnt == 0);
1173                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1174                 mutex->ref_cnt++;
1175                 return 0;
1176         }
1177
1178         max_retry = 40;
1179         do {
1180                 data = pwr_pmu_mutex_id_value_v(
1181                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1182                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1183                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1184                         gk20a_warn(dev_from_gk20a(g),
1185                                 "failed to generate mutex token: val 0x%08x",
1186                                 owner);
1187                         usleep_range(20, 40);
1188                         continue;
1189                 }
1190
1191                 owner = data;
1192                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1193                         pwr_pmu_mutex_value_f(owner));
1194
1195                 data = pwr_pmu_mutex_value_v(
1196                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1197
1198                 if (owner == data) {
1199                         mutex->ref_cnt = 1;
1200                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1201                                 mutex->index, *token);
1202                         *token = owner;
1203                         return 0;
1204                 } else {
1205                         gk20a_dbg_info("failed to acquire mutex idx=0x%08x",
1206                                 mutex->index);
1207
1208                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1209                         data = set_field(data,
1210                                 pwr_pmu_mutex_id_release_value_m(),
1211                                 pwr_pmu_mutex_id_release_value_f(owner));
1212                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1213
1214                         usleep_range(20, 40);
1215                         continue;
1216                 }
1217         } while (max_retry-- > 0);
1218
1219         return -EBUSY;
1220 }
1221
1222 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1223 {
1224         struct gk20a *g = pmu->g;
1225         struct pmu_mutex *mutex;
1226         u32 owner, data;
1227
1228         if (!pmu->initialized)
1229                 return -EINVAL;
1230
1231         BUG_ON(!token);
1232         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1233         BUG_ON(id > pmu->mutex_cnt);
1234
1235         mutex = &pmu->mutex[id];
1236
1237         owner = pwr_pmu_mutex_value_v(
1238                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1239
1240         if (*token != owner) {
1241                 gk20a_err(dev_from_gk20a(g),
1242                         "requester 0x%08x does NOT match owner 0x%08x",
1243                         *token, owner);
1244                 return -EINVAL;
1245         }
1246
1247         if (--mutex->ref_cnt == 0) {
1248                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1249                         pwr_pmu_mutex_value_initial_lock_f());
1250
1251                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1252                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1253                         pwr_pmu_mutex_id_release_value_f(owner));
1254                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1255
1256                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1257                         mutex->index, *token);
1258         }
1259
1260         return 0;
1261 }
1262
1263 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1264                         struct pmu_queue *queue)
1265 {
1266         int err;
1267
1268         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1269                 return 0;
1270
1271         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1272                 mutex_lock(&queue->mutex);
1273                 return 0;
1274         }
1275
1276         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1277         return err;
1278 }
1279
1280 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1281                         struct pmu_queue *queue)
1282 {
1283         int err;
1284
1285         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1286                 return 0;
1287
1288         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1289                 mutex_unlock(&queue->mutex);
1290                 return 0;
1291         }
1292
1293         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1294         return err;
1295 }
1296
1297 /* called by pmu_read_message, no lock */
1298 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1299                         struct pmu_queue *queue)
1300 {
1301         u32 head, tail;
1302
1303         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1304         if (queue->opened && queue->oflag == OFLAG_READ)
1305                 tail = queue->position;
1306         else
1307                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1308
1309         return head == tail;
1310 }
1311
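/*
 * Queues are rings in DMEM.  A write that would run past the end of
 * the ring requires a rewind marker, so the free-space check below
 * reserves room for one command header and reports whether the
 * caller must rewind before pushing its payload.
 */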
1312 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1313                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1314 {
1315         u32 head, tail, free;
1316         bool rewind = false;
1317
1318         size = ALIGN(size, QUEUE_ALIGNMENT);
1319
1320         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1321         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1322
1323         if (head >= tail) {
1324                 free = queue->offset + queue->size - head;
1325                 free -= PMU_CMD_HDR_SIZE;
1326
1327                 if (size > free) {
1328                         rewind = true;
1329                         head = queue->offset;
1330                 }
1331         }
1332
1333         if (head < tail)
1334                 free = tail - head - 1;
1335
1336         if (need_rewind)
1337                 *need_rewind = rewind;
1338
1339         return size <= free;
1340 }
1341
1342 static int pmu_queue_push(struct pmu_gk20a *pmu,
1343                         struct pmu_queue *queue, void *data, u32 size)
1344 {
1345         gk20a_dbg_fn("");
1346
1347         if (!queue->opened || queue->oflag != OFLAG_WRITE) {
1348                 gk20a_err(dev_from_gk20a(pmu->g),
1349                         "queue not opened for write");
1350                 return -EINVAL;
1351         }
1352
1353         pmu_queue_write(pmu, queue->position, data, size);
1354         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1355         return 0;
1356 }
1357
1358 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1359                         struct pmu_queue *queue, void *data, u32 size,
1360                         u32 *bytes_read)
1361 {
1362         u32 head, tail, used;
1363
1364         *bytes_read = 0;
1365
1366         if (!queue->opened || queue->oflag != OFLAG_READ) {
1367                 gk20a_err(dev_from_gk20a(pmu->g),
1368                         "queue not opened for read");
1369                 return -EINVAL;
1370         }
1371
1372         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1373         tail = queue->position;
1374
1375         if (head == tail)
1376                 return 0;
1377
1378         if (head > tail)
1379                 used = head - tail;
1380         else
1381                 used = queue->offset + queue->size - tail;
1382
1383         if (size > used) {
1384                 gk20a_warn(dev_from_gk20a(pmu->g),
1385                         "requested read size larger than queue contents",
1386                 size = used;
1387         }
1388
1389         pmu_queue_read(pmu, tail, data, size);
1390         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1391         *bytes_read = size;
1392         return 0;
1393 }
1394
1395 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1396                         struct pmu_queue *queue)
1397 {
1398         struct pmu_cmd cmd;
1399
1400         gk20a_dbg_fn("");
1401
1402         if (!queue->opened) {
1403                 gk20a_err(dev_from_gk20a(pmu->g),
1404                         "queue not opened");
1405                 return;
1406         }
1407
1408         if (queue->oflag == OFLAG_WRITE) {
1409                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1410                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1411                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1412                 gk20a_dbg_pmu("queue %d rewound", queue->id);
1413         }
1414
1415         queue->position = queue->offset;
1416         return;
1417 }
1418
1419 /* open for read and lock the queue */
1420 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1421                         struct pmu_queue *queue)
1422 {
1423         int err;
1424
1425         err = pmu_queue_lock(pmu, queue);
1426         if (err)
1427                 return err;
1428
1429         if (queue->opened)
1430                 BUG();
1431
1432         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1433         queue->oflag = OFLAG_READ;
1434         queue->opened = true;
1435
1436         return 0;
1437 }
1438
1439 /* open for write and lock the queue
1440    make sure there's enough free space for the write */
1441 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1442                         struct pmu_queue *queue, u32 size)
1443 {
1444         bool rewind = false;
1445         int err;
1446
1447         err = pmu_queue_lock(pmu, queue);
1448         if (err)
1449                 return err;
1450
1451         if (queue->opened)
1452                 BUG();
1453
1454         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1455                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
                     pmu_queue_unlock(pmu, queue);
1456                 return -EAGAIN;
1457         }
1458
1459         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1460         queue->oflag = OFLAG_WRITE;
1461         queue->opened = true;
1462
1463         if (rewind)
1464                 pmu_queue_rewind(pmu, queue);
1465
1466         return 0;
1467 }
1468
1469 /* close and unlock the queue */
1470 static int pmu_queue_close(struct pmu_gk20a *pmu,
1471                         struct pmu_queue *queue, bool commit)
1472 {
1473         if (!queue->opened)
1474                 return 0;
1475
1476         if (commit) {
1477                 if (queue->oflag == OFLAG_READ) {
1478                         pmu_queue_tail(pmu, queue,
1479                                 &queue->position, QUEUE_SET);
1480                 }
1481                 else {
1482                         pmu_queue_head(pmu, queue,
1483                                 &queue->position, QUEUE_SET);
1484                 }
1485         }
1486
1487         queue->opened = false;
1488
1489         pmu_queue_unlock(pmu, queue);
1490
1491         return 0;
1492 }
1493
1494 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1495 {
1496         gk20a_dbg_fn("");
1497
1498         gk20a_allocator_destroy(&pmu->dmem);
1499 }
1500
1501 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1502 {
1503         struct pmu_gk20a *pmu = &g->pmu;
1504
1505         gk20a_dbg_fn("");
1506
1507         pmu_enable_hw(pmu, true);
1508
1509         return 0;
1510 }
1511
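/*
 * One-time software setup: allocate the HW mutex and sequence
 * tracking state, load the PMU ucode from the firmware blob,
 * allocate DMA buffers for the ucode image and the PG sequence
 * buffer, and build the sg tables needed to map them into the PMU's
 * address space.  On a repeat call only the mutable state is
 * re-initialized.
 */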
1512 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1513 {
1514         struct pmu_gk20a *pmu = &g->pmu;
1515         struct mm_gk20a *mm = &g->mm;
1516         struct vm_gk20a *vm = &mm->pmu.vm;
1517         struct device *d = dev_from_gk20a(g);
1518         int i, err = 0;
1519         u8 *ptr;
1520         void *ucode_ptr;
1521         struct sg_table *sgt_pmu_ucode;
1522         struct sg_table *sgt_seq_buf;
1523         DEFINE_DMA_ATTRS(attrs);
1524         dma_addr_t iova;
1525
1526         gk20a_dbg_fn("");
1527
1528         /* start with elpg disabled until first enable call */
1529         mutex_init(&pmu->elpg_mutex);
1530         pmu->elpg_refcnt = 0;
1531
1532         if (pmu->sw_ready) {
1533                 for (i = 0; i < pmu->mutex_cnt; i++) {
1534                         pmu->mutex[i].id    = i;
1535                         pmu->mutex[i].index = i;
1536                 }
1537                 pmu_seq_init(pmu);
1538
1539                 gk20a_dbg_fn("skip init");
1540                 goto skip_init;
1541         }
1542
1543         /* no infoRom script from vbios? */
1544
1545         /* TBD: sysmon subtask */
1546
1547         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1548         pmu->mutex = kzalloc(pmu->mutex_cnt *
1549                 sizeof(struct pmu_mutex), GFP_KERNEL);
1550         if (!pmu->mutex) {
1551                 err = -ENOMEM;
1552                 goto err;
1553         }
1554
1555         for (i = 0; i < pmu->mutex_cnt; i++) {
1556                 pmu->mutex[i].id    = i;
1557                 pmu->mutex[i].index = i;
1558         }
1559
1560         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1561                 sizeof(struct pmu_sequence), GFP_KERNEL);
1562         if (!pmu->seq) {
1563                 err = -ENOMEM;
1564                 goto err_free_mutex;
1565         }
1566
1567         pmu_seq_init(pmu);
1568
1569         if (!g->pmu_fw) {
1570                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1571                 if (!g->pmu_fw) {
1572                         gk20a_err(d, "failed to load pmu ucode!!");
1573                         err = -ENOENT;
1574                         goto err_free_seq;
1575                 }
1576         }
1577
1578         gk20a_dbg_fn("firmware loaded");
1579
1580         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1581         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1582                         pmu->desc->descriptor_size);
1583
1584         INIT_WORK(&pmu->pg_init, pmu_setup_hw);
1585
1586         gk20a_init_pmu_vm(mm);
1587
1588         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1589         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1590                                         &iova,
1591                                         GFP_KERNEL,
1592                                         &attrs);
1593         if (!pmu->ucode.cpuva) {
1594                 gk20a_err(d, "failed to allocate memory\n");
1595                 err = -ENOMEM;
1596                 goto err_release_fw;
1597         }
1598
1599         pmu->ucode.iova = iova;
1600         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1601                                         &iova,
1602                                         GFP_KERNEL);
1603         if (!pmu->seq_buf.cpuva) {
1604                 gk20a_err(d, "failed to allocate memory\n");
1605                 err = -ENOMEM;
1606                 goto err_free_pmu_ucode;
1607         }
1608
1609         pmu->seq_buf.iova = iova;
1610
1611         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1612                                 pmu->ucode.cpuva,
1613                                 pmu->ucode.iova,
1614                                 GK20A_PMU_UCODE_SIZE_MAX);
1615         if (err) {
1616                 gk20a_err(d, "failed to allocate sg table\n");
1617                 goto err_free_seq_buf;
1618         }
1619
1620         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1621                                         GK20A_PMU_UCODE_SIZE_MAX,
1622                                         0, /* flags */
1623                                         gk20a_mem_flag_read_only);
1624         if (!pmu->ucode.pmu_va) {
1625                 gk20a_err(d, "failed to map pmu ucode memory!!");
                     err = -ENOMEM;
1626                 goto err_free_ucode_sgt;
1627         }
1628
1629         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1630                                 pmu->seq_buf.cpuva,
1631                                 pmu->seq_buf.iova,
1632                                 GK20A_PMU_SEQ_BUF_SIZE);
1633         if (err) {
1634                 gk20a_err(d, "failed to allocate sg table\n");
1635                 goto err_unmap_ucode;
1636         }
1637
1638         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1639                                         GK20A_PMU_SEQ_BUF_SIZE,
1640                                         0, /* flags */
1641                                         gk20a_mem_flag_none);
1642         if (!pmu->seq_buf.pmu_va) {
1643                 gk20a_err(d, "failed to map pmu seq buffer memory!!");
                     err = -ENOMEM;
1644                 goto err_free_seq_buf_sgt;
1645         }
1646
1647         ptr = (u8 *)pmu->seq_buf.cpuva;
1648         if (!ptr) {
1649                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                     err = -ENOMEM;
1650                 goto err_unmap_seq_buf;
1651         }
1652
1653         /* TBD: remove this if ZBC save/restore is handled by PMU
1654          * write an empty ZBC sequence (just an EXIT opcode) for now */
1655         ptr[0] = 0x16; /* opcode EXIT */
1656         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1657         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1658
1659         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1660
1661         ucode_ptr = pmu->ucode.cpuva;
1662
1663         for (i = 0; i < (pmu->desc->app_start_offset +
1664                         pmu->desc->app_size) >> 2; i++)
1665                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1666
1667         gk20a_free_sgtable(&sgt_pmu_ucode);
1668         gk20a_free_sgtable(&sgt_seq_buf);
1669
1670         pmu->sw_ready = true;
1671
1672 skip_init:
1673         mutex_init(&pmu->isr_mutex);
1674         mutex_init(&pmu->isr_enable_lock);
1675         mutex_init(&pmu->pmu_copy_lock);
1676         mutex_init(&pmu->pmu_seq_lock);
1677
1678         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1679         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1680
1681         pmu->remove_support = gk20a_remove_pmu_support;
1682         err = gk20a_init_pmu(pmu);
1683         if (err) {
1684                 gk20a_err(d, "failed to set function pointers\n");
1685                 return err;
1686         }
1687
1688         gk20a_dbg_fn("done");
1689         return 0;
1690
1691  err_unmap_seq_buf:
1692         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1693                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1694  err_free_seq_buf_sgt:
1695         gk20a_free_sgtable(&sgt_seq_buf);
1696  err_unmap_ucode:
1697         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1698                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1699  err_free_ucode_sgt:
1700         gk20a_free_sgtable(&sgt_pmu_ucode);
1701  err_free_seq_buf:
1702         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1703                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1704         pmu->seq_buf.cpuva = NULL;
1705         pmu->seq_buf.iova = 0;
1706  err_free_pmu_ucode:
1707         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1708                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1709         pmu->ucode.cpuva = NULL;
1710         pmu->ucode.iova = 0;
1711  err_release_fw:
1712         release_firmware(g->pmu_fw);
1713  err_free_seq:
1714         kfree(pmu->seq);
1715  err_free_mutex:
1716         kfree(pmu->mutex);
1717  err:
1718         gk20a_dbg_fn("fail");
1719         return err;
1720 }
1721
1722 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1723                         void *param, u32 handle, u32 status);
1724
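     /*
      * Completion callback for PMU_PG_CMD_ID_ENG_BUF_LOAD commands. Records
      * whether the engine buffer was loaded and, unless the FECS PG buffer
      * load failed, schedules the pg_init work to advance the boot sequence.
      */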
1725 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1726                         void *param, u32 handle, u32 status)
1727 {
1728         struct pmu_gk20a *pmu = param;
1729         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1730
1731         gk20a_dbg_fn("");
1732
1733         gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1734         if (status != 0) {
1735                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1736                 /* TBD: disable ELPG */
1737                 return;
1738         }
1739
1740         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1741         if (!pmu->buf_loaded &&
1742             pmu->pmu_state == PMU_STATE_LOADING_PG_BUF) {
1743                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1744         } else {
1745                 schedule_work(&pmu->pg_init);
1746         }
1747 }
1748
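     /*
      * First-stage hardware setup: reset the PMU falcon, re-enable ISR
      * handling, program the FBIF transfer apertures (virtual and physical)
      * used for PMU DMA, and bootstrap the ucode.
      */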
1749 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1750 {
1751         struct pmu_gk20a *pmu = &g->pmu;
1752         int err;
1753
1754         gk20a_dbg_fn("");
1755
1756         mutex_lock(&pmu->isr_enable_lock);
1757         pmu_reset(pmu);
1758         pmu->isr_enabled = true;
1759         mutex_unlock(&pmu->isr_enable_lock);
1760
1761         /* setup apertures - virtual */
1762         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1763                 pwr_fbif_transcfg_mem_type_virtual_f());
1764         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1765                 pwr_fbif_transcfg_mem_type_virtual_f());
1766         /* setup apertures - physical */
1767         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1768                 pwr_fbif_transcfg_mem_type_physical_f() |
1769                 pwr_fbif_transcfg_target_local_fb_f());
1770         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1771                 pwr_fbif_transcfg_mem_type_physical_f() |
1772                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1773         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1774                 pwr_fbif_transcfg_mem_type_physical_f() |
1775                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1776
1777         /* TBD: load pmu ucode */
1778         err = pmu_bootstrap(pmu);
1779         if (err)
1780                 return err;
1781
1782         return 0;
1783
1784 }
1785
1786 static int gk20a_aelpg_init(struct gk20a *g);
1787 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1788
1789 static void pmu_setup_hw_load_zbc(struct gk20a *g);
1790 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
1791
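     /*
      * Work item that drives the PMU boot sequence. Each state below is
      * entered from a message/command callback, which re-schedules this work:
      * INIT_RECEIVED -> init powergating, ELPG_BOOTED -> bind FECS PG buffer,
      * LOADING_PG_BUF -> load ZBC buffer, LOADING_ZBC -> enable ELPG,
      * STARTED -> boot complete.
      */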
1792 static void pmu_setup_hw(struct work_struct *work)
1793 {
1794         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1795         struct gk20a *g = pmu->g;
1796
1797         switch (pmu->pmu_state) {
1798         case PMU_STATE_INIT_RECEIVED:
1799                 gk20a_dbg_pmu("pmu starting");
1800                 pmu_init_powergating(g);
1801                 break;
1802         case PMU_STATE_ELPG_BOOTED:
1803                 gk20a_dbg_pmu("elpg booted");
1804                 gk20a_init_pmu_bind_fecs(g);
1805                 break;
1806         case PMU_STATE_LOADING_PG_BUF:
1807                 gk20a_dbg_pmu("loaded pg buf");
1808                 pmu_setup_hw_load_zbc(g);
1809                 break;
1810         case PMU_STATE_LOADING_ZBC:
1811                 gk20a_dbg_pmu("loaded zbc");
1812                 pmu_setup_hw_enable_elpg(g);
1813                 break;
1814         case PMU_STATE_STARTED:
1815                 gk20a_dbg_pmu("PMU booted");
1816                 break;
1817         default:
1818                 gk20a_dbg_pmu("invalid state");
1819                 break;
1820         }
1821 }
1822
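     /*
      * Query the FECS PG reglist size, allocate and map the PG buffer on
      * first use, bind the PMU instance block and buffer VA to FECS, then
      * post an ENG_BUF_LOAD command for the FECS buffer. The reply is
      * handled by pmu_handle_pg_buf_config_msg().
      */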
1823 int gk20a_init_pmu_bind_fecs(struct gk20a *g)
1824 {
1825         struct pmu_gk20a *pmu = &g->pmu;
1826         struct mm_gk20a *mm = &g->mm;
1827         struct vm_gk20a *vm = &mm->pmu.vm;
1828         struct device *d = dev_from_gk20a(g);
1829         struct pmu_cmd cmd;
1830         u32 desc;
1831         int err;
1832         u32 size;
1833         struct sg_table *sgt_pg_buf;
1834         dma_addr_t iova;
1835
1836         gk20a_dbg_fn("");
1837
1838         size = 0;
1839         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1840         if (err && (pmu->pmu_state == PMU_STATE_ELPG_BOOTED)) {
1841                 gk20a_err(dev_from_gk20a(g),
1842                         "fail to query fecs pg buffer size");
1843                 return err;
1844         }
1845
1846         if (err) {
1847                 gk20a_err(dev_from_gk20a(g),
1848                         "fail to query fecs pg buffer size: invalid boot state");
1849                 return err;
1850         }
1851
1852         if (!pmu->pg_buf.cpuva) {
1853                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1854                                                 &iova,
1855                                                 GFP_KERNEL);
1856                 if (!pmu->pg_buf.cpuva) {
1857                         gk20a_err(d, "failed to allocate memory\n");
1858                         return -ENOMEM;
1859                 }
1860
1861                 pmu->pg_buf.iova = iova;
1862                 pmu->pg_buf.size = size;
1863
1864                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1865                                         pmu->pg_buf.cpuva,
1866                                         pmu->pg_buf.iova,
1867                                         size);
1868                 if (err) {
1869                         gk20a_err(d, "failed to create sg table\n");
1870                         goto err_free_pg_buf;
1871                 }
1872
1873                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1874                                         &sgt_pg_buf,
1875                                         size,
1876                                         0, /* flags */
1877                                         gk20a_mem_flag_none);
1878                 if (!pmu->pg_buf.pmu_va) {
1879                         gk20a_err(d, "failed to map fecs pg buffer");
1880                         err = -ENOMEM;
1881                         goto err_free_sgtable;
1882                 }
1883
1884                 gk20a_free_sgtable(&sgt_pg_buf);
1885         }
1886
1887         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1888         if (err && (pmu->pmu_state == PMU_STATE_ELPG_BOOTED)) {
1889                 gk20a_err(dev_from_gk20a(g),
1890                         "fail to bind pmu inst to gr");
1891                 return err;
1892         }
1893
1894         if (err) {
1895                 gk20a_err(dev_from_gk20a(g),
1896                         "fail to bind pmu inst to gr: invalid boot state");
1897                 return err;
1898         }
1899
1900         err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
1901         if (err && (pmu->pmu_state == PMU_STATE_ELPG_BOOTED)) {
1902                 gk20a_err(dev_from_gk20a(g),
1903                         "fail to set pg buffer pmu va");
1904                 return err;
1905         }
1906
1907         if (err) {
1908                 gk20a_err(dev_from_gk20a(g),
1909                         "fail to set pg buffer pmu va: invalid boot state");
1910                 return err;
1911         }
1912
1913         memset(&cmd, 0, sizeof(struct pmu_cmd));
1914         cmd.hdr.unit_id = PMU_UNIT_PG;
1915         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1916         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1917         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1918         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1919         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
1920         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1921         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1922         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1923
1924         pmu->buf_loaded = false;
1925         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1926         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1927                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1928         pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
1929         return err;
1930
1931 err_free_sgtable:
1932         gk20a_free_sgtable(&sgt_pg_buf);
1933 err_free_pg_buf:
1934         dma_free_coherent(d, size,
1935                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
1936         pmu->pg_buf.cpuva = NULL;
1937         pmu->pg_buf.iova = 0;
1938         return err;
1939 }
1940
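     /*
      * Post an ENG_BUF_LOAD command for the ZBC seq buffer that was filled
      * with an empty (EXIT-only) sequence in gk20a_init_pmu_setup_sw().
      */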
1941 static void pmu_setup_hw_load_zbc(struct gk20a *g)
1942 {
1943         struct pmu_gk20a *pmu = &g->pmu;
1944         struct pmu_cmd cmd;
1945         u32 desc;
1946
1947         memset(&cmd, 0, sizeof(struct pmu_cmd));
1948         cmd.hdr.unit_id = PMU_UNIT_PG;
1949         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1950         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1951         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1952         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1953         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1954         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1955         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1956         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1957
1958         pmu->buf_loaded = false;
1959         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
1960         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1961                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1962         pmu->pmu_state = PMU_STATE_LOADING_ZBC;
1963 }
1964
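     /*
      * Final boot step: apply the ext2priv timeout WAR, mark the PMU as
      * started, save the ZBC table, and enable ELPG/AELPG when they are
      * enabled in the platform configuration. Wakes anyone waiting on
      * boot_wq.
      */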
1965 static void pmu_setup_hw_enable_elpg(struct gk20a *g)
1966 {
1967         struct pmu_gk20a *pmu = &g->pmu;
1968
1969         /*
1970          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1971          * 7. This prevents PMU stalling on Host register accesses. Once the
1972          * cause for this hang is discovered and fixed, this WAR should be
1973          * removed.
1974          */
1975         gk20a_writel(g, 0x10a164, 0x109ff);
1976
1977         pmu->initialized = true;
1978         pmu->pmu_state = PMU_STATE_STARTED;
1979
1980         pmu->zbc_ready = true;
1981         /* Save zbc table after PMU is initialized. */
1982         gr_gk20a_pmu_save_zbc(g, 0xf);
1983
1984         if (g->elpg_enabled)
1985                 gk20a_pmu_enable_elpg(g);
1986
1987         udelay(50);
1988
1989         /* Enable AELPG */
1990         if (g->aelpg_enabled) {
1991                 gk20a_aelpg_init(g);
1992                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
1993         }
1994
1995         wake_up(&g->pmu.boot_wq);
1996 }
1997
1998 int gk20a_init_pmu_support(struct gk20a *g)
1999 {
2000         struct pmu_gk20a *pmu = &g->pmu;
2001         int err;
2002
2003         gk20a_dbg_fn("");
2004
2005         if (pmu->initialized)
2006                 return 0;
2007
2008         pmu->g = g;
2009
2010         err = gk20a_init_pmu_reset_enable_hw(g);
2011         if (err)
2012                 return err;
2013
2014         if (support_gk20a_pmu()) {
2015                 err = gk20a_init_pmu_setup_sw(g);
2016                 if (err)
2017                         return err;
2018
2019                 err = gk20a_init_pmu_setup_hw1(g);
2020                 if (err)
2021                         return err;
2022
2023                 pmu->pmu_state = PMU_STATE_STARTING;
2024         }
2025
2026         return err;
2027 }
2028
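     /*
      * Handle ELPG command acknowledgements from the PMU. Updates elpg_stat
      * for ALLOW/DISALLOW acks; a DISALLOW ack received while the PMU is in
      * PMU_STATE_ELPG_BOOTING advances the state to ELPG_BOOTED and
      * re-schedules the pg_init work.
      */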
2029 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2030                         void *param, u32 handle, u32 status)
2031 {
2032         struct pmu_gk20a *pmu = param;
2033         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2034
2035         gk20a_dbg_fn("");
2036
2037         if (status != 0) {
2038                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2039                 /* TBD: disable ELPG */
2040                 return;
2041         }
2042
2043         switch (elpg_msg->msg) {
2044         case PMU_PG_ELPG_MSG_INIT_ACK:
2045                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2046                 break;
2047         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2048                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2049                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2050                 break;
2051         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2052                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2053                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2054                 if (pmu->pmu_state == PMU_STATE_ELPG_BOOTING) {
2055                         pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
2056                         schedule_work(&pmu->pg_init);
2057                 }
2058                 break;
2059         default:
2060                 gk20a_err(dev_from_gk20a(g),
2061                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2062         }
2063
2064         return;
2065 }
2066
2067 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2068                         void *param, u32 handle, u32 status)
2069 {
2070         struct pmu_gk20a *pmu = param;
2071
2072         gk20a_dbg_fn("");
2073
2074         if (status != 0) {
2075                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2076                 /* TBD: disable ELPG */
2077                 return;
2078         }
2079
2080         switch (msg->msg.pg.stat.sub_msg_id) {
2081         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2082                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2083                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2084                 break;
2085         default:
2086                 break;
2087         }
2088 }
2089
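     /*
      * Program the GR idle thresholds, then send the ELPG init command,
      * allocate DMEM for the powergating statistics, and issue an initial
      * DISALLOW (the PMU ucode requires a disallow before the first allow).
      */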
2090 static int pmu_init_powergating(struct gk20a *g)
2091 {
2092         struct pmu_gk20a *pmu = &g->pmu;
2093         struct pmu_cmd cmd;
2094         u32 seq;
2095
2096         gk20a_dbg_fn("");
2097
2098         mutex_lock(&pmu->isr_mutex);
2099
2100         if (tegra_cpu_is_asim()) {
2101                 /* TBD: calculate threshold for silicon */
2102                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2103                                 PMU_PG_IDLE_THRESHOLD_SIM);
2104                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2105                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2106         } else {
2107                 /* TBD: calculate threshold for silicon */
2108                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2109                                 PMU_PG_IDLE_THRESHOLD);
2110                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2111                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2112         }
2113
2114         gk20a_gr_wait_initialized(g);
2115
2116         /* init ELPG */
2117         memset(&cmd, 0, sizeof(struct pmu_cmd));
2118         cmd.hdr.unit_id = PMU_UNIT_PG;
2119         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2120         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2121         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2122         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2123
2124         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
2125         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2126                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2127
2128         /* alloc dmem for powergating state log */
2129         pmu->stat_dmem_offset = 0;
2130         memset(&cmd, 0, sizeof(struct pmu_cmd));
2131         cmd.hdr.unit_id = PMU_UNIT_PG;
2132         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2133         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2134         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2135         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2136         cmd.cmd.pg.stat.data = 0;
2137
2138         gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
2139         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2140                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2141
2142         /* disallow ELPG initially
2143            PMU ucode requires a disallow cmd before allow cmd */
2144         pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
2145         memset(&cmd, 0, sizeof(struct pmu_cmd));
2146         cmd.hdr.unit_id = PMU_UNIT_PG;
2147         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2148         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2149         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2150         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2151
2152         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
2153         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2154                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2155
2156         /* start with elpg disabled until first enable call */
2157         pmu->elpg_refcnt = 0;
2158
2159         if (pmu->pmu_state == PMU_STATE_INIT_RECEIVED)
2160                 pmu->pmu_state = PMU_STATE_ELPG_BOOTING;
2161
2162         mutex_unlock(&pmu->isr_mutex);
2163
2164         return 0;
2165 }
2166
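     /*
      * Configure the PMU idle counters used by perfmon (counter #3 for
      * GR/CE2 busy cycles, #6 for total cycles, #1/#2 for raw readings),
      * allocate a DMEM sample buffer, and post the PERFMON init command
      * with the sampling parameters below.
      */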
2167 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2168 {
2169         struct gk20a *g = pmu->g;
2170         struct pmu_v *pv = &g->ops.pmu_ver;
2171         struct pmu_cmd cmd;
2172         struct pmu_payload payload;
2173         u32 seq;
2174         u32 data;
2175         int err = 0;
2176
2177         gk20a_dbg_fn("");
2178
2179         pmu->perfmon_ready = 0;
2180
2181         /* use counter #3 for GR && CE2 busy cycles */
2182         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2183                 pwr_pmu_idle_mask_gr_enabled_f() |
2184                 pwr_pmu_idle_mask_ce_2_enabled_f());
2185
2186         /* disable idle filtering for counters 3 and 6 */
2187         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2188         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2189                         pwr_pmu_idle_ctrl_filter_m(),
2190                         pwr_pmu_idle_ctrl_value_busy_f() |
2191                         pwr_pmu_idle_ctrl_filter_disabled_f());
2192         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2193
2194         /* use counter #6 for total cycles */
2195         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2196         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2197                         pwr_pmu_idle_ctrl_filter_m(),
2198                         pwr_pmu_idle_ctrl_value_always_f() |
2199                         pwr_pmu_idle_ctrl_filter_disabled_f());
2200         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2201
2202         /*
2203          * We don't want to disturb counters #3 and #6, which are used by
2204          * perfmon, so we add wiring also to counters #1 and #2 for
2205          * exposing raw counter readings.
2206          */
2207         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2208                 pwr_pmu_idle_mask_gr_enabled_f() |
2209                 pwr_pmu_idle_mask_ce_2_enabled_f());
2210
2211         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2212         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2213                         pwr_pmu_idle_ctrl_filter_m(),
2214                         pwr_pmu_idle_ctrl_value_busy_f() |
2215                         pwr_pmu_idle_ctrl_filter_disabled_f());
2216         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2217
2218         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2219         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2220                         pwr_pmu_idle_ctrl_filter_m(),
2221                         pwr_pmu_idle_ctrl_value_always_f() |
2222                         pwr_pmu_idle_ctrl_filter_disabled_f());
2223         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2224
2225         if (!pmu->sample_buffer)
2226                 err = pmu->dmem.alloc(&pmu->dmem,
2227                                       &pmu->sample_buffer, 2 * sizeof(u16));
2228         if (err) {
2229                 gk20a_err(dev_from_gk20a(g),
2230                         "failed to allocate perfmon sample buffer");
2231                 return -ENOMEM;
2232         }
2233
2234         /* init PERFMON */
2235         memset(&cmd, 0, sizeof(struct pmu_cmd));
2236         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2237         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2238         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2239         /* buffer to save counter values for pmu perfmon */
2240         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2241                 (u16)pmu->sample_buffer);
2242         /* number of sample periods below lower threshold
2243            before pmu triggers perfmon decrease event
2244            TBD: = 15 */
2245         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2246         /* index of base counter, aka. always ticking counter */
2247         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2248         /* microseconds interval between pmu polls perf counters */
2249         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2250         /* number of perfmon counters
2251            counter #3 (GR and CE2) for gk20a */
2252         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2253         /* moving average window for sample periods
2254            TBD: = 3000000 / sample_period_us = 17 */
2255         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2256
2257         memset(&payload, 0, sizeof(struct pmu_payload));
2258         payload.in.buf = &pmu->perfmon_counter;
2259         payload.in.size = sizeof(struct pmu_perfmon_counter);
2260         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2261
2262         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
2263         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2264                         NULL, NULL, &seq, ~0);
2265
2266         return 0;
2267 }
2268
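     /*
      * Read and validate the PMU_INIT message from the message queue tail
      * in DMEM, capture the ucode gid if present, initialize the command
      * and message queues and the DMEM allocator, then schedule pg_init to
      * start the boot sequence.
      */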
2269 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2270                         struct pmu_msg *msg)
2271 {
2272         struct gk20a *g = pmu->g;
2273         struct pmu_v *pv = &g->ops.pmu_ver;
2274         union pmu_init_msg_pmu *init;
2275         struct pmu_sha1_gid_data gid_data;
2276         u32 i, tail = 0;
2277         gk20a_dbg_pmu("init received\n");
2278
2279         tail = pwr_pmu_msgq_tail_val_v(
2280                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2281
2282         pmu_copy_from_dmem(pmu, tail,
2283                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2284
2285         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2286                 gk20a_err(dev_from_gk20a(g),
2287                         "expecting init msg");
2288                 return -EINVAL;
2289         }
2290
2291         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2292                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2293
2294         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2295                 gk20a_err(dev_from_gk20a(g),
2296                         "expecting init msg");
2297                 return -EINVAL;
2298         }
2299
2300         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2301         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2302                 pwr_pmu_msgq_tail_val_f(tail));
2303
2304         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2305         if (!pmu->gid_info.valid) {
2306
2307                 pmu_copy_from_dmem(pmu,
2308                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2309                         (u8 *)&gid_data,
2310                         sizeof(struct pmu_sha1_gid_data), 0);
2311
2312                 pmu->gid_info.valid =
2313                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2314
2315                 if (pmu->gid_info.valid) {
2316
2317                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2318                                 sizeof(gid_data.gid));
2319
2320                         memcpy(pmu->gid_info.gid, gid_data.gid,
2321                                 sizeof(pmu->gid_info.gid));
2322                 }
2323         }
2324
2325         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2326                 pmu_queue_init(pmu, i, init);
2327
2328         if (!pmu->dmem.alloc)
2329                 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2330                                 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2331                                 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2332                                 PMU_DMEM_ALLOC_ALIGNMENT);
2333
2334         pmu->pmu_ready = true;
2335         pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
2336         schedule_work(&pmu->pg_init);
2337         gk20a_dbg_pmu("init received end\n");
2338
2339         return 0;
2340 }
2341
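     /*
      * Pop one message (header, then optional body) from a PMU queue.
      * A PMU_UNIT_REWIND header causes the queue to be rewound and the
      * read retried. Returns false when the queue is empty or on error,
      * with the error reported through *status.
      */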
2342 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2343                         struct pmu_msg *msg, int *status)
2344 {
2345         struct gk20a *g = pmu->g;
2346         u32 read_size, bytes_read;
2347         int err;
2348
2349         *status = 0;
2350
2351         if (pmu_queue_is_empty(pmu, queue))
2352                 return false;
2353
2354         err = pmu_queue_open_read(pmu, queue);
2355         if (err) {
2356                 gk20a_err(dev_from_gk20a(g),
2357                         "fail to open queue %d for read", queue->id);
2358                 *status = err;
2359                 return false;
2360         }
2361
2362         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2363                         PMU_MSG_HDR_SIZE, &bytes_read);
2364         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2365                 gk20a_err(dev_from_gk20a(g),
2366                         "fail to read msg from queue %d", queue->id);
2367                 *status = err | -EINVAL;
2368                 goto clean_up;
2369         }
2370
2371         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2372                 pmu_queue_rewind(pmu, queue);
2373                 /* read again after rewind */
2374                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2375                                 PMU_MSG_HDR_SIZE, &bytes_read);
2376                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2377                         gk20a_err(dev_from_gk20a(g),
2378                                 "fail to read msg from queue %d", queue->id);
2379                         *status = err | -EINVAL;
2380                         goto clean_up;
2381                 }
2382         }
2383
2384         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2385                 gk20a_err(dev_from_gk20a(g),
2386                         "read invalid unit_id %d from queue %d",
2387                         msg->hdr.unit_id, queue->id);
2388                 *status = -EINVAL;
2389                 goto clean_up;
2390         }
2391
2392         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2393                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2394                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2395                         read_size, &bytes_read);
2396                 if (err || bytes_read != read_size) {
2397                         gk20a_err(dev_from_gk20a(g),
2398                                 "fail to read msg from queue %d", queue->id);
2399                         *status = err;
2400                         goto clean_up;
2401                 }
2402         }
2403
2404         err = pmu_queue_close(pmu, queue, true);
2405         if (err) {
2406                 gk20a_err(dev_from_gk20a(g),
2407                         "fail to close queue %d", queue->id);
2408                 *status = err;
2409                 return false;
2410         }
2411
2412         return true;
2413
2414 clean_up:
2415         err = pmu_queue_close(pmu, queue, false);
2416         if (err)
2417                 gk20a_err(dev_from_gk20a(g),
2418                         "fail to close queue %d", queue->id);
2419         return false;
2420 }
2421
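     /*
      * Match a response message to its pending sequence, copy any output
      * payload back from DMEM, release the sequence's in/out DMEM
      * allocations, and invoke the caller's callback.
      */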
2422 static int pmu_response_handle(struct pmu_gk20a *pmu,
2423                         struct pmu_msg *msg)
2424 {
2425         struct gk20a *g = pmu->g;
2426         struct pmu_sequence *seq;
2427         struct pmu_v *pv = &g->ops.pmu_ver;
2428         int ret = 0;
2429
2430         gk20a_dbg_fn("");
2431
2432         seq = &pmu->seq[msg->hdr.seq_id];
2433         if (seq->state != PMU_SEQ_STATE_USED &&
2434             seq->state != PMU_SEQ_STATE_CANCELLED) {
2435                 gk20a_err(dev_from_gk20a(g),
2436                         "msg for an unknown sequence %d", seq->id);
2437                 return -EINVAL;
2438         }
2439
2440         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2441             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2442                 gk20a_err(dev_from_gk20a(g),
2443                         "unhandled cmd: seq %d", seq->id);
2444         }
2445         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2446                 if (seq->msg) {
2447                         if (seq->msg->hdr.size >= msg->hdr.size) {
2448                                 memcpy(seq->msg, msg, msg->hdr.size);
2449                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2450                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2451                                         pmu_copy_from_dmem(pmu,
2452                                         pv->pmu_allocation_get_dmem_offset(pmu,
2453                                         pv->get_pmu_seq_out_a_ptr(seq)),
2454                                         seq->out_payload,
2455                                         pv->pmu_allocation_get_dmem_size(pmu,
2456                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2457                                 }
2458                         } else {
2459                                 gk20a_err(dev_from_gk20a(g),
2460                                         "sequence %d msg buffer too small",
2461                                         seq->id);
2462                         }
2463                 }
2464         } else
2465                 seq->callback = NULL;
2466         if (pv->pmu_allocation_get_dmem_size(pmu,
2467                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2468                 pmu->dmem.free(&pmu->dmem,
2469                 pv->pmu_allocation_get_dmem_offset(pmu,
2470                 pv->get_pmu_seq_in_a_ptr(seq)),
2471                 pv->pmu_allocation_get_dmem_size(pmu,
2472                 pv->get_pmu_seq_in_a_ptr(seq)));
2473         if (pv->pmu_allocation_get_dmem_size(pmu,
2474                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2475                 pmu->dmem.free(&pmu->dmem,
2476                 pv->pmu_allocation_get_dmem_offset(pmu,
2477                 pv->get_pmu_seq_out_a_ptr(seq)),
2478                 pv->pmu_allocation_get_dmem_size(pmu,
2479                 pv->get_pmu_seq_out_a_ptr(seq)));
2480
2481         if (seq->callback)
2482                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2483
2484         pmu_seq_release(pmu, seq);
2485
2486         /* TBD: notify client waiting for available dmem */
2487
2488         gk20a_dbg_fn("done");
2489
2490         return 0;
2491 }
2492
2493 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2494                                  u32 *var, u32 val);
2495
2496 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2497                         void *param, u32 handle, u32 status)
2498 {
2499         struct pmu_gk20a *pmu = param;
2500         gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
2501         pmu->zbc_save_done = 1;
2502 }
2503
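     /*
      * Ask the PMU to snapshot the given number of ZBC table entries and
      * wait (up to the GR idle timeout) for the update to complete.
      */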
2504 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2505 {
2506         struct pmu_gk20a *pmu = &g->pmu;
2507         struct pmu_cmd cmd;
2508         u32 seq;
2509
2510         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2511                 return;
2512
2513         memset(&cmd, 0, sizeof(struct pmu_cmd));
2514         cmd.hdr.unit_id = PMU_UNIT_PG;
2515         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2516         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2517         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2518
2519         pmu->zbc_save_done = 0;
2520
2521         gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
2522         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2523                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2524         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2525                               &pmu->zbc_save_done, 1);
2526         if (!pmu->zbc_save_done)
2527                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2528 }
2529
2530 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2531 {
2532         struct gk20a *g = pmu->g;
2533         struct pmu_v *pv = &g->ops.pmu_ver;
2534         struct pmu_cmd cmd;
2535         struct pmu_payload payload;
2536         u32 current_rate = 0;
2537         u32 seq;
2538
2539         /* PERFMON Start */
2540         memset(&cmd, 0, sizeof(struct pmu_cmd));
2541         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2542         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2543         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2544                 PMU_PERFMON_CMD_ID_START);
2545         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2546                 PMU_DOMAIN_GROUP_PSTATE);
2547         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2548                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2549
2550         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2551         if (current_rate >= gpc_pll_params.max_freq)
2552                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2553                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2554         else if (current_rate <= gpc_pll_params.min_freq)
2555                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2556                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2557         else
2558                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2559                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2560                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2561
2562         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2563                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2564                 PMU_PERFMON_FLAG_CLEAR_PREV);
2565
2566         memset(&payload, 0, sizeof(struct pmu_payload));
2567
2568         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2569         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2570         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2571         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2572         pmu->perfmon_counter.valid = true;
2573
2574         payload.in.buf = &pmu->perfmon_counter;
2575         payload.in.size = sizeof(pmu->perfmon_counter);
2576         payload.in.offset =
2577                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2578
2579         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
2580         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2581                         NULL, NULL, &seq, ~0);
2582
2583         return 0;
2584 }
2585
2586 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2587 {
2588         struct gk20a *g = pmu->g;
2589         struct pmu_cmd cmd;
2590         u32 seq;
2591
2592         /* PERFMON Stop */
2593         memset(&cmd, 0, sizeof(struct pmu_cmd));
2594         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2595         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2596         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2597
2598         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
2599         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2600                         NULL, NULL, &seq, ~0);
2601         return 0;
2602 }
2603
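     /*
      * Perfmon event handler: raise the GPU clock to 120% of the current
      * rate on an increase event, drop it to 70% on a decrease event, and
      * restart sampling when CONFIG_GK20A_PERFMON is enabled.
      */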
2604 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2605                         struct pmu_perfmon_msg *msg)
2606 {
2607         struct gk20a *g = pmu->g;
2608         u32 rate;
2609
2610         gk20a_dbg_fn("");
2611
2612         switch (msg->msg_type) {
2613         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2614                 gk20a_dbg_pmu("perfmon increase event: "
2615                         "state_id %d, group_id %d, pct %d",
2616                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2617                 /* increase gk20a clock freq by 20% */
2618                 rate = gk20a_clk_get_rate(g);
2619                 gk20a_clk_set_rate(g, rate * 6 / 5);
2620                 break;
2621         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2622                 gk20a_dbg_pmu("perfmon decrease event: "
2623                         "state_id %d, group_id %d, pct %d",
2624                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2625                 /* decrease gk20a clock freq to 70% of current rate */
2626                 rate = gk20a_clk_get_rate(g);
2627                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2628                 break;
2629         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2630                 pmu->perfmon_ready = 1;
2631                 gk20a_dbg_pmu("perfmon init event");
2632                 break;
2633         default:
2634                 break;
2635         }
2636
2637         /* restart sampling */
2638         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2639                 return pmu_perfmon_start_sampling(pmu);
2640         return 0;
2641 }
2642
2643
2644 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2645 {
2646         int err = 0;
2647
2648         gk20a_dbg_fn("");
2649
2650         switch (msg->hdr.unit_id) {
2651         case PMU_UNIT_PERFMON:
2652                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2653                 break;
2654         default:
2655                 break;
2656         }
2657
2658         return err;
2659 }
2660
2661 static int pmu_process_message(struct pmu_gk20a *pmu)
2662 {
2663         struct pmu_msg msg;
2664         int status;
2665
2666         if (unlikely(!pmu->pmu_ready)) {
2667                 pmu_process_init_msg(pmu, &msg);
2668                 pmu_init_perfmon(pmu);
2669                 return 0;
2670         }
2671
2672         while (pmu_read_message(pmu,
2673                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2674
2675                 gk20a_dbg_pmu("read msg hdr: "
2676                                 "unit_id = 0x%08x, size = 0x%08x, "
2677                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2678                                 msg.hdr.unit_id, msg.hdr.size,
2679                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2680
2681                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2682
2683                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2684                         pmu_handle_event(pmu, &msg);
2685                 } else {
2686                         pmu_response_handle(pmu, &msg);
2687                 }
2688         }
2689
2690         return 0;
2691 }
2692
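     /*
      * Poll *var until it equals val or the timeout (in ms) expires,
      * manually servicing the PMU ISR while waiting. On non-silicon
      * platforms the wait does not time out.
      */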
2693 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2694                                  u32 *var, u32 val)
2695 {
2696         struct gk20a *g = pmu->g;
2697         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2698         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2699
2700         do {
2701                 if (*var == val)
2702                         return 0;
2703
2704                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2705                         gk20a_pmu_isr(g);
2706
2707                 usleep_range(delay, delay * 2);
2708                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2709         } while (time_before(jiffies, end_jiffies) ||
2710                         !tegra_platform_is_silicon());
2711
2712         return -ETIMEDOUT;
2713 }
2714
2715 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2716 {
2717         struct gk20a *g = pmu->g;
2718         struct pmu_pg_stats stats;
2719
2720         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2721                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2722
2723         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2724                 stats.pg_entry_start_timestamp);
2725         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2726                 stats.pg_exit_start_timestamp);
2727         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2728                 stats.pg_ingating_start_timestamp);
2729         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2730                 stats.pg_ungating_start_timestamp);
2731         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2732                 stats.pg_avg_entry_time_us);
2733         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2734                 stats.pg_avg_exit_time_us);
2735         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2736                 stats.pg_ingating_cnt);
2737         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2738                 stats.pg_ingating_time_us);
2739         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2740                 stats.pg_ungating_count);
2741         gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ",
2742                 stats.pg_ungating_time_us);
2743         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2744                 stats.pg_gating_cnt);
2745         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2746                 stats.pg_gating_deny_cnt);
2747
2748         /*
2749            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2750            in .nm file, e.g. 0x1000066c. use 0x66c.
2751         u32 i, val[20];
2752         pmu_copy_from_dmem(pmu, 0x66c,
2753                 (u8 *)val, sizeof(val), 0);
2754         gk20a_dbg_pmu("elpg log begin");
2755         for (i = 0; i < 20; i++)
2756                 gk20a_dbg_pmu("0x%08x", val[i]);
2757         gk20a_dbg_pmu("elpg log end");
2758         */
2759
2760         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2761                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2762         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2763                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2764         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2765                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2766         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2767                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2768         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2769                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2770
2771         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2772                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2773         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2774                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2775         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2776                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2777
2778         /*
2779          TBD: script can't generate those registers correctly
2780         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2781                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2782         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2783                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2784         */
2785 }
2786
2787 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2788 {
2789         struct gk20a *g = pmu->g;
2790         int i;
2791
2792         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2793                 gk20a_readl(g, pwr_falcon_os_r()));
2794         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2795                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2796         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2797                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2798         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2799                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2800         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2801                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2802         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2803                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2804         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2805                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2806         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2807                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2808         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2809                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2810
2811         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2812                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2813                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2814
2815         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2816                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2817                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2818
2819         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2820                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2821                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2822                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2823                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2824                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2825         }
2826
2827         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2828         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2829         if (i != 0) {
2830                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2831                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2832                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2833                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2834                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2835                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2836                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2837                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2838         }
2839
2840         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2841         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2842
2843         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2844         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2845         if (pwr_falcon_exterrstat_valid_v(i) ==
2846                         pwr_falcon_exterrstat_valid_true_v()) {
2847                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2848                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2849                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2850                         gk20a_readl(g, mc_enable_r()));
2851         }
2852
2853         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2854                 gk20a_readl(g, pwr_falcon_engctl_r()));
2855         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2856                 gk20a_readl(g, pwr_falcon_curctx_r()));
2857         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2858                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2859
2860         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2861                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2862                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2863         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2864                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2865
2866         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2867                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2868                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2869         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2870                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2871
2872         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2873                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2874                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2875         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2876                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2877
2878         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2879                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2880                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2881         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2882                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2883
2884         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2885                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2886                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2887         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2888                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2889
2890         for (i = 0; i < 4; i++) {
2891                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2892                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2893                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2894                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2895                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2896
2897                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2898                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2899                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2900                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2901                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2902         }
2903         gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
2904                         pmu->elpg_stat);
2905
2906         /* PMU may have crashed due to a FECS crash; dump FECS status too */
2907         gk20a_fecs_dump_falcon_stats(g);
2908 }
2909
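/*
 * PMU interrupt service routine.
 *
 * Bails out early when PMU interrupt handling is disabled or the PMU is
 * off.  Only interrupts that are both unmasked (pwr_falcon_irqmask) and
 * routed to the host (pwr_falcon_irqdest) are considered.  HALT and EXTERR
 * dump the falcon state for debugging; SWGEN0 signals a pending message
 * from the PMU and triggers message-queue processing.  If the message queue
 * is still not empty afterwards, SWGEN0 is re-asserted so the handler runs
 * again.
 */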
2910 void gk20a_pmu_isr(struct gk20a *g)
2911 {
2912         struct pmu_gk20a *pmu = &g->pmu;
2913         struct pmu_queue *queue;
2914         u32 intr, mask;
2915         bool recheck = false;
2916
2917         gk20a_dbg_fn("");
2918
2919         mutex_lock(&pmu->isr_enable_lock);
2920         if (!pmu->isr_enabled) {
2921                 mutex_unlock(&pmu->isr_enable_lock);
2922                 return;
2923         }
2924
2925         mutex_lock(&pmu->isr_mutex);
2926
2927         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2928                 gk20a_readl(g, pwr_falcon_irqdest_r());
2929
2930         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2931
2932         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2933
2934         if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
2935                 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2936                 mutex_unlock(&pmu->isr_mutex);
2937                 mutex_unlock(&pmu->isr_enable_lock);
2938                 return;
2939         }
2940
2941         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2942                 gk20a_err(dev_from_gk20a(g),
2943                         "pmu halt intr not implemented");
2944                 pmu_dump_falcon_stats(pmu);
2945         }
2946         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2947                 gk20a_err(dev_from_gk20a(g),
2948                         "pmu exterr intr not implemented. Clearing interrupt.");
2949                 pmu_dump_falcon_stats(pmu);
2950
2951                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2952                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2953                                 ~pwr_falcon_exterrstat_valid_m());
2954         }
2955         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2956                 pmu_process_message(pmu);
2957                 recheck = true;
2958         }
2959
2960         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2961
2962         if (recheck) {
2963                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2964                 if (!pmu_queue_is_empty(pmu, queue))
2965                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2966                                 pwr_falcon_irqsset_swgen0_set_f());
2967         }
2968
2969         mutex_unlock(&pmu->isr_mutex);
2970         mutex_unlock(&pmu->isr_enable_lock);
2971 }
2972
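/*
 * Sanity-check a command before it is posted: the target queue must be a SW
 * command queue, the command must be at least a header and no larger than
 * half the queue, the unit id must be valid, and any payload descriptors
 * must be consistent (non-NULL buffers with non-zero sizes, allocation
 * structures that fit within the command body).  Returns true if the
 * command may be posted.
 */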
2973 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2974                         struct pmu_msg *msg, struct pmu_payload *payload,
2975                         u32 queue_id)
2976 {
2977         struct gk20a *g = pmu->g;
2978         struct pmu_queue *queue;
2979         u32 in_size, out_size;
2980
2981         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2982                 goto invalid_cmd;
2983
2984         queue = &pmu->queue[queue_id];
2985         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2986                 goto invalid_cmd;
2987
2988         if (cmd->hdr.size > (queue->size >> 1))
2989                 goto invalid_cmd;
2990
2991         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
2992                 goto invalid_cmd;
2993
2994         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
2995                 goto invalid_cmd;
2996
2997         if (payload == NULL)
2998                 return true;
2999
3000         if (payload->in.buf == NULL && payload->out.buf == NULL)
3001                 goto invalid_cmd;
3002
3003         if ((payload->in.buf != NULL && payload->in.size == 0) ||
3004             (payload->out.buf != NULL && payload->out.size == 0))
3005                 goto invalid_cmd;
3006
3007         in_size = PMU_CMD_HDR_SIZE;
3008         if (payload->in.buf) {
3009                 in_size += payload->in.offset;
3010                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3011         }
3012
3013         out_size = PMU_CMD_HDR_SIZE;
3014         if (payload->out.buf) {
3015                 out_size += payload->out.offset;
3016                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
3017         }
3018
3019         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
3020                 goto invalid_cmd;
3021
3023         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
3024             (payload->out.offset != 0 && payload->out.buf == NULL))
3025                 goto invalid_cmd;
3026
3027         return true;
3028
3029 invalid_cmd:
3030         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
3031                 "queue_id=%d,\n"
3032                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
3033                 "payload in=%p, in_size=%d, in_offset=%d,\n"
3034                 "payload out=%p, out_size=%d, out_offset=%d",
3035                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
3036                 msg, msg ? msg->hdr.size : ~0,
3037                 &payload->in, payload->in.size, payload->in.offset,
3038                 &payload->out, payload->out.size, payload->out.offset);
3039
3040         return false;
3041 }
3042
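/*
 * Reserve space in the target queue, copy the command in and close the
 * queue.  If the queue is temporarily full (-EAGAIN) the open is retried
 * with a short sleep until 'timeout' milliseconds have elapsed.
 */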
3043 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3044                         u32 queue_id, unsigned long timeout)
3045 {
3046         struct gk20a *g = pmu->g;
3047         struct pmu_queue *queue;
3048         unsigned long end_jiffies = jiffies +
3049                 msecs_to_jiffies(timeout);
3050         int err;
3051
3052         gk20a_dbg_fn("");
3053
3054         queue = &pmu->queue[queue_id];
3055
3056         do {
3057                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3058                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3059                         usleep_range(1000, 2000);
3060                 else
3061                         break;
3062         } while (1);
3063
3064         if (err)
3065                 goto clean_up;
3066
3067         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3068
3069         err = pmu_queue_close(pmu, queue, true);
3070
3071 clean_up:
3072         if (err)
3073                 gk20a_err(dev_from_gk20a(g),
3074                         "fail to write cmd to queue %d", queue_id);
3075         else
3076                 gk20a_dbg_fn("done");
3077
3078         return err;
3079 }
3080
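/*
 * Post a command to one of the PMU command queues.  A sequence slot is
 * acquired to track the command, any in/out payload is staged in PMU DMEM
 * with its offset patched into the command body, and the command is then
 * written to the requested queue.  'callback' (if non-NULL) is invoked from
 * message processing when the PMU's reply for this sequence arrives, and
 * *seq_desc identifies that sequence.  'timeout' is in milliseconds and
 * bounds only the queue write.
 *
 * Illustrative usage (this mirrors the ELPG allow path further down in this
 * file; 'g', 'err' and the locals are the caller's own):
 *
 *	struct pmu_cmd cmd;
 *	u32 seq;
 *
 *	memset(&cmd, 0, sizeof(struct pmu_cmd));
 *	cmd.hdr.unit_id = PMU_UNIT_PG;
 *	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
 *	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
 *	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
 *	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
 *	err = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 *			pmu_handle_pg_elpg_msg, &g->pmu, &seq, ~0);
 */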
3081 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3082                 struct pmu_msg *msg, struct pmu_payload *payload,
3083                 u32 queue_id, pmu_callback callback, void* cb_param,
3084                 u32 *seq_desc, unsigned long timeout)
3085 {
3086         struct pmu_gk20a *pmu = &g->pmu;
3087         struct pmu_v *pv = &g->ops.pmu_ver;
3088         struct pmu_sequence *seq;
3089         void *in = NULL, *out = NULL;
3090         int err;
3091
3092         gk20a_dbg_fn("");
3093
3094         BUG_ON(!cmd);
3095         BUG_ON(!seq_desc);
3096         BUG_ON(!pmu->pmu_ready);
3097
3098         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3099                 return -EINVAL;
3100
3101         err = pmu_seq_acquire(pmu, &seq);
3102         if (err)
3103                 return err;
3104
3105         cmd->hdr.seq_id = seq->id;
3106
3107         cmd->hdr.ctrl_flags = 0;
3108         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3109         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3110
3111         seq->callback = callback;
3112         seq->cb_params = cb_param;
3113         seq->msg = msg;
3114         seq->out_payload = NULL;
3115         seq->desc = pmu->next_seq_desc++;
3116
3117         if (payload)
3118                 seq->out_payload = payload->out.buf;
3119
3120         *seq_desc = seq->desc;
3121
3122         if (payload && payload->in.offset != 0) {
3123                 pv->set_pmu_allocation_ptr(pmu, &in,
3124                 ((u8 *)&cmd->cmd + payload->in.offset));
3125
3126                 if (payload->in.buf != payload->out.buf)
3127                         pv->pmu_allocation_set_dmem_size(pmu, in,
3128                         (u16)payload->in.size);
3129                 else
3130                         pv->pmu_allocation_set_dmem_size(pmu, in,
3131                         (u16)max(payload->in.size, payload->out.size));
3132
3133                 err = pmu->dmem.alloc(&pmu->dmem,
3134                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3135                 pv->pmu_allocation_get_dmem_size(pmu, in));
3136                 if (err)
3137                         goto clean_up;
3138
3139                 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
3140                 in)),
3141                         payload->in.buf, payload->in.size, 0);
3142                 pv->pmu_allocation_set_dmem_size(pmu,
3143                 pv->get_pmu_seq_in_a_ptr(seq),
3144                 pv->pmu_allocation_get_dmem_size(pmu, in));
3145                 pv->pmu_allocation_set_dmem_offset(pmu,
3146                 pv->get_pmu_seq_in_a_ptr(seq),
3147                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3148         }
3149
3150         if (payload && payload->out.offset != 0) {
3151                 pv->set_pmu_allocation_ptr(pmu, &out,
3152                 ((u8 *)&cmd->cmd + payload->out.offset));
3153                 pv->pmu_allocation_set_dmem_size(pmu, out,
3154                 (u16)payload->out.size);
3155
3156                 if (payload->out.buf != payload->in.buf) {
3157                         err = pmu->dmem.alloc(&pmu->dmem,
3158                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3159                         pv->pmu_allocation_get_dmem_size(pmu, out));
3160                         if (err)
3161                                 goto clean_up;
3162                 } else {
3163                         BUG_ON(in == NULL);
3164                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3165                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3166                 }
3167
3168                 pv->pmu_allocation_set_dmem_size(pmu,
3169                 pv->get_pmu_seq_out_a_ptr(seq),
3170                 pv->pmu_allocation_get_dmem_size(pmu, out));
3171                 pv->pmu_allocation_set_dmem_offset(pmu,
3172                 pv->get_pmu_seq_out_a_ptr(seq),
3173                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3174         }
3175
3176         seq->state = PMU_SEQ_STATE_USED;
3177         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3178         if (err)
3179                 seq->state = PMU_SEQ_STATE_PENDING;
3180
3181         gk20a_dbg_fn("done");
3182
3183         return 0;
3184
3185 clean_up:
3186         gk20a_dbg_fn("fail");
3187         if (in)
3188                 pmu->dmem.free(&pmu->dmem,
3189                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3190                 pv->pmu_allocation_get_dmem_size(pmu, in));
3191         if (out)
3192                 pmu->dmem.free(&pmu->dmem,
3193                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3194                 pv->pmu_allocation_get_dmem_size(pmu, out));
3195
3196         pmu_seq_release(pmu, seq);
3197         return err;
3198 }
3199
3200 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3201 {
3202         struct pmu_gk20a *pmu = &g->pmu;
3203         struct pmu_cmd cmd;
3204         u32 seq, status;
3205
3206         gk20a_dbg_fn("");
3207
3208         memset(&cmd, 0, sizeof(struct pmu_cmd));
3209         cmd.hdr.unit_id = PMU_UNIT_PG;
3210         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3211         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3212         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3213         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3214
3215         /* no need to wait for an ack on ELPG enable, but mark it pending
3216            so the follow-up ELPG disable can synchronize with it */
3217         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3218
3219         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
3220         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3221                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3222
3223         BUG_ON(status != 0);
3224
3225         gk20a_dbg_fn("done");
3226         return 0;
3227 }
3228
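/*
 * Reference-counted ELPG enable.  The ALLOW command is only sent once the
 * golden context exists and ELPG is currently off; repeated enables just
 * bump the refcount.
 */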
3229 int gk20a_pmu_enable_elpg(struct gk20a *g)
3230 {
3231         struct pmu_gk20a *pmu = &g->pmu;
3232         struct gr_gk20a *gr = &g->gr;
3233
3234         int ret = 0;
3235
3236         gk20a_dbg_fn("");
3237
3238         mutex_lock(&pmu->elpg_mutex);
3239
3240         pmu->elpg_refcnt++;
3241         if (pmu->elpg_refcnt <= 0)
3242                 goto exit_unlock;
3243
3244         /* something is not right if we end up in the following code path */
3245         if (unlikely(pmu->elpg_refcnt > 1)) {
3246                 gk20a_warn(dev_from_gk20a(g),
3247                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3248                 __func__, pmu->elpg_refcnt);
3249                 WARN_ON(1);
3250         }
3251
3252         /* do NOT enable ELPG until the golden context is created,
3253            which is related to the context that ELPG saves and restores. */
3254         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3255                 goto exit_unlock;
3256
3257         /* return if ELPG is already on or on_pending or off_on_pending */
3258         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3259                 goto exit_unlock;
3260
3261         ret = gk20a_pmu_enable_elpg_locked(g);
3262
3263 exit_unlock:
3264         mutex_unlock(&pmu->elpg_mutex);
3265         gk20a_dbg_fn("done");
3266         return ret;
3267 }
3268
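/*
 * Reference-counted ELPG disable.  If an enable is still pending, its ALLOW
 * ack is awaited first; then DISALLOW is posted and the code waits (up to
 * the gr idle timeout) for the PMU to confirm that ELPG is off.
 */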
3269 int gk20a_pmu_disable_elpg(struct gk20a *g)
3270 {
3271         struct pmu_gk20a *pmu = &g->pmu;
3272         struct pmu_cmd cmd;
3273         u32 seq;
3274         int ret = 0;
3275
3276         gk20a_dbg_fn("");
3277
3278         mutex_lock(&pmu->elpg_mutex);
3279
3280         pmu->elpg_refcnt--;
3281         if (pmu->elpg_refcnt > 0) {
3282                 gk20a_warn(dev_from_gk20a(g),
3283                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3284                 __func__, pmu->elpg_refcnt);
3285                 WARN_ON(1);
3286                 ret = 0;
3287                 goto exit_unlock;
3288         }
3289
3290         /* cancel off_on_pending and return */
3291         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3292                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3293                 ret = 0;
3294                 goto exit_reschedule;
3295         }
3296         /* wait if on_pending */
3297         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3298
3299                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3300                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3301
3302                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3303                         gk20a_err(dev_from_gk20a(g),
3304                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3305                                 pmu->elpg_stat);
3306                         pmu_dump_elpg_stats(pmu);
3307                         pmu_dump_falcon_stats(pmu);
3308                         ret = -EBUSY;
3309                         goto exit_unlock;
3310                 }
3311         }
3312         /* return if ELPG is already off */
3313         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3314                 ret = 0;
3315                 goto exit_reschedule;
3316         }
3317
3318         memset(&cmd, 0, sizeof(struct pmu_cmd));
3319         cmd.hdr.unit_id = PMU_UNIT_PG;
3320         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3321         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3322         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3323         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3324
3325         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3326
3327         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
3328         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3329                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3330
3331         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3332                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3333         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3334                 gk20a_err(dev_from_gk20a(g),
3335                         "ELPG_DISALLOW_ACK failed");
3336                 pmu_dump_elpg_stats(pmu);
3337                 pmu_dump_falcon_stats(pmu);
3338                 ret = -EBUSY;
3339                 goto exit_unlock;
3340         }
3341
3342 exit_reschedule:
3343 exit_unlock:
3344         mutex_unlock(&pmu->elpg_mutex);
3345         gk20a_dbg_fn("done");
3346         return ret;
3347 }
3348
3349 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3350 {
3351         struct pmu_gk20a *pmu = &g->pmu;
3352         int err;
3353
3354         gk20a_dbg_fn("");
3355
3356         if (enable)
3357                 err = pmu_perfmon_start_sampling(pmu);
3358         else
3359                 err = pmu_perfmon_stop_sampling(pmu);
3360
3361         return err;
3362 }
3363
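/*
 * Tear down the PMU on power-down: flush the pending pg_init work, fold the
 * outstanding ELPG residency counters into the device-wide software totals,
 * disable ELPG, then disable PMU interrupts and mark the PMU off.
 */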
3364 int gk20a_pmu_destroy(struct gk20a *g)
3365 {
3366         struct pmu_gk20a *pmu = &g->pmu;
3367         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3368
3369         gk20a_dbg_fn("");
3370
3371         if (!support_gk20a_pmu())
3372                 return 0;
3373
3374         /* make sure the pending operations are finished before we continue */
3375         cancel_work_sync(&pmu->pg_init);
3376
3377         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3378                 &elpg_ungating_time, &gating_cnt);
3379
3380         gk20a_pmu_disable_elpg(g);
3381         pmu->initialized = false;
3382
3383         /* update the s/w ELPG residency counters */
3384         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3385         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3386         g->pg_gating_cnt += gating_cnt;
3387
3388         mutex_lock(&pmu->isr_enable_lock);
3389         pmu_enable(pmu, false);
3390         pmu->isr_enabled = false;
3391         mutex_unlock(&pmu->isr_enable_lock);
3392
3393         pmu->pmu_state = PMU_STATE_OFF;
3394         pmu->pmu_ready = false;
3395         pmu->perfmon_ready = false;
3396         pmu->zbc_ready = false;
3397
3398         gk20a_dbg_fn("done");
3399         return 0;
3400 }
3401
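/*
 * Read the most recent 16-bit perfmon load sample from PMU DMEM and scale
 * it down by a factor of 10 for the caller.  Reports zero load until
 * perfmon sampling has been started.
 */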
3402 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3403 {
3404         struct pmu_gk20a *pmu = &g->pmu;
3405         u16 _load = 0;
3406
3407         if (!pmu->perfmon_ready) {
3408                 *load = 0;
3409                 return 0;
3410         }
3411
3412         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3413         *load = _load / 10;
3414
3415         return 0;
3416 }
3417
3418 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3419                                  u32 *total_cycles)
3420 {
3421         if (!g->power_on) {
3422                 *busy_cycles = 0;
3423                 *total_cycles = 0;
3424                 return;
3425         }
3426
3427         gk20a_busy(g->dev);
3428         *busy_cycles = pwr_pmu_idle_count_value_v(
3429                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3430         rmb();
3431         *total_cycles = pwr_pmu_idle_count_value_v(
3432                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3433         gk20a_idle(g->dev);
3434 }
3435
3436 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3437 {
3438         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3439
3440         if (!g->power_on)
3441                 return;
3442
3443         gk20a_busy(g->dev);
3444         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3445         wmb();
3446         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3447         gk20a_idle(g->dev);
3448 }
3449
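/*
 * Read the ELPG residency statistics the PMU keeps in its DMEM: gating and
 * ungating time in microseconds plus the number of gating events.  All
 * counters read back as zero while the PMU is not initialized.
 */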
3450 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3451                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3452 {
3453         struct pmu_gk20a *pmu = &g->pmu;
3454         struct pmu_pg_stats stats;
3455
3456         if (!pmu->initialized) {
3457                 *ingating_time = 0;
3458                 *ungating_time = 0;
3459                 *gating_cnt = 0;
3460                 return 0;
3461         }
3462
3463         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3464                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3465
3466         *ingating_time = stats.pg_ingating_time_us;
3467         *ungating_time = stats.pg_ungating_time_us;
3468         *gating_cnt = stats.pg_gating_cnt;
3469
3470         return 0;
3471 }
3472
3473 /* Send an Adaptive Power (AP) related command to the PMU */
3474 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3475                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3476 {
3477         struct pmu_gk20a *pmu = &g->pmu;
3478         /* FIXME: where is the PG structure defined?? */
3479         u32 status = 0;
3480         struct pmu_cmd cmd;
3481         u32 seq;
3482         pmu_callback p_callback = NULL;
3483
3484         memset(&cmd, 0, sizeof(struct pmu_cmd));
3485
3486         /* Copy common members */
3487         cmd.hdr.unit_id = PMU_UNIT_PG;
3488         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3489
3490         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3491         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3492
3493         /* Copy other members of command */
3494         switch (p_ap_cmd->cmn.cmd_id) {
3495         case PMU_AP_CMD_ID_INIT:
3496                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
3497                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3498                         p_ap_cmd->init.pg_sampling_period_us;
3499                 p_callback = ap_callback_init_and_enable_ctrl;
3500                 break;
3501
3502         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3503                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
3504                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3505                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3506                 memcpy(
3507                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3508                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3509                         sizeof(struct pmu_ap_ctrl_init_params));
3510
3511                 p_callback = ap_callback_init_and_enable_ctrl;
3512                 break;
3513
3514         case PMU_AP_CMD_ID_ENABLE_CTRL:
3515                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
3516                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3517                         p_ap_cmd->enable_ctrl.ctrl_id;
3518                 break;
3519
3520         case PMU_AP_CMD_ID_DISABLE_CTRL:
3521                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
3522                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3523                         p_ap_cmd->disable_ctrl.ctrl_id;
3524                 break;
3525
3526         case PMU_AP_CMD_ID_KICK_CTRL:
3527                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
3528                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3529                         p_ap_cmd->kick_ctrl.ctrl_id;
3530                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3531                         p_ap_cmd->kick_ctrl.skip_count;
3532                 break;
3533
3534         default:
3535                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3536                         __func__, p_ap_cmd->cmn.cmd_id);
3537                 return 0x2f;
3538         }
3539
3540         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3541                         p_callback, pmu, &seq, ~0);
3542
3543         if (status) {
3544                 gk20a_dbg_pmu(
3545                         "%s: Unable to submit Adaptive Power Command %d\n",
3546                         __func__, p_ap_cmd->cmn.cmd_id);
3547                 goto err_return;
3548         }
3549
3550         /* TODO: Implement blocking calls (b_block) */
3551
3552 err_return:
3553         return status;
3554 }
3555
3556 static void ap_callback_init_and_enable_ctrl(
3557                 struct gk20a *g, struct pmu_msg *msg,
3558                 void *param, u32 seq_desc, u32 status)
3559 {
3560         /* Define p_ap (i.e. a pointer to the pmu_ap structure) */
3561         WARN_ON(!msg);
3562
3563         if (!status) {
3564                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3565                 case PMU_AP_MSG_ID_INIT_ACK:
3566                         gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
3567                         break;
3568
3569                 default:
3570                         gk20a_dbg_pmu(
3571                         "%s: Invalid Adaptive Power Message: %x\n",
3572                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3573                         break;
3574                 }
3575         }
3576 }
3577
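/*
 * Kick off AELPG by sending PMU_AP_CMD_ID_INIT with the default
 * power-gating sampling period; individual controllers are enabled
 * separately via gk20a_aelpg_init_and_enable().
 */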
3578 static int gk20a_aelpg_init(struct gk20a *g)
3579 {
3580         int status = 0;
3581
3582         /* Remove reliance on app_ctrl field. */
3583         union pmu_ap_cmd ap_cmd;
3584
3585         /* TODO: Check for elpg being ready? */
3586         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3587         ap_cmd.init.pg_sampling_period_us =
3588                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3589
3590         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3591         return status;
3592 }
3593
3594 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3595 {
3596         int status = 0;
3597         union pmu_ap_cmd ap_cmd;
3598
3599         /* TODO: Probably check if ELPG is ready? */
3600
3601         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3602         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3603         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3604                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3605         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3606                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3607         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3608                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3609         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3610                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3611
3612         switch (ctrl_id) {
3613         case PMU_AP_CTRL_ID_GRAPHICS:
3614                 break;
3615         default:
3616                 break;
3617         }
3618
3619         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3620         return status;
3621 }
3622
3623 #ifdef CONFIG_DEBUG_FS
3624 static int elpg_residency_show(struct seq_file *s, void *data)
3625 {
3626         struct gk20a *g = s->private;
3627         u32 ingating_time = 0;
3628         u32 ungating_time = 0;
3629         u32 gating_cnt;
3630         u64 total_ingating, total_ungating, residency, divisor, dividend;
3631
3632         /* Don't unnecessarily power on the device */
3633         if (g->power_on) {
3634                 gk20a_busy(g->dev);
3635                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3636                         &ungating_time, &gating_cnt);
3637                 gk20a_idle(g->dev);
3638         }
3639         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3640         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3641         divisor = total_ingating + total_ungating;
3642
3643         /* We compute the residency on a scale of 1000 */
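        /* e.g. 750000 us gated and 250000 us ungated give a residency of 750 */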
3644         dividend = total_ingating * 1000;
3645
3646         if (divisor)
3647                 residency = div64_u64(dividend, divisor);
3648         else
3649                 residency = 0;
3650
3651         seq_printf(s, "Time in ELPG: %llu us\n"
3652                         "Time out of ELPG: %llu us\n"
3653                         "ELPG residency ratio: %llu\n",
3654                         total_ingating, total_ungating, residency);
3655         return 0;
3657 }
3658
3659 static int elpg_residency_open(struct inode *inode, struct file *file)
3660 {
3661         return single_open(file, elpg_residency_show, inode->i_private);
3662 }
3663
3664 static const struct file_operations elpg_residency_fops = {
3665         .open           = elpg_residency_open,
3666         .read           = seq_read,
3667         .llseek         = seq_lseek,
3668         .release        = single_release,
3669 };
3670
3671 static int elpg_transitions_show(struct seq_file *s, void *data)
3672 {
3673         struct gk20a *g = s->private;
3674         u32 ingating_time, ungating_time, total_gating_cnt;
3675         u32 gating_cnt = 0;
3676
3677         if (g->power_on) {
3678                 gk20a_busy(g->dev);
3679                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3680                         &ungating_time, &gating_cnt);
3681                 gk20a_idle(g->dev);
3682         }
3683         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3684
3685         seq_printf(s, "%u\n", total_gating_cnt);
3686         return 0;
3688 }
3689
3690 static int elpg_transitions_open(struct inode *inode, struct file *file)
3691 {
3692         return single_open(file, elpg_transitions_show, inode->i_private);
3693 }
3694
3695 static const struct file_operations elpg_transitions_fops = {
3696         .open           = elpg_transitions_open,
3697         .read           = seq_read,
3698         .llseek         = seq_lseek,
3699         .release        = single_release,
3700 };
3701
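/*
 * Create the ELPG debugfs nodes under the platform's debugfs directory:
 * "elpg_residency" reports gated/ungated time and the residency ratio on a
 * scale of 1000, and "elpg_transitions" reports the total gating count.
 */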
3702 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3703 {
3704         struct dentry *d;
3705         struct gk20a_platform *platform = platform_get_drvdata(dev);
3706         struct gk20a *g = get_gk20a(dev);
3707
3708         d = debugfs_create_file(
3709                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3710                                                 &elpg_residency_fops);
3711         if (!d)
3712                 goto err_out;
3713
3714         d = debugfs_create_file(
3715                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3716                                                 &elpg_transitions_fops);
3717         if (!d)
3718                 goto err_out;
3719
3720         return 0;
3721
3722 err_out:
3723         pr_err("%s: Failed to make debugfs node\n", __func__);
3724         debugfs_remove_recursive(platform->debugfs);
3725         return -ENOMEM;
3726 }
3727 #endif