#ifndef __NVC0_SCREEN_H__
#define __NVC0_SCREEN_H__

#include "nouveau_screen.h"
#include "nouveau_mm.h"
#include "nouveau_fence.h"
#include "nouveau_heap.h"

#include "nv_object.xml.h"

#include "nvc0/nvc0_winsys.h"
#include "nvc0/nvc0_stateobj.h"

/* Number of TIC / TSC entries. Each value also sizes the matching lock
 * bitmask in struct nvc0_screen (one bit per entry, 32 entries per word).
 */
#define NVC0_TIC_MAX_ENTRIES 2048
#define NVC0_TSC_MAX_ENTRIES 2048

/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
#define NVC0_MAX_PIPE_CONSTBUFS         14
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE  7

/* Upper bound on surface slots (not referenced elsewhere in this header). */
#define NVC0_MAX_SURFACE_SLOTS 16

/* Upper bound on viewports (not referenced elsewhere in this header). */
#define NVC0_MAX_VIEWPORTS 16


struct nvc0_context;

struct nvc0_blitter;

/* Per-screen driver state for nvc0.
 *
 * 'base' is the first member, and nvc0_screen() obtains this struct by
 * casting a struct pipe_screen pointer, so 'base' must stay first.
 */
struct nvc0_screen {
   struct nouveau_screen base;

   /* NOTE(review): presumably the context currently bound to the channel;
    * confirm against the users of this field. */
   struct nvc0_context *cur_ctx;

   int num_occlusion_queries_active;

   /* Buffer objects owned by the screen. */
   struct nouveau_bo *text;
   struct nouveau_bo *parm;       /* for COMPUTE */
   struct nouveau_bo *uniform_bo; /* for 3D */
   struct nouveau_bo *tls;
   struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
   struct nouveau_bo *poly_cache;

   uint16_t mp_count;
   uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */

   struct nouveau_heap *text_heap;
   struct nouveau_heap *lib_code; /* allocated from text_heap */

   struct nvc0_blitter *blitter;

   /* TIC bookkeeping: entries[] is indexed by entry id; bit (id % 32) of
    * lock[id / 32] is the lock bit of entry 'id' (see
    * nvc0_screen_tic_unlock / nvc0_screen_tic_free).
    * NOTE(review): 'next' is presumably the allocation cursor; confirm in
    * nvc0_screen_tic_alloc. */
   struct {
      void **entries;
      int next;
      uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
   } tic;

   /* TSC bookkeeping, same layout as 'tic' above. */
   struct {
      void **entries;
      int next;
      uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
   } tsc;

   /* Fence buffer object and a pointer to its 32-bit words.
    * NOTE(review): 'map' is presumably the CPU mapping of 'bo'; confirm. */
   struct {
      struct nouveau_bo *bo;
      uint32_t *map;
   } fence;

   /* Performance-monitoring (MP counter) state. */
   struct {
      struct nvc0_program *prog; /* compute state object to read MP counters */
      struct pipe_query *mp_counter[8]; /* counter to query allocation */
      uint8_t num_mp_pm_active[2];
      boolean mp_counters_enabled;
   } pm;

   struct nouveau_mman *mm_VRAM_fe0;

   /* Hardware / software objects created for this screen. */
   struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
   struct nouveau_object *eng2d;
   struct nouveau_object *m2mf;
   struct nouveau_object *compute;
   struct nouveau_object *nvsw;
};

/* Reinterpret a generic pipe_screen pointer as the nvc0 screen that
 * embeds it. No checking is performed; the caller must pass a screen
 * that really is a struct nvc0_screen.
 */
static INLINE struct nvc0_screen *
nvc0_screen(struct pipe_screen *screen)
{
   struct nvc0_screen *nvc0 = (struct nvc0_screen *)screen;

   return nvc0;
}


/* Performance counter queries:
 *
 * NVE4_PM_QUERY(i) maps a counter index i (one of the NVE4_PM_QUERY_*
 * values below, 0 .. NVE4_PM_QUERY_COUNT - 1) to a query type starting at
 * PIPE_QUERY_DRIVER_SPECIFIC. NVE4_PM_QUERY_COUNT must be kept in sync
 * with the highest index defined here.
 */
#define NVE4_PM_QUERY_COUNT  49
#define NVE4_PM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_PM_QUERY_LAST   NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
#define NVE4_PM_QUERY_PROF_TRIGGER_0            0
#define NVE4_PM_QUERY_PROF_TRIGGER_1            1
#define NVE4_PM_QUERY_PROF_TRIGGER_2            2
#define NVE4_PM_QUERY_PROF_TRIGGER_3            3
#define NVE4_PM_QUERY_PROF_TRIGGER_4            4
#define NVE4_PM_QUERY_PROF_TRIGGER_5            5
#define NVE4_PM_QUERY_PROF_TRIGGER_6            6
#define NVE4_PM_QUERY_PROF_TRIGGER_7            7
#define NVE4_PM_QUERY_LAUNCHED_WARPS            8
#define NVE4_PM_QUERY_LAUNCHED_THREADS          9
#define NVE4_PM_QUERY_LAUNCHED_CTA              10
#define NVE4_PM_QUERY_INST_ISSUED1              11
#define NVE4_PM_QUERY_INST_ISSUED2              12
#define NVE4_PM_QUERY_INST_EXECUTED             13
#define NVE4_PM_QUERY_LD_LOCAL                  14
#define NVE4_PM_QUERY_ST_LOCAL                  15
#define NVE4_PM_QUERY_LD_SHARED                 16
#define NVE4_PM_QUERY_ST_SHARED                 17
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT         18
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS        19
#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT        20
#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS       21
#define NVE4_PM_QUERY_GLD_REQUEST               22
#define NVE4_PM_QUERY_GST_REQUEST               23
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT        24
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS       25
#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
#define NVE4_PM_QUERY_GST_TRANSACTIONS          27
#define NVE4_PM_QUERY_BRANCH                    28
#define NVE4_PM_QUERY_BRANCH_DIVERGENT          29
#define NVE4_PM_QUERY_ACTIVE_WARPS              30
#define NVE4_PM_QUERY_ACTIVE_CYCLES             31
#define NVE4_PM_QUERY_INST_ISSUED               32
#define NVE4_PM_QUERY_ATOM_COUNT                33
#define NVE4_PM_QUERY_GRED_COUNT                34
#define NVE4_PM_QUERY_LD_SHARED_REPLAY          35
#define NVE4_PM_QUERY_ST_SHARED_REPLAY          36
#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS     37
#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS     38
#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY        41
#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY        42
#define NVE4_PM_QUERY_METRIC_IPC                43
#define NVE4_PM_QUERY_METRIC_IPAC               44
#define NVE4_PM_QUERY_METRIC_IPEC               45
#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY       46
#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY      47
#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD  48

/*
#define NVE4_PM_QUERY_GR_IDLE                   50
#define NVE4_PM_QUERY_BSP_IDLE                  51
#define NVE4_PM_QUERY_VP_IDLE                   52
#define NVE4_PM_QUERY_PPP_IDLE                  53
#define NVE4_PM_QUERY_CE0_IDLE                  54
#define NVE4_PM_QUERY_CE1_IDLE                  55
#define NVE4_PM_QUERY_CE2_IDLE                  56
*/
/* L2 queries (PCOUNTER) */
/*
#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
...
*/
/* TEX queries (PCOUNTER) */
/*
#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
...
*/

/* NVC0 performance counter queries.
 *
 * NVC0_PM_QUERY(i) maps a counter index i (0 .. NVC0_PM_QUERY_COUNT - 1)
 * to a query type at PIPE_QUERY_DRIVER_SPECIFIC + 2048, which keeps this
 * range disjoint from NVE4_PM_QUERY (offset 0) and NVC0_QUERY_DRV_STAT
 * (offset 1024).
 */
#define NVC0_PM_QUERY_COUNT 31
#define NVC0_PM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_PM_QUERY_LAST   NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
#define NVC0_PM_QUERY_INST_EXECUTED             0
#define NVC0_PM_QUERY_BRANCH                    1
#define NVC0_PM_QUERY_BRANCH_DIVERGENT          2
#define NVC0_PM_QUERY_ACTIVE_WARPS              3
#define NVC0_PM_QUERY_ACTIVE_CYCLES             4
#define NVC0_PM_QUERY_LAUNCHED_WARPS            5
#define NVC0_PM_QUERY_LAUNCHED_THREADS          6
#define NVC0_PM_QUERY_LD_SHARED                 7
#define NVC0_PM_QUERY_ST_SHARED                 8
#define NVC0_PM_QUERY_LD_LOCAL                  9
#define NVC0_PM_QUERY_ST_LOCAL                  10
#define NVC0_PM_QUERY_GRED_COUNT                11
#define NVC0_PM_QUERY_ATOM_COUNT                12
#define NVC0_PM_QUERY_GLD_REQUEST               13
#define NVC0_PM_QUERY_GST_REQUEST               14
#define NVC0_PM_QUERY_INST_ISSUED1_0            15
#define NVC0_PM_QUERY_INST_ISSUED1_1            16
#define NVC0_PM_QUERY_INST_ISSUED2_0            17
#define NVC0_PM_QUERY_INST_ISSUED2_1            18
#define NVC0_PM_QUERY_TH_INST_EXECUTED_0        19
#define NVC0_PM_QUERY_TH_INST_EXECUTED_1        20
#define NVC0_PM_QUERY_TH_INST_EXECUTED_2        21
#define NVC0_PM_QUERY_TH_INST_EXECUTED_3        22
#define NVC0_PM_QUERY_PROF_TRIGGER_0            23
#define NVC0_PM_QUERY_PROF_TRIGGER_1            24
#define NVC0_PM_QUERY_PROF_TRIGGER_2            25
#define NVC0_PM_QUERY_PROF_TRIGGER_3            26
#define NVC0_PM_QUERY_PROF_TRIGGER_4            27
#define NVC0_PM_QUERY_PROF_TRIGGER_5            28
#define NVC0_PM_QUERY_PROF_TRIGGER_6            29
#define NVC0_PM_QUERY_PROF_TRIGGER_7            30

/* Driver statistics queries:
 *
 * Only available when NOUVEAU_ENABLE_DRIVER_STATISTICS is defined.
 * NVC0_QUERY_DRV_STAT(i) maps a statistic index i
 * (0 .. NVC0_QUERY_DRV_STAT_COUNT - 1) to a query type at
 * PIPE_QUERY_DRIVER_SPECIFIC + 1024.
 *
 * Without NOUVEAU_ENABLE_DRIVER_STATISTICS only NVC0_QUERY_DRV_STAT_COUNT
 * is defined (as 0); NVC0_QUERY_DRV_STAT(), NVC0_QUERY_DRV_STAT_LAST and
 * the index names do not exist, so users must guard on the same macro.
 */
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

#define NVC0_QUERY_DRV_STAT(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
#define NVC0_QUERY_DRV_STAT_COUNT  29
#define NVC0_QUERY_DRV_STAT_LAST   NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT         0
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES         1
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT         2
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID     3
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS     4
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ               5
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE              6
#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT                   7
#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT                   8
#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT            9
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ              10
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE             11
#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID      12
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT          13
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID     14
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS     15
#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES                  16
#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT                19
#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT             20
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY                21
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED              22
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT       23
#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES        24
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT           25
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES           26
#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT                   27
#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT         28

#else

#define NVC0_QUERY_DRV_STAT_COUNT 0

#endif

/* Screen-level entry points. Only the declarations live in this header;
 * the notes below are inferred from names and signatures and should be
 * confirmed against the definitions.
 */

/* NOTE(review): presumably fills in the info for the driver query with the
 * given index; return-value convention not visible here. */
int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
                                      struct pipe_driver_query_info *);

/* NOTE(review): presumably set up / tear down screen->blitter. */
boolean nvc0_blitter_create(struct nvc0_screen *);
void nvc0_blitter_destroy(struct nvc0_screen *);

void nvc0_screen_make_buffers_resident(struct nvc0_screen *);

/* NOTE(review): presumably reserve a TIC / TSC slot for the given entry
 * and return its id; confirm the failure convention in the definition. */
int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);

/* Compute setup, one variant with the nve4 prefix and one with nvc0. */
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);

/* NOTE(review): lpos / lneg / cstack are presumably per-thread local memory
 * and call stack sizes; units are not visible here. */
boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
                                    uint32_t lneg, uint32_t cstack);

/* Attach the screen's current fence to a resource.
 *
 * Does nothing unless res->mm is set. Otherwise res->fence is made to
 * reference the current fence, and res->fence_wr as well when 'flags'
 * contains NOUVEAU_BO_WR.
 */
static INLINE void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{
   struct nvc0_screen *screen;

   if (!res->mm)
      return;

   screen = nvc0_screen(res->base.screen);

   nouveau_fence_ref(screen->base.fence.current, &res->fence);

   if (!(flags & NOUVEAU_BO_WR))
      return;

   nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
}

/* Record GPU access to a resource and fence it.
 *
 * Resources without a buffer object are ignored. NOUVEAU_BO_WR in 'flags'
 * marks the resource as being written by the GPU and dirty;
 * NOUVEAU_BO_RD marks it as being read by the GPU. The resource is then
 * passed to nvc0_resource_fence() with the same flags.
 */
static INLINE void
nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
{
   if (!likely(res->bo))
      return;

   if (flags & NOUVEAU_BO_WR) {
      res->status |= NOUVEAU_BUFFER_STATUS_DIRTY;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   }

   if (flags & NOUVEAU_BO_RD) {
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
   }

   nvc0_resource_fence(res, flags);
}

/* One entry of nvc0_format_table.
 *
 * NOTE(review): field meanings are inferred from their names and should be
 * confirmed against the table definition: 'rt', 'tic' and 'vtx' presumably
 * hold the hardware format encodings for render target, texture and vertex
 * use, and 'usage' presumably holds the supported-usage flags.
 */
struct nvc0_format {
   uint32_t rt;
   uint32_t tic;
   uint32_t vtx;
   uint32_t usage;
};

/* Defined outside this header; NOTE(review): presumably indexed by pipe
 * format. */
extern const struct nvc0_format nvc0_format_table[];

/* Clear the lock bit of a TIC entry.
 *
 * The lock bitmask stores one bit per entry: bit (id % 32) of word
 * (id / 32). Entries with a negative id are ignored.
 */
static INLINE void
nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
   if (tic->id >= 0) {
      /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
   }
}

/* Clear the lock bit of a TSC entry.
 *
 * The lock bitmask stores one bit per entry: bit (id % 32) of word
 * (id / 32). Entries with a negative id are ignored.
 */
static INLINE void
nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
   if (tsc->id >= 0) {
      /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
   }
}

/* Release a TIC slot.
 *
 * Clears the screen's entries[] pointer for the slot and its lock bit.
 * tic->id itself is not modified. Entries with a negative id are ignored.
 */
static INLINE void
nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
   if (tic->id >= 0) {
      screen->tic.entries[tic->id] = NULL;
      /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
   }
}

/* Release a TSC slot.
 *
 * Clears the screen's entries[] pointer for the slot and its lock bit.
 * tsc->id itself is not modified. Entries with a negative id are ignored.
 */
static INLINE void
nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
   if (tsc->id >= 0) {
      screen->tsc.entries[tsc->id] = NULL;
      /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
   }
}

#endif
