/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "ir2_private.h"

static bool is_mov(struct ir2_instr *instr)
{
	return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
		instr->src_count == 1;
}

static void src_combine(struct ir2_src *src, struct ir2_src b)
{
	src->num = b.num;
	src->type = b.type;
	src->swizzle = swiz_merge(b.swizzle, src->swizzle);
	if (!src->abs) /* if we have abs we don't care about previous negate */
		src->negate ^= b.negate;
	src->abs |= b.abs;
}

/* cp_src: replace src regs when they refer to a mov instruction
 * example:
 *	ALU:      MAXv    R7 = C7, C7
 *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
 * becomes:
 *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
 */
void cp_src(struct ir2_context *ctx)
{
	struct ir2_instr *p;

	ir2_foreach_instr(instr, ctx) {
		ir2_foreach_src(src, instr) {
			/* loop to replace recursively */
			do {
				if (src->type != IR2_SRC_SSA)
					break;

				p = &ctx->instr[src->num];
				/* don't work across blocks to avoid possible issues */
				if (p->block_idx != instr->block_idx)
					break;

				if (!is_mov(p))
					break;

				/* cant apply abs to const src, const src only for alu */
				if (p->src[0].type == IR2_SRC_CONST &&
					(src->abs || instr->type != IR2_ALU))
					break;

				src_combine(src, p->src[0]);
			} while (1);
		}
	}
}

/* cp_export: replace mov to export when possible
 * in the cp_src pass we bypass any mov instructions related
 * to the src registers, but for exports for need something different
 * example:
 *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
 *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
 *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
 * becomes:
 *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
 *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
 *
 */
void cp_export(struct ir2_context *ctx)
{
	struct ir2_instr *c[4], *ins[4];
	struct ir2_src *src;
	struct ir2_reg *reg;
	unsigned ncomp;

	ir2_foreach_instr(instr, ctx) {
		if (!is_export(instr)) /* TODO */
			continue;

		if (!is_mov(instr))
			continue;

		src = &instr->src[0];

		if (src->negate || src->abs) /* TODO handle these cases */
			continue;

		if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
			continue;

		reg = get_reg_src(ctx, src);
		ncomp = dst_ncomp(instr);

		unsigned reswiz[4] = {};
		unsigned num_instr = 0;

		/* fill array c with pointers to instrs that write each component */
		if (src->type == IR2_SRC_SSA) {
			struct ir2_instr *instr = &ctx->instr[src->num];

			if (instr->type != IR2_ALU)
				continue;

			for (int i = 0; i < ncomp; i++)
				c[i] = instr;

			ins[num_instr++] = instr;
			reswiz[0] = src->swizzle;
		} else {
			bool ok = true;
			unsigned write_mask = 0;

			ir2_foreach_instr(instr, ctx) {
				if (instr->is_ssa || instr->reg != reg)
					continue;

				/* set by non-ALU */
				if (instr->type != IR2_ALU) {
					ok = false;
					break;
				}

				/* component written more than once */
				if (write_mask & instr->alu.write_mask) {
					ok = false;
					break;
				}

				write_mask |= instr->alu.write_mask;

				/* src pointers for components */
				for (int i = 0, j = 0; i < 4; i++) {
					unsigned k = swiz_get(src->swizzle, i);
					if (instr->alu.write_mask & 1 << k) {
						c[i] = instr;

						/* reswiz = compressed src->swizzle */
						unsigned x = 0;
						for (int i = 0; i < k; i++)
							x += !!(instr->alu.write_mask & 1 << i);

						assert(src->swizzle || x == j);
						reswiz[num_instr] |= swiz_set(x, j++);
					}
				}
				ins[num_instr++] = instr;
			}
			if (!ok)
				continue;
		}

		bool redirect = true;

		/* must all be in same block */
		for (int i = 0; i < ncomp; i++)
			redirect &= (c[i]->block_idx == instr->block_idx);

		/* no other instr using the value */
		ir2_foreach_instr(p, ctx) {
			if (p == instr)
				continue;
			ir2_foreach_src(src, p)
				redirect &= reg != get_reg_src(ctx, src);
		}

		if (!redirect)
			continue;

		/* redirect the instructions writing to the register */
		for (int i = 0; i < num_instr; i++) {
			struct ir2_instr *p = ins[i];

			p->alu.export = instr->alu.export;
			p->alu.write_mask = 0;
			p->is_ssa = true;
			p->ssa.ncomp = 0;
			memset(p->ssa.comp, 0, sizeof(p->ssa.comp));

			switch (instr->alu.vector_opc) {
			case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
			case DOT2ADDv:
			case DOT3v:
			case DOT4v:
			case CUBEv:
				continue;
			default:
				break;
			}
			ir2_foreach_src(s, p)
				swiz_merge_p(&s->swizzle, reswiz[i]);
		}

		for (int i = 0; i < ncomp; i++) {
			c[i]->alu.write_mask |= (1 << i);
			c[i]->ssa.ncomp++;
		}
		instr->type = IR2_NONE;
		instr->need_emit = false;
	}
}
