in drivers/gpu/drm/radeon/evergreen_cs.c [1772:2670]
static int evergreen_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
struct radeon_bo_list *reloc;
struct evergreen_cs_track *track;
uint32_t *ib;
unsigned idx;
unsigned i;
unsigned start_reg, end_reg, reg;
int r;
u32 idx_value;

	track = (struct evergreen_cs_track *)p->track;
ib = p->ib.ptr;
idx = pkt->idx + 1;
idx_value = radeon_get_ib_value(p, idx);
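
	/* Every handler below validates the packet's DW count, patches the
	 * packet's raw addresses through the matching relocation entry, and
	 * rejects accesses that would fall outside the associated buffer
	 * object.
	 */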
switch (pkt->opcode) {
case PACKET3_SET_PREDICATION:
{
int pred_op;
int tmp;
uint64_t offset;
if (pkt->count != 1) {
DRM_ERROR("bad SET PREDICATION\n");
return -EINVAL;
}
tmp = radeon_get_ib_value(p, idx + 1);
pred_op = (tmp >> 16) & 0x7;
/* for the clear predicate operation */
if (pred_op == 0)
return 0;
if (pred_op > 2) {
DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET PREDICATION\n");
return -EINVAL;
}
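		/* Assemble the 64-bit predication source address: DW1 holds the
		 * 16-byte-aligned low bits, DW2 keeps its control bits and takes
		 * address bits [39:32] in its low byte.
		 */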
offset = reloc->gpu_offset +
(idx_value & 0xfffffff0) +
((u64)(tmp & 0xff) << 32);
ib[idx + 0] = offset;
ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
}
break;
case PACKET3_CONTEXT_CONTROL:
if (pkt->count != 1) {
DRM_ERROR("bad CONTEXT_CONTROL\n");
return -EINVAL;
}
break;
case PACKET3_INDEX_TYPE:
case PACKET3_NUM_INSTANCES:
case PACKET3_CLEAR_STATE:
if (pkt->count) {
DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
return -EINVAL;
}
break;
case CAYMAN_PACKET3_DEALLOC_STATE:
if (p->rdev->family < CHIP_CAYMAN) {
DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
return -EINVAL;
}
if (pkt->count) {
DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
return -EINVAL;
}
break;
case PACKET3_INDEX_BASE:
{
uint64_t offset;
if (pkt->count != 1) {
DRM_ERROR("bad INDEX_BASE\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad INDEX_BASE\n");
return -EINVAL;
}
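		/* Relocate the index buffer base address: low 32 bits first,
		 * then address bits [39:32]. */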
offset = reloc->gpu_offset +
idx_value +
((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
ib[idx+0] = offset;
ib[idx+1] = upper_32_bits(offset) & 0xff;
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
}
case PACKET3_INDEX_BUFFER_SIZE:
{
if (pkt->count != 0) {
DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
return -EINVAL;
}
break;
}
case PACKET3_DRAW_INDEX:
{
uint64_t offset;
if (pkt->count != 3) {
DRM_ERROR("bad DRAW_INDEX\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad DRAW_INDEX\n");
return -EINVAL;
}
offset = reloc->gpu_offset +
idx_value +
((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
ib[idx+0] = offset;
ib[idx+1] = upper_32_bits(offset) & 0xff;
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
}
case PACKET3_DRAW_INDEX_2:
{
uint64_t offset;
if (pkt->count != 4) {
DRM_ERROR("bad DRAW_INDEX_2\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad DRAW_INDEX_2\n");
return -EINVAL;
}
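		/* Unlike DRAW_INDEX, the first body dword is a max-size field,
		 * so the index buffer address sits in the second and third. */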
offset = reloc->gpu_offset +
radeon_get_ib_value(p, idx+1) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
ib[idx+1] = offset;
ib[idx+2] = upper_32_bits(offset) & 0xff;
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
}
case PACKET3_DRAW_INDEX_AUTO:
if (pkt->count != 1) {
DRM_ERROR("bad DRAW_INDEX_AUTO\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
return r;
}
break;
case PACKET3_DRAW_INDEX_MULTI_AUTO:
if (pkt->count != 2) {
DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
return r;
}
break;
case PACKET3_DRAW_INDEX_IMMD:
if (pkt->count < 2) {
DRM_ERROR("bad DRAW_INDEX_IMMD\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
case PACKET3_DRAW_INDEX_OFFSET:
if (pkt->count != 2) {
DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
case PACKET3_DRAW_INDEX_OFFSET_2:
if (pkt->count != 3) {
DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
case PACKET3_SET_BASE:
{
		/* Packet layout:
		 * DW 1: HEADER     - Shader_Type in bit 1 of the header selects
		 *                    the shader type of the Load, see Type-3
		 *                    Packet.
		 * DW 2: BASE_INDEX - bits [3:0] select which base address the
		 *                    last two DWs specify.
		 *                    0001: DX11 Draw_Index_Indirect Patch Table
		 *                    Base, the base address for
		 *                    Draw_Index_Indirect data.
		 * DW 3: ADDRESS_LO - bits [31:3] are the lower bits of the
		 *                    QWORD-aligned address, bits [2:0] reserved.
		 * DW 4: ADDRESS_HI - bits [7:0] are address bits [47:32],
		 *                    bits [31:8] reserved.
		 */
if (pkt->count != 2) {
DRM_ERROR("bad SET_BASE\n");
return -EINVAL;
}
/* currently only supporting setting indirect draw buffer base address */
if (idx_value != 1) {
DRM_ERROR("bad SET_BASE\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET_BASE\n");
return -EINVAL;
}
track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
ib[idx+1] = reloc->gpu_offset;
ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
break;
}
case PACKET3_DRAW_INDIRECT:
case PACKET3_DRAW_INDEX_INDIRECT:
{
u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
		/* Packet layout:
		 * DW 1: HEADER
		 * DW 2: DATA_OFFSET    - bits [31:0] byte-aligned offset where
		 *                        the required data structure starts;
		 *                        bits [1:0] are zero.
		 * DW 3: DRAW_INITIATOR - written to the VGT_DRAW_INITIATOR
		 *                        register for the assigned context.
		 */
if (pkt->count != 1) {
DRM_ERROR("bad DRAW_INDIRECT\n");
return -EINVAL;
}
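		/* DATA_OFFSET plus the indirect arguments structure (16 bytes
		 * for DRAW_INDIRECT, 20 for DRAW_INDEX_INDIRECT) must fit inside
		 * the buffer registered through SET_BASE. */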
if (idx_value + size > track->indirect_draw_buffer_size) {
dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
idx_value, size, track->indirect_draw_buffer_size);
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
}
case PACKET3_DISPATCH_DIRECT:
if (pkt->count != 3) {
DRM_ERROR("bad DISPATCH_DIRECT\n");
return -EINVAL;
}
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
return r;
}
break;
case PACKET3_DISPATCH_INDIRECT:
if (pkt->count != 1) {
DRM_ERROR("bad DISPATCH_INDIRECT\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad DISPATCH_INDIRECT\n");
return -EINVAL;
}
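		/* Fold the low 32 bits of the relocated GPU address into the
		 * dispatch buffer offset supplied by userspace. */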
ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
r = evergreen_cs_track_check(p);
if (r) {
dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
return r;
}
break;
case PACKET3_WAIT_REG_MEM:
if (pkt->count != 5) {
DRM_ERROR("bad WAIT_REG_MEM\n");
return -EINVAL;
}
/* bit 4 is reg (0) or mem (1) */
if (idx_value & 0x10) {
uint64_t offset;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad WAIT_REG_MEM\n");
return -EINVAL;
}
offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
ib[idx+2] = upper_32_bits(offset) & 0xff;
} else if (idx_value & 0x100) {
DRM_ERROR("cannot use PFP on REG wait\n");
return -EINVAL;
}
break;
case PACKET3_CP_DMA:
{
u32 command, size, info;
u64 offset, tmp;
if (pkt->count != 4) {
DRM_ERROR("bad CP DMA\n");
return -EINVAL;
}
command = radeon_get_ib_value(p, idx+4);
size = command & 0x1fffff;
info = radeon_get_ib_value(p, idx+1);
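		/* info bits [30:29] select the source address space and bits
		 * [21:20] the destination: 0 = memory, 1 = GDS, 2 = embedded
		 * data (source only). */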
if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
(((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
((((info & 0x00300000) >> 20) == 0) &&
(command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
((((info & 0x60000000) >> 29) == 0) &&
(command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
			/* non mem-to-mem copies require a dword-aligned byte count */
if (size % 4) {
DRM_ERROR("CP DMA command requires dw count alignment\n");
return -EINVAL;
}
}
if (command & PACKET3_CP_DMA_CMD_SAS) {
/* src address space is register */
/* GDS is ok */
if (((info & 0x60000000) >> 29) != 1) {
DRM_ERROR("CP DMA SAS not supported\n");
return -EINVAL;
}
} else {
if (command & PACKET3_CP_DMA_CMD_SAIC) {
DRM_ERROR("CP DMA SAIC only supported for registers\n");
return -EINVAL;
}
/* src address space is memory */
if (((info & 0x60000000) >> 29) == 0) {
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad CP DMA SRC\n");
return -EINVAL;
}
tmp = radeon_get_ib_value(p, idx) +
((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
offset = reloc->gpu_offset + tmp;
if ((tmp + size) > radeon_bo_size(reloc->robj)) {
dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
tmp + size, radeon_bo_size(reloc->robj));
return -EINVAL;
}
ib[idx] = offset;
ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
} else if (((info & 0x60000000) >> 29) != 2) {
DRM_ERROR("bad CP DMA SRC_SEL\n");
return -EINVAL;
}
}
if (command & PACKET3_CP_DMA_CMD_DAS) {
/* dst address space is register */
/* GDS is ok */
if (((info & 0x00300000) >> 20) != 1) {
DRM_ERROR("CP DMA DAS not supported\n");
return -EINVAL;
}
} else {
/* dst address space is memory */
if (command & PACKET3_CP_DMA_CMD_DAIC) {
DRM_ERROR("CP DMA DAIC only supported for registers\n");
return -EINVAL;
}
if (((info & 0x00300000) >> 20) == 0) {
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad CP DMA DST\n");
return -EINVAL;
}
tmp = radeon_get_ib_value(p, idx+2) +
((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
offset = reloc->gpu_offset + tmp;
if ((tmp + size) > radeon_bo_size(reloc->robj)) {
dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
tmp + size, radeon_bo_size(reloc->robj));
return -EINVAL;
}
ib[idx+2] = offset;
ib[idx+3] = upper_32_bits(offset) & 0xff;
} else {
DRM_ERROR("bad CP DMA DST_SEL\n");
return -EINVAL;
}
}
break;
}
case PACKET3_PFP_SYNC_ME:
if (pkt->count) {
DRM_ERROR("bad PFP_SYNC_ME\n");
return -EINVAL;
}
break;
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
return -EINVAL;
}
		/* a size of 0xffffffff with base 0 means "flush all caches"
		 * and needs no relocation */
if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
radeon_get_ib_value(p, idx + 2) != 0) {
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SURFACE_SYNC\n");
return -EINVAL;
}
ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
}
break;
case PACKET3_EVENT_WRITE:
if (pkt->count != 2 && pkt->count != 0) {
DRM_ERROR("bad EVENT_WRITE\n");
return -EINVAL;
}
if (pkt->count) {
uint64_t offset;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad EVENT_WRITE\n");
return -EINVAL;
}
offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
ib[idx+1] = offset & 0xfffffff8;
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
break;
case PACKET3_EVENT_WRITE_EOP:
{
uint64_t offset;
if (pkt->count != 4) {
DRM_ERROR("bad EVENT_WRITE_EOP\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad EVENT_WRITE_EOP\n");
return -EINVAL;
}
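		/* Keep the 4-byte-aligned low address bits in DW2; the upper
		 * bits of DW3 hold event control fields, so only the address
		 * byte [39:32] is rewritten there. */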
offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
ib[idx+1] = offset & 0xfffffffc;
ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
break;
}
case PACKET3_EVENT_WRITE_EOS:
{
uint64_t offset;
if (pkt->count != 3) {
DRM_ERROR("bad EVENT_WRITE_EOS\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad EVENT_WRITE_EOS\n");
return -EINVAL;
}
offset = reloc->gpu_offset +
(radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
ib[idx+1] = offset & 0xfffffffc;
ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
break;
}
case PACKET3_SET_CONFIG_REG:
start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
(start_reg >= PACKET3_SET_CONFIG_REG_END) ||
(end_reg >= PACKET3_SET_CONFIG_REG_END)) {
DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
return -EINVAL;
}
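		/* Walk each register in the burst: safe-listed registers pass
		 * through unchanged, everything else goes to the per-register
		 * handler for validation and relocation. */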
for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
if (evergreen_is_safe_reg(p, reg))
continue;
r = evergreen_cs_handle_reg(p, reg, idx);
if (r)
return r;
}
break;
case PACKET3_SET_CONTEXT_REG:
start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
(start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
(end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
return -EINVAL;
}
for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
if (evergreen_is_safe_reg(p, reg))
continue;
r = evergreen_cs_handle_reg(p, reg, idx);
if (r)
return r;
}
break;
case PACKET3_SET_RESOURCE:
if (pkt->count % 8) {
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
}
start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_RESOURCE_START) ||
(start_reg >= PACKET3_SET_RESOURCE_END) ||
(end_reg >= PACKET3_SET_RESOURCE_END)) {
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
}
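		/* Resource slots are 8-DW blocks; the constant type in the last
		 * dword of each block says whether it describes a texture or a
		 * vertex buffer. */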
for (i = 0; i < (pkt->count / 8); i++) {
struct radeon_bo *texture, *mipmap;
u32 toffset, moffset;
u32 size, offset, mip_address, tex_dim;
switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
case SQ_TEX_VTX_VALID_TEXTURE:
/* tex base */
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET_RESOURCE (tex)\n");
return -EINVAL;
}
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
ib[idx+1+(i*8)+1] |=
TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
if (reloc->tiling_flags & RADEON_TILING_MACRO) {
unsigned bankw, bankh, mtaspect, tile_split;
evergreen_tiling_fields(reloc->tiling_flags,
&bankw, &bankh, &mtaspect,
&tile_split);
ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
ib[idx+1+(i*8)+7] |=
TEX_BANK_WIDTH(bankw) |
TEX_BANK_HEIGHT(bankh) |
MACRO_TILE_ASPECT(mtaspect) |
TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
}
}
texture = reloc->robj;
toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
/* tex mip base */
tex_dim = ib[idx+1+(i*8)+0] & 0x7;
mip_address = ib[idx+1+(i*8)+3];
if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
!mip_address &&
!radeon_cs_packet_next_is_pkt3_nop(p)) {
/* MIP_ADDRESS should point to FMASK for an MSAA texture.
* It should be 0 if FMASK is disabled. */
moffset = 0;
mipmap = NULL;
} else {
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET_RESOURCE (tex)\n");
return -EINVAL;
}
moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
mipmap = reloc->robj;
}
r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
if (r)
return r;
ib[idx+1+(i*8)+2] += toffset;
ib[idx+1+(i*8)+3] += moffset;
break;
case SQ_TEX_VTX_VALID_BUFFER:
{
uint64_t offset64;
/* vtx base */
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET_RESOURCE (vtx)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
size = radeon_get_ib_value(p, idx+1+(i*8)+1);
if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
/* force size to size of the buffer */
dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n");
ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
}
offset64 = reloc->gpu_offset + offset;
ib[idx+1+(i*8)+0] = offset64;
ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
(upper_32_bits(offset64) & 0xff);
break;
}
case SQ_TEX_VTX_INVALID_TEXTURE:
case SQ_TEX_VTX_INVALID_BUFFER:
default:
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
}
}
break;
case PACKET3_SET_ALU_CONST:
/* XXX fix me ALU const buffers only */
break;
case PACKET3_SET_BOOL_CONST:
start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
(start_reg >= PACKET3_SET_BOOL_CONST_END) ||
(end_reg >= PACKET3_SET_BOOL_CONST_END)) {
DRM_ERROR("bad SET_BOOL_CONST\n");
return -EINVAL;
}
break;
case PACKET3_SET_LOOP_CONST:
start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
(start_reg >= PACKET3_SET_LOOP_CONST_END) ||
(end_reg >= PACKET3_SET_LOOP_CONST_END)) {
DRM_ERROR("bad SET_LOOP_CONST\n");
return -EINVAL;
}
break;
case PACKET3_SET_CTL_CONST:
start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
(start_reg >= PACKET3_SET_CTL_CONST_END) ||
(end_reg >= PACKET3_SET_CTL_CONST_END)) {
DRM_ERROR("bad SET_CTL_CONST\n");
return -EINVAL;
}
break;
case PACKET3_SET_SAMPLER:
if (pkt->count % 3) {
DRM_ERROR("bad SET_SAMPLER\n");
return -EINVAL;
}
start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
end_reg = 4 * pkt->count + start_reg - 4;
if ((start_reg < PACKET3_SET_SAMPLER_START) ||
(start_reg >= PACKET3_SET_SAMPLER_END) ||
(end_reg >= PACKET3_SET_SAMPLER_END)) {
DRM_ERROR("bad SET_SAMPLER\n");
return -EINVAL;
}
break;
case PACKET3_STRMOUT_BUFFER_UPDATE:
if (pkt->count != 4) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
return -EINVAL;
}
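		/* DW1 bit 0 requests a write to DST_ADDRESS and DW1 bits [2:1]
		 * == 2 a read from SRC_ADDRESS; each memory path consumes its
		 * own relocation. */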
/* Updating memory at DST_ADDRESS. */
if (idx_value & 0x1) {
u64 offset;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+1);
offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
if ((offset + 4) > radeon_bo_size(reloc->robj)) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL;
}
offset += reloc->gpu_offset;
ib[idx+1] = offset;
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
/* Reading data from SRC_ADDRESS. */
if (((idx_value >> 1) & 0x3) == 2) {
u64 offset;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+3);
offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
if ((offset + 4) > radeon_bo_size(reloc->robj)) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL;
}
offset += reloc->gpu_offset;
ib[idx+3] = offset;
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
case PACKET3_MEM_WRITE:
{
u64 offset;
if (pkt->count != 3) {
DRM_ERROR("bad MEM_WRITE (invalid count)\n");
return -EINVAL;
}
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+0);
offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
if (offset & 0x7) {
DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
return -EINVAL;
}
if ((offset + 8) > radeon_bo_size(reloc->robj)) {
DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
offset + 8, radeon_bo_size(reloc->robj));
return -EINVAL;
}
offset += reloc->gpu_offset;
ib[idx+0] = offset;
ib[idx+1] = upper_32_bits(offset) & 0xff;
break;
}
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
return -EINVAL;
}
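		/* DW1 bit 0: source is memory (1) or a register (0); bit 1 says
		 * the same for the destination. Register operands must be on the
		 * safe list, memory operands get relocated and bounds-checked. */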
if (idx_value & 0x1) {
u64 offset;
/* SRC is memory. */
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad COPY_DW (missing src reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+1);
offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
if ((offset + 4) > radeon_bo_size(reloc->robj)) {
DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL;
}
offset += reloc->gpu_offset;
ib[idx+1] = offset;
ib[idx+2] = upper_32_bits(offset) & 0xff;
} else {
/* SRC is a reg. */
reg = radeon_get_ib_value(p, idx+1) << 2;
if (!evergreen_is_safe_reg(p, reg)) {
dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
reg, idx + 1);
return -EINVAL;
}
}
if (idx_value & 0x2) {
u64 offset;
/* DST is memory. */
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx+3);
offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
if ((offset + 4) > radeon_bo_size(reloc->robj)) {
DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
offset + 4, radeon_bo_size(reloc->robj));
return -EINVAL;
}
offset += reloc->gpu_offset;
ib[idx+3] = offset;
ib[idx+4] = upper_32_bits(offset) & 0xff;
} else {
/* DST is a reg. */
reg = radeon_get_ib_value(p, idx+3) << 2;
if (!evergreen_is_safe_reg(p, reg)) {
dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
reg, idx + 3);
return -EINVAL;
}
}
break;
case PACKET3_SET_APPEND_CNT:
{
uint32_t areg;
uint32_t allowed_reg_base;
uint32_t source_sel;
if (pkt->count != 2) {
DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
return -EINVAL;
}
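		/* Only the GDS_APPEND_COUNT_0..11 registers may be targeted;
		 * convert that range into the packet's context-register-relative
		 * dword index for the compare against DW1 bits [31:16]. */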
allowed_reg_base = GDS_APPEND_COUNT_0;
allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
allowed_reg_base >>= 2;
areg = idx_value >> 16;
if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
areg, idx);
return -EINVAL;
}
source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
uint64_t offset;
uint32_t swap;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
if (r) {
DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
return -EINVAL;
}
offset = radeon_get_ib_value(p, idx + 1);
swap = offset & 0x3;
offset &= ~0x3;
offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
offset += reloc->gpu_offset;
ib[idx+1] = (offset & 0xfffffffc) | swap;
ib[idx+2] = upper_32_bits(offset) & 0xff;
} else {
DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
return -EINVAL;
}
break;
}
case PACKET3_NOP:
break;
default:
DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
return -EINVAL;
}
return 0;
}