diff --git a/src/editor/gizmo.cpp b/src/editor/gizmo.cpp index 3c5b21922..ac682df44 100644 --- a/src/editor/gizmo.cpp +++ b/src/editor/gizmo.cpp @@ -1106,7 +1106,8 @@ struct GizmoImpl LUMIX_FINAL : public Gizmo transform(); for (int i = 0; i < m_count; ++i) { - const Matrix gizmo_mtx = getMatrix(m_entities[i]); + Matrix gizmo_mtx = getMatrix(m_entities[i]); + gizmo_mtx.translate(-vp.pos); data->push({gizmo_mtx, m_active == i}); } diff --git a/src/editor/world_editor.cpp b/src/editor/world_editor.cpp index d51012f75..c94d0c62f 100644 --- a/src/editor/world_editor.cpp +++ b/src/editor/world_editor.cpp @@ -3076,7 +3076,7 @@ public: m_viewport.h = -1; m_viewport.fov = Math::degreesToRadians(60.f); m_viewport.near = 0.1f; - m_viewport.far = 10000.f; + m_viewport.far = 100000.f; for (auto& i : m_is_mouse_down) i = false; for (auto& i : m_is_mouse_click) i = false; diff --git a/src/engine/array.h b/src/engine/array.h index b5e52f4de..4f8ed5024 100644 --- a/src/engine/array.h +++ b/src/engine/array.h @@ -342,6 +342,7 @@ public: return m_data[index]; } + int byte_size() const { return m_size * sizeof(T); } int size() const { return m_size; } int capacity() const { return m_capacity; } diff --git a/src/engine/job_system.cpp b/src/engine/job_system.cpp index aa99f46f3..77ec1d6f5 100644 --- a/src/engine/job_system.cpp +++ b/src/engine/job_system.cpp @@ -242,7 +242,7 @@ bool init(IAllocator& allocator) g_system = LUMIX_NEW(allocator, System)(allocator); g_system->m_work_signal.reset(); - int count = Math::maximum(1, int(MT::getCPUsCount() - 1)); + int count = Math::maximum(1, int(MT::getCPUsCount() - 0)); for (int i = 0; i < count; ++i) { WorkerTask* task = LUMIX_NEW(allocator, WorkerTask)(*g_system); diff --git a/src/engine/universe/universe.cpp b/src/engine/universe/universe.cpp index d71a554c5..d0de18f6b 100644 --- a/src/engine/universe/universe.cpp +++ b/src/engine/universe/universe.cpp @@ -222,6 +222,16 @@ Transform Universe::getTransform(Entity entity) const } +Matrix Universe::getRelativeMatrix(Entity entity, const Vec3& base_pos) const +{ + auto& transform = m_entities[entity.index]; + Matrix mtx = transform.rotation.toMatrix(); + mtx.setTranslation(transform.position - base_pos); + mtx.multiply3x3(transform.scale); + return mtx; +} + + Matrix Universe::getMatrix(Entity entity) const { auto& transform = m_entities[entity.index]; diff --git a/src/engine/universe/universe.h b/src/engine/universe/universe.h index 15a8a8211..a0bc71a8a 100644 --- a/src/engine/universe/universe.h +++ b/src/engine/universe/universe.h @@ -91,6 +91,7 @@ public: void setMatrix(Entity entity, const Matrix& mtx); Matrix getPositionAndRotation(Entity entity) const; Matrix getMatrix(Entity entity) const; + Matrix getRelativeMatrix(Entity entity, const Vec3& base_pos) const; void setTransform(Entity entity, const RigidTransform& transform); void setTransform(Entity entity, const Transform& transform); void setTransformKeepChildren(Entity entity, const Transform& transform); diff --git a/src/engine/viewport.cpp b/src/engine/viewport.cpp index bf604a1ae..b395f5da2 100644 --- a/src/engine/viewport.cpp +++ b/src/engine/viewport.cpp @@ -37,6 +37,14 @@ Matrix Viewport::getView() const } +Matrix Viewport::getViewRotation() const +{ + Matrix view = rot.toMatrix(); + view.fastInverse(); + return view; +} + + void Viewport::getRay(const Vec2& screen_pos, Vec3& origin, Vec3& dir) const { origin = pos; diff --git a/src/engine/viewport.h b/src/engine/viewport.h index 87461aa78..a645dea42 100644 --- a/src/engine/viewport.h +++ b/src/engine/viewport.h @@ -16,6 +16,7 @@ struct LUMIX_ENGINE_API Viewport { Matrix getProjection(bool homogenous_depth) const; Matrix getView() const; + Matrix getViewRotation() const; Frustum getFrustum() const; Frustum getFrustum(const Vec2& viewport_min_px, const Vec2& viewport_max_px) const; Vec2 worldToScreenPixels(const Vec3& world) const; diff --git a/src/renderer/editor/plugins.cpp b/src/renderer/editor/plugins.cpp index cec62b636..ee6c1a5e5 100644 --- a/src/renderer/editor/plugins.cpp +++ b/src/renderer/editor/plugins.cpp @@ -2391,7 +2391,7 @@ struct RenderInterfaceImpl LUMIX_FINAL : public RenderInterface ffr::createBuffer(vertex_buffer, vertices_count * sizeof(Vertex), vertices); ffr::ProgramHandle prg = m_shader->getProgram(0).handle; - ffr::setUniformMatrix4x3f(m_model_uniform, &mtx.m11); + ffr::setUniformMatrix4f(m_model_uniform, &mtx.m11); ffr::useProgram(prg); ffr::setVertexBuffer(&vertex_decl, vertex_buffer, 0, nullptr); ffr::setIndexBuffer(index_buffer); diff --git a/src/renderer/editor/scene_view.cpp b/src/renderer/editor/scene_view.cpp index 17e9af235..521e4a09f 100644 --- a/src/renderer/editor/scene_view.cpp +++ b/src/renderer/editor/scene_view.cpp @@ -268,6 +268,7 @@ void SceneView::renderGizmos() renderer->beginProfileBlock("gizmos"); ffr::pushDebugGroup("gizmos"); + ffr::blending(0); view->m_editor.getGizmo().render(data, viewport); ffr::popDebugGroup(); renderer->endProfileBlock(); diff --git a/src/renderer/editor/terrain_editor.cpp b/src/renderer/editor/terrain_editor.cpp index 32224db64..f8df45554 100644 --- a/src/renderer/editor/terrain_editor.cpp +++ b/src/renderer/editor/terrain_editor.cpp @@ -37,10 +37,10 @@ namespace Lumix static const ComponentType MODEL_INSTANCE_TYPE = Reflection::getComponentType("renderable"); static const ComponentType TERRAIN_TYPE = Reflection::getComponentType("terrain"); static const ComponentType HEIGHTFIELD_TYPE = Reflection::getComponentType("physical_heightfield"); -static const char* HEIGHTMAP_UNIFORM = "u_texHeightmap"; -static const char* SPLATMAP_UNIFORM = "u_texSplatmap"; -static const char* COLORMAP_UNIFORM = "u_texColormap"; -static const char* TEX_COLOR_UNIFORM = "u_texColor"; +static const char* HEIGHTMAP_UNIFORM = "u_heightmap"; +static const char* SPLATMAP_UNIFORM = "u_splatmap"; +static const char* COLORMAP_UNIFORM = "u_colormap"; +static const char* TEX_COLOR_UNIFORM = "u_detail_albedomap"; static const float MIN_BRUSH_SIZE = 0.5f; diff --git a/src/renderer/ffr/ffr.cpp b/src/renderer/ffr/ffr.cpp index ae9b65954..7a8452e5f 100644 --- a/src/renderer/ffr/ffr.cpp +++ b/src/renderer/ffr/ffr.cpp @@ -49,7 +49,7 @@ struct Texture enum { MAX_COUNT = 4096 }; GLuint handle; - bool cubemap; + GLenum target; }; @@ -119,6 +119,9 @@ static struct { HashMap* uniforms_hash_map; MT::SpinMutex handle_mutex {false}; DWORD thread; + int vertex_attributes = 0; + int instance_attributes = 0; + int max_vertex_attributes = 16; } g_ffr; @@ -156,6 +159,130 @@ static const uint D3DFMT_DXT2 = '2TXD'; static const uint D3DFMT_DXT3 = '3TXD'; static const uint D3DFMT_DXT4 = '4TXD'; static const uint D3DFMT_DXT5 = '5TXD'; +static const uint D3DFMT_DX10 = '01XD'; + +enum class DxgiFormat : uint { + UNKNOWN , + R32G32B32A32_TYPELESS , + R32G32B32A32_FLOAT , + R32G32B32A32_UINT , + R32G32B32A32_SINT , + R32G32B32_TYPELESS , + R32G32B32_FLOAT , + R32G32B32_UINT , + R32G32B32_SINT , + R16G16B16A16_TYPELESS , + R16G16B16A16_FLOAT , + R16G16B16A16_UNORM , + R16G16B16A16_UINT , + R16G16B16A16_SNORM , + R16G16B16A16_SINT , + R32G32_TYPELESS , + R32G32_FLOAT , + R32G32_UINT , + R32G32_SINT , + R32G8X24_TYPELESS , + D32_FLOAT_S8X24_UINT , + R32_FLOAT_X8X24_TYPELESS , + X32_TYPELESS_G8X24_UINT , + R10G10B10A2_TYPELESS , + R10G10B10A2_UNORM , + R10G10B10A2_UINT , + R11G11B10_FLOAT , + R8G8B8A8_TYPELESS , + R8G8B8A8_UNORM , + R8G8B8A8_UNORM_SRGB , + R8G8B8A8_UINT , + R8G8B8A8_SNORM , + R8G8B8A8_SINT , + R16G16_TYPELESS , + R16G16_FLOAT , + R16G16_UNORM , + R16G16_UINT , + R16G16_SNORM , + R16G16_SINT , + R32_TYPELESS , + D32_FLOAT , + R32_FLOAT , + R32_UINT , + R32_SINT , + R24G8_TYPELESS , + D24_UNORM_S8_UINT , + R24_UNORM_X8_TYPELESS , + X24_TYPELESS_G8_UINT , + R8G8_TYPELESS , + R8G8_UNORM , + R8G8_UINT , + R8G8_SNORM , + R8G8_SINT , + R16_TYPELESS , + R16_FLOAT , + D16_UNORM , + R16_UNORM , + R16_UINT , + R16_SNORM , + R16_SINT , + R8_TYPELESS , + R8_UNORM , + R8_UINT , + R8_SNORM , + R8_SINT , + A8_UNORM , + R1_UNORM , + R9G9B9E5_SHAREDEXP , + R8G8_B8G8_UNORM , + G8R8_G8B8_UNORM , + BC1_TYPELESS , + BC1_UNORM , + BC1_UNORM_SRGB , + BC2_TYPELESS , + BC2_UNORM , + BC2_UNORM_SRGB , + BC3_TYPELESS , + BC3_UNORM , + BC3_UNORM_SRGB , + BC4_TYPELESS , + BC4_UNORM , + BC4_SNORM , + BC5_TYPELESS , + BC5_UNORM , + BC5_SNORM , + B5G6R5_UNORM , + B5G5R5A1_UNORM , + B8G8R8A8_UNORM , + B8G8R8X8_UNORM , + R10G10B10_XR_BIAS_A2_UNORM , + B8G8R8A8_TYPELESS , + B8G8R8A8_UNORM_SRGB , + B8G8R8X8_TYPELESS , + B8G8R8X8_UNORM_SRGB , + BC6H_TYPELESS , + BC6H_UF16 , + BC6H_SF16 , + BC7_TYPELESS , + BC7_UNORM , + BC7_UNORM_SRGB , + AYUV , + Y410 , + Y416 , + NV12 , + P010 , + P016 , + OPAQUE_420 , + YUY2 , + Y210 , + Y216 , + NV11 , + AI44 , + IA44 , + P8 , + A8P8 , + B4G4R4A4_UNORM , + P208 , + V208 , + V408 , + FORCE_UINT +} ; struct PixelFormat { uint dwSize; @@ -192,6 +319,15 @@ struct Header { uint dwReserved2; }; +struct DXT10Header +{ + DxgiFormat dxgi_format; + uint resource_dimension; + uint misc_flag; + uint array_size; + uint misc_flags2; +}; + struct LoadInfo { bool compressed; bool swap; @@ -209,33 +345,38 @@ static uint sizeDXTC(uint w, uint h, GLuint format) { return ((w + 3) / 4) * ((h + 3) / 4) * (is_dxt1 || is_ati ? 8 : 16); } -static bool isDXT1(PixelFormat& pf) +static bool isDXT1(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_DXT1)); } -static bool isATI1(PixelFormat& pf) +static bool isDXT10(const PixelFormat& pf) +{ + return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_DX10)); +} + +static bool isATI1(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_ATI1)); } -static bool isATI2(PixelFormat& pf) +static bool isATI2(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_ATI2)); } -static bool isDXT3(PixelFormat& pf) +static bool isDXT3(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_DXT3)); } -static bool isDXT5(PixelFormat& pf) +static bool isDXT5(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_FOURCC) && (pf.dwFourCC == D3DFMT_DXT5)); } -static bool isBGRA8(PixelFormat& pf) +static bool isBGRA8(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_RGB) && (pf.dwFlags & DDPF_ALPHAPIXELS) @@ -246,7 +387,7 @@ static bool isBGRA8(PixelFormat& pf) && (pf.dwAlphaBitMask == 0xff000000U)); } -static bool isBGR8(PixelFormat& pf) +static bool isBGR8(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_ALPHAPIXELS) && !(pf.dwFlags & DDPF_ALPHAPIXELS) @@ -256,7 +397,7 @@ static bool isBGR8(PixelFormat& pf) && (pf.dwBBitMask == 0xff)); } -static bool isBGR5A1(PixelFormat& pf) +static bool isBGR5A1(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_RGB) && (pf.dwFlags & DDPF_ALPHAPIXELS) @@ -267,7 +408,7 @@ static bool isBGR5A1(PixelFormat& pf) && (pf.dwAlphaBitMask == 0x00008000)); } -static bool isBGR565(PixelFormat& pf) +static bool isBGR565(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_RGB) && !(pf.dwFlags & DDPF_ALPHAPIXELS) @@ -277,7 +418,7 @@ static bool isBGR565(PixelFormat& pf) && (pf.dwBBitMask == 0x0000001f)); } -static bool isINDEX8(PixelFormat& pf) +static bool isINDEX8(const PixelFormat& pf) { return ((pf.dwFlags & DDPF_INDEXED) && (pf.dwRGBBitCount == 8)); } @@ -300,6 +441,9 @@ static LoadInfo loadInfoATI2 = { static LoadInfo loadInfoBGRA8 = { false, false, false, 4, GL_RGBA8, GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE }; +static LoadInfo loadInfoRGBA8 = { + false, false, false, 4, GL_RGBA8, GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE +}; static LoadInfo loadInfoBGR8 = { false, false, false, 3, GL_RGB8, GL_SRGB8, GL_BGR, GL_UNSIGNED_BYTE }; @@ -313,6 +457,25 @@ static LoadInfo loadInfoIndex8 = { false, false, true, 1, GL_RGB8, GL_SRGB8, GL_BGRA, GL_UNSIGNED_BYTE }; +static LoadInfo* getDXT10LoadInfo(const Header& hdr, const DXT10Header& dxt10_hdr) +{ + switch(dxt10_hdr.dxgi_format) { + case DxgiFormat::B8G8R8A8_UNORM_SRGB: + return &loadInfoBGRA8; + break; + case DxgiFormat::B8G8R8A8_UNORM: + return &loadInfoBGRA8; + break; + case DxgiFormat::R8G8B8A8_UNORM: + return &loadInfoRGBA8; + break; + default: + ASSERT(false); + return nullptr; + break; + } +} + struct DXTColBlock { uint16_t col0; @@ -528,6 +691,7 @@ static void flipCompressedTexture(int w, int h, int format, void* surface) GLenum err = glGetError(); \ if (err != GL_NO_ERROR) { \ g_log_error.log("Renderer") << "OpenGL error " << err; \ + ASSERT(false);/**/ \ } \ } while(false) #else @@ -739,7 +903,7 @@ void bindTexture(uint unit, TextureHandle handle) if(handle.isValid()) { const Texture& t = g_ffr.textures[handle.value]; CHECK_GL(glActiveTexture(GL_TEXTURE0 + unit)); - CHECK_GL(glBindTexture(t.cubemap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D, t.handle)); + CHECK_GL(glBindTexture(t.target, t.handle)); } else { CHECK_GL(glActiveTexture(GL_TEXTURE0 + unit)); @@ -752,9 +916,9 @@ void setInstanceBuffer(const VertexDecl& decl, BufferHandle instance_buffer, int { checkThread(); const GLuint ib = g_ffr.buffers[instance_buffer.value].handle; - CHECK_GL(glBindBuffer(GL_ARRAY_BUFFER, ib)); const GLsizei stride = decl.size; + g_ffr.instance_attributes = decl.attributes_count; for (uint i = 0; i < decl.attributes_count; ++i) { const Attribute* attr = &decl.attributes[i]; const void* offset = (void*)(intptr_t)(attr->offset + byte_offset); @@ -766,20 +930,23 @@ void setInstanceBuffer(const VertexDecl& decl, BufferHandle instance_buffer, int } const int index = location_offset + i; - CHECK_GL(glEnableVertexAttribArray(index)); + CHECK_GL(glBindBuffer(GL_ARRAY_BUFFER, ib)); CHECK_GL(glVertexAttribPointer(index, attr->components_num, gl_attr_type, attr->normalized, stride, offset)); CHECK_GL(glVertexAttribDivisor(index, 1)); + CHECK_GL(glEnableVertexAttribArray(index)); } } void setVertexBuffer(const VertexDecl* decl, BufferHandle vertex_buffer, uint buffer_offset_bytes, const int* attribute_map) { + for (int i = 0; i < g_ffr.max_vertex_attributes; ++i) { + glDisableVertexAttribArray(i); + } if (decl) { const GLsizei stride = decl->size; const GLuint vb = g_ffr.buffers[vertex_buffer.value].handle; const uint vb_offset = buffer_offset_bytes; - CHECK_GL(glBindBuffer(GL_ARRAY_BUFFER, vb)); - + g_ffr.vertex_attributes = decl->attributes_count; for (uint i = 0; i < decl->attributes_count; ++i) { const Attribute* attr = &decl->attributes[i]; const void* offset = (void*)(intptr_t)(attr->offset + vb_offset); @@ -792,18 +959,18 @@ void setVertexBuffer(const VertexDecl* decl, BufferHandle vertex_buffer, uint bu const int index = attribute_map ? attribute_map[i] : i; if(index >= 0) { - CHECK_GL(glEnableVertexAttribArray(index)); + CHECK_GL(glBindBuffer(GL_ARRAY_BUFFER, vb)); CHECK_GL(glVertexAttribPointer(index, attr->components_num, gl_attr_type, attr->normalized, stride, offset)); + CHECK_GL(glVertexAttribDivisor(index, 0)); + CHECK_GL(glEnableVertexAttribArray(index)); } else { - glDisableVertexAttribArray(i); + CHECK_GL(glDisableVertexAttribArray(i)); } } } else { - GLint n; - glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &n); - for (int i = 0; i < n; ++i) { + for (int i = 0; i < g_ffr.max_vertex_attributes; ++i) { glDisableVertexAttribArray(i); } } @@ -849,6 +1016,16 @@ void setIndexBuffer(BufferHandle handle) CHECK_GL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0)); } +void resetInstanceBuffer() +{ + if (g_ffr.instance_attributes == 0) return; + + for (int i = g_ffr.vertex_attributes; i < g_ffr.max_vertex_attributes; ++i) { + glDisableVertexAttribArray(i); + } + g_ffr.instance_attributes = 0; +} + void drawElements(uint offset, uint count, PrimitiveType type) { @@ -862,19 +1039,22 @@ void drawElements(uint offset, uint count, PrimitiveType type) default: ASSERT(0); break; } + resetInstanceBuffer(); CHECK_GL(glDrawElements(pt, count, GL_UNSIGNED_SHORT, (void*)(intptr_t)(offset * sizeof(short)))); } -void drawTrianglesInstanced(uint indices_count, uint instances_count) +void drawTrianglesInstanced(uint indices_offset, uint indices_count, uint instances_count) { checkThread(); - glDrawElementsInstanced(GL_TRIANGLES, indices_count, GL_UNSIGNED_SHORT, nullptr, instances_count); + CHECK_GL(glDrawElementsInstanced(GL_TRIANGLES, indices_count, GL_UNSIGNED_SHORT, (const void*)(intptr_t)indices_offset, instances_count)); } void drawTriangles(uint indices_count) { checkThread(); + + resetInstanceBuffer(); CHECK_GL(glDrawElements(GL_TRIANGLES, indices_count, GL_UNSIGNED_SHORT, 0)); } @@ -891,6 +1071,7 @@ void drawArrays(uint offset, uint count, PrimitiveType type) default: ASSERT(0); break; } + resetInstanceBuffer(); CHECK_GL(glDrawArrays(pt, offset, count)); } @@ -912,6 +1093,22 @@ void bindUniformBuffer(uint index, BufferHandle buffer, size_t offset, size_t si } +void* map(BufferHandle buffer) +{ + checkThread(); + const GLuint buf = g_ffr.buffers[buffer.value].handle; + return glMapNamedBuffer(buf, GL_WRITE_ONLY); +} + + +void unmap(BufferHandle buffer) +{ + checkThread(); + const GLuint buf = g_ffr.buffers[buffer.value].handle; + glUnmapNamedBuffer(buf); +} + + void update(BufferHandle buffer, const void* data, size_t offset, size_t size) { checkThread(); @@ -974,21 +1171,28 @@ static struct { TextureInfo getTextureInfo(const void* data) { - const DDS::Header* hdr = (const DDS::Header*)data; - const uint mips = (hdr->dwFlags & DDS::DDSD_MIPMAPCOUNT) ? hdr->dwMipMapCount : 1; - const bool is_cubemap = (hdr->caps2.dwCaps2 & DDS::DDSCAPS2_CUBEMAP) != 0; TextureInfo info; + + const DDS::Header* hdr = (const DDS::Header*)data; info.width = hdr->dwWidth; info.height = hdr->dwHeight; - info.depth = 1; - info.layers = 1; - info.mips = mips; - info.is_cubemap = is_cubemap; + info.is_cubemap = (hdr->caps2.dwCaps2 & DDS::DDSCAPS2_CUBEMAP) != 0; + info.mips = (hdr->dwFlags & DDS::DDSD_MIPMAPCOUNT) ? hdr->dwMipMapCount : 1; + info.depth = (hdr->dwFlags & DDS::DDSD_DEPTH) ? hdr->dwDepth : 1; + + if (isDXT10(hdr->pixelFormat)) { + const DDS::DXT10Header* hdr_dxt10 = (const DDS::DXT10Header*)((const u8*)data + sizeof(DDS::Header)); + info.layers = hdr_dxt10->array_size; + } + else { + info.layers = 1; + } + return info; } -bool loadTexture(TextureHandle handle, const void* input, int input_size, uint flags, TextureInfo* info) +bool loadTexture(TextureHandle handle, const void* input, int input_size, uint flags) { checkThread(); DDS::Header hdr; @@ -1004,6 +1208,7 @@ bool loadTexture(TextureHandle handle, const void* input, int input_size, uint f } DDS::LoadInfo* li; + int layers = 1; if (isDXT1(hdr.pixelFormat)) { li = &DDS::loadInfoDXT1; @@ -1035,110 +1240,132 @@ bool loadTexture(TextureHandle handle, const void* input, int input_size, uint f else if (isINDEX8(hdr.pixelFormat)) { li = &DDS::loadInfoIndex8; } + else if (isDXT10(hdr.pixelFormat)) { + DDS::DXT10Header dxt10_hdr; + blob.read(dxt10_hdr); + li = DDS::getDXT10LoadInfo(hdr, dxt10_hdr); + layers = dxt10_hdr.array_size; + } else { + ASSERT(false); return false; } const bool is_cubemap = (hdr.caps2.dwCaps2 & DDS::DDSCAPS2_CUBEMAP) != 0; + const GLenum texture_target = is_cubemap ? GL_TEXTURE_CUBE_MAP : layers > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + const bool is_srgb = flags & (u32)TextureFlags::SRGB; + const GLenum internal_format = is_srgb ? li->internalSRGBFormat : li->internalFormat; + const uint mipMapCount = (hdr.dwFlags & DDS::DDSD_MIPMAPCOUNT) ? hdr.dwMipMapCount : 1; + GLuint texture; - glGenTextures(1, &texture); + CHECK_GL(glCreateTextures(texture_target, 1, &texture)); if (texture == 0) { return false; } - - const bool is_srgb = flags & (u32)TextureFlags::SRGB; - const GLenum internal_format = is_srgb ? li->internalSRGBFormat : li->internalFormat; - - const uint mipMapCount = (hdr.dwFlags & DDS::DDSD_MIPMAPCOUNT) ? hdr.dwMipMapCount : 1; - for(int side = 0; side < (is_cubemap ? 6 : 1); ++side) { - - uint width = hdr.dwWidth; - uint height = hdr.dwHeight; - - const GLenum tex_img_target = is_cubemap ? GL_TEXTURE_CUBE_MAP_POSITIVE_X + side : GL_TEXTURE_2D; - const GLenum texture_target = is_cubemap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; - CHECK_GL(glBindTexture(texture_target, texture)); - - if (li->compressed) { - uint size = DDS::sizeDXTC(width, height, internal_format); - if (size != hdr.dwPitchOrLinearSize || (hdr.dwFlags & DDS::DDSD_LINEARSIZE) == 0) { - CHECK_GL(glDeleteTextures(1, &texture)); - return false; - } - Array data(*g_ffr.allocator); - data.resize(size); - for (uint ix = 0; ix < mipMapCount; ++ix) { - blob.read(&data[0], size); - //DDS::flipCompressedTexture(width, height, internal_format, &data[0]); - CHECK_GL(glCompressedTexImage2D(tex_img_target, ix, internal_format, width, height, 0, size, &data[0])); - CHECK_GL(glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR)); - CHECK_GL(glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); - width = Math::maximum(1, width >> 1); - height = Math::maximum(1, height >> 1); - size = DDS::sizeDXTC(width, height, internal_format); - } - } - else if (li->palette) { - if ((hdr.dwFlags & DDS::DDSD_PITCH) == 0 || hdr.pixelFormat.dwRGBBitCount != 8) { - CHECK_GL(glDeleteTextures(1, &texture)); - return false; - } - uint size = hdr.dwPitchOrLinearSize * height; - if (size != width * height * li->blockBytes) { - CHECK_GL(glDeleteTextures(1, &texture)); - return false; - } - Array data(*g_ffr.allocator); - data.resize(size); - uint palette[256]; - Array unpacked(*g_ffr.allocator); - unpacked.resize(size); - blob.read(palette, 4 * 256); - for (uint ix = 0; ix < mipMapCount; ++ix) { - blob.read(&data[0], size); - for (uint zz = 0; zz < size; ++zz) { - unpacked[zz] = palette[data[zz]]; - } - //glPixelStorei(GL_UNPACK_ROW_LENGTH, height); - CHECK_GL(glTexImage2D(tex_img_target, ix, internal_format, width, height, 0, li->externalFormat, li->type, &unpacked[0])); - width = Math::maximum(1, width >> 1); - height = Math::maximum(1, height >> 1); - size = width * height * li->blockBytes; - } - } - else { - if (li->swap) { - CHECK_GL(glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE)); - } - uint size = width * height * li->blockBytes; - Array data(*g_ffr.allocator); - data.resize(size); - for (uint ix = 0; ix < mipMapCount; ++ix) { - blob.read(&data[0], size); - //glPixelStorei(GL_UNPACK_ROW_LENGTH, height); - CHECK_GL(glTexImage2D(tex_img_target, ix, internal_format, width, height, 0, li->externalFormat, li->type, &data[0])); - width = Math::maximum(1, width >> 1); - height = Math::maximum(1, height >> 1); - size = width * height * li->blockBytes; - } - CHECK_GL(glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE)); - } - CHECK_GL(glTexParameteri(texture_target, GL_TEXTURE_MAX_LEVEL, mipMapCount - 1)); + if(layers > 1) { + CHECK_GL(glTextureStorage3D(texture, mipMapCount, internal_format, hdr.dwWidth, hdr.dwHeight, layers)); + } + else { + CHECK_GL(glTextureStorage2D(texture, mipMapCount, internal_format, hdr.dwWidth, hdr.dwHeight)); } - if(info) { - info->width = hdr.dwWidth; - info->height = hdr.dwHeight; - info->depth = 1; - info->layers = 1; - info->mips = mipMapCount; - info->is_cubemap = is_cubemap; + for (int layer = 0; layer < layers; ++layer) { + for(int side = 0; side < (is_cubemap ? 6 : 1); ++side) { + const GLenum tex_img_target = is_cubemap ? GL_TEXTURE_CUBE_MAP_POSITIVE_X + side : layers > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + uint width = hdr.dwWidth; + uint height = hdr.dwHeight; + + if (li->compressed) { + uint size = DDS::sizeDXTC(width, height, internal_format); + if (size != hdr.dwPitchOrLinearSize || (hdr.dwFlags & DDS::DDSD_LINEARSIZE) == 0) { + CHECK_GL(glDeleteTextures(1, &texture)); + return false; + } + Array data(*g_ffr.allocator); + data.resize(size); + for (uint mip = 0; mip < mipMapCount; ++mip) { + blob.read(&data[0], size); + //DDS::flipCompressedTexture(width, height, internal_format, &data[0]); + if(layers > 1) { + CHECK_GL(glCompressedTextureSubImage3D(texture, mip, 0, 0, layer, width, height, 1, internal_format, size, &data[0])); + } + else if (is_cubemap) { + ASSERT(layer == 0); + CHECK_GL(glCompressedTextureSubImage3D(texture, mip, 0, 0, side, width, height, 1, internal_format, size, &data[0])); + } + else { + CHECK_GL(glCompressedTextureSubImage2D(texture, mip, 0, 0, width, height, internal_format, size, &data[0])); + } + CHECK_GL(glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR)); + CHECK_GL(glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); + width = Math::maximum(1, width >> 1); + height = Math::maximum(1, height >> 1); + size = DDS::sizeDXTC(width, height, internal_format); + } + } + else if (li->palette) { + if ((hdr.dwFlags & DDS::DDSD_PITCH) == 0 || hdr.pixelFormat.dwRGBBitCount != 8) { + CHECK_GL(glDeleteTextures(1, &texture)); + return false; + } + uint size = hdr.dwPitchOrLinearSize * height; + if (size != width * height * li->blockBytes) { + CHECK_GL(glDeleteTextures(1, &texture)); + return false; + } + Array data(*g_ffr.allocator); + data.resize(size); + uint palette[256]; + Array unpacked(*g_ffr.allocator); + unpacked.resize(size); + blob.read(palette, 4 * 256); + for (uint ix = 0; ix < mipMapCount; ++ix) { + blob.read(&data[0], size); + for (uint zz = 0; zz < size; ++zz) { + unpacked[zz] = palette[data[zz]]; + } + //glPixelStorei(GL_UNPACK_ROW_LENGTH, height); + if(layers > 1) { + CHECK_GL(glTextureSubImage3D(tex_img_target, ix, internal_format, width, height, layer, 0, li->externalFormat, li->type, &unpacked[0])); + } + else { + CHECK_GL(glTextureSubImage2D(tex_img_target, ix, internal_format, width, height, 0, li->externalFormat, li->type, &unpacked[0])); + } + width = Math::maximum(1, width >> 1); + height = Math::maximum(1, height >> 1); + size = width * height * li->blockBytes; + } + } + else { + if (li->swap) { + CHECK_GL(glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE)); + } + uint size = width * height * li->blockBytes; + Array data(*g_ffr.allocator); + data.resize(size); + for (uint mip = 0; mip < mipMapCount; ++mip) { + blob.read(&data[0], size); + //glPixelStorei(GL_UNPACK_ROW_LENGTH, height); + if (layers > 1) { + CHECK_GL(glTextureSubImage3D(texture, mip, 0, 0, layer, width, height, 1, li->externalFormat, li->type, &data[0])); + } + else { + CHECK_GL(glTextureSubImage2D(texture, mip, 0, 0, width, height, li->externalFormat, li->type, &data[0])); + } + width = Math::maximum(1, width >> 1); + height = Math::maximum(1, height >> 1); + size = width * height * li->blockBytes; + } + CHECK_GL(glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE)); + } + CHECK_GL(glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, mipMapCount - 1)); + } } Texture& t = g_ffr.textures[handle.value]; t.handle = texture; - t.cubemap = is_cubemap; + t.target = is_cubemap ? GL_TEXTURE_CUBE_MAP : layers > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; return true; } @@ -1215,7 +1442,7 @@ bool createTexture(TextureHandle handle, uint w,uint h, TextureFormat format, ui Texture& t = g_ffr.textures[handle.value]; t.handle = texture; - t.cubemap = false; + t.target = GL_TEXTURE_2D; return true; } @@ -1416,6 +1643,7 @@ ProgramHandle createProgram(const char** srcs, const ShaderType* types, int num, glGetActiveUniform(prg, i, sizeof(name), nullptr, &size, &type, name); switch(type) { case GL_SAMPLER_CUBE: + case GL_SAMPLER_2D_ARRAY: case GL_SAMPLER_2D: case GL_INT: ffr_type = UniformType::INT; break; case GL_FLOAT: ffr_type = UniformType::FLOAT; break; @@ -1476,6 +1704,8 @@ bool init(void* window_handle) if (!load_gl(g_ffr.device_context)) return false; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &g_ffr.max_vertex_attributes); + /* int extensions_count; glGetIntegerv(GL_NUM_EXTENSIONS, &extensions_count); for(int i = 0; i < extensions_count; ++i) { diff --git a/src/renderer/ffr/ffr.h b/src/renderer/ffr/ffr.h index bc077d63f..72e263486 100644 --- a/src/renderer/ffr/ffr.h +++ b/src/renderer/ffr/ffr.h @@ -151,7 +151,7 @@ ProgramHandle createProgram(const char** srcs, const ShaderType* types, int num, void useProgram(ProgramHandle prg); void createBuffer(BufferHandle handle, size_t size, const void* data); bool createTexture(TextureHandle handle, uint w, uint h, TextureFormat format, uint flags, const void* data); -bool loadTexture(TextureHandle handle, const void* data, int size, uint flags, TextureInfo* info); +bool loadTexture(TextureHandle handle, const void* data, int size, uint flags); FramebufferHandle createFramebuffer(uint renderbuffers_count, const TextureHandle* renderbuffers); QueryHandle createQuery(); @@ -161,13 +161,14 @@ void bindTexture(uint unit, TextureHandle handle); void uniformBlockBinding(ProgramHandle program, const char* block_name, uint binding); void update(FramebufferHandle fb, uint renderbuffers_count, const TextureHandle* renderbuffers); void update(BufferHandle buffer, const void* data, size_t offset, size_t size); +void* map(BufferHandle buffer); +void unmap(BufferHandle buffer); void bindUniformBuffer(uint index, BufferHandle buffer, size_t offset, size_t size); void getTextureImage(ffr::TextureHandle texture, uint size, void* buf); TextureInfo getTextureInfo(const void* data); void queryTimestamp(QueryHandle query); u64 getQueryResult(QueryHandle query); - void destroy(ProgramHandle program); void destroy(BufferHandle buffer); void destroy(TextureHandle texture); @@ -177,7 +178,7 @@ void destroy(UniformHandle query); void setIndexBuffer(BufferHandle handle); void drawTriangles(uint indices_count); -void drawTrianglesInstanced(uint indices_count, uint instances_count); +void drawTrianglesInstanced(uint indices_offset_bytes, uint indices_count, uint instances_count); void drawElements(uint offset, uint count, PrimitiveType type); void drawArrays(uint offset, uint count, PrimitiveType type); diff --git a/src/renderer/ffr/gl_ext.h b/src/renderer/ffr/gl_ext.h index ca2642c5f..cf489002c 100644 --- a/src/renderer/ffr/gl_ext.h +++ b/src/renderer/ffr/gl_ext.h @@ -26,6 +26,11 @@ FFR_GL_IMPORT(PFNGLCHECKFRAMEBUFFERSTATUSPROC, glCheckFramebufferStatus); FFR_GL_IMPORT(PFNGLCLIPCONTROLPROC, glClipControl); FFR_GL_IMPORT(PFNGLCOMPILESHADERPROC, glCompileShader); FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXIMAGE2DPROC, glCompressedTexImage2D); +FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC, glCompressedTexSubImage2D); +FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC, glCompressedTextureSubImage2D); +FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXIMAGE3DPROC, glCompressedTexImage3D); +FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC, glCompressedTexSubImage3D); +FFR_GL_IMPORT(PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC, glCompressedTextureSubImage3D); FFR_GL_IMPORT(PFNGLCREATEFRAMEBUFFERSPROC, glCreateFramebuffers); FFR_GL_IMPORT(PFNGLCREATEPROGRAMPROC, glCreateProgram); FFR_GL_IMPORT(PFNGLCREATESHADERPROC, glCreateShader); @@ -62,6 +67,8 @@ FFR_GL_IMPORT(PFNGLGETTEXTUREIMAGEPROC, glGetTextureImage); FFR_GL_IMPORT(PFNGLGETUNIFORMBLOCKINDEXPROC, glGetUniformBlockIndex); FFR_GL_IMPORT(PFNGLGETUNIFORMLOCATIONPROC, glGetUniformLocation); FFR_GL_IMPORT(PFNGLLINKPROGRAMPROC, glLinkProgram); +FFR_GL_IMPORT(PFNGLMAPBUFFERPROC, glMapBuffer); +FFR_GL_IMPORT(PFNGLMAPNAMEDBUFFERPROC, glMapNamedBuffer); FFR_GL_IMPORT(PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC, glNamedFramebufferRenderbuffer); FFR_GL_IMPORT(PFNGLNAMEDFRAMEBUFFERTEXTUREPROC, glNamedFramebufferTexture); FFR_GL_IMPORT(PFNGLPOPDEBUGGROUPPROC, glPopDebugGroup); @@ -69,7 +76,13 @@ FFR_GL_IMPORT(PFNGLPUSHDEBUGGROUPPROC, glPushDebugGroup); FFR_GL_IMPORT(PFNGLQUERYCOUNTERPROC, glQueryCounter); FFR_GL_IMPORT(PFNGLSHADERSOURCEPROC, glShaderSource); FFR_GL_IMPORT(PFNGLTEXBUFFERPROC, glTexBuffer); +FFR_GL_IMPORT(PFNGLTEXIMAGE3DPROC, glTexImage3D); +FFR_GL_IMPORT(PFNGLTEXSUBIMAGE3DPROC, glTexSubImage3D); +FFR_GL_IMPORT(PFNGLTEXTURESUBIMAGE2DPROC, glTextureSubImage2D); +FFR_GL_IMPORT(PFNGLTEXTURESUBIMAGE3DPROC, glTextureSubImage3D); +FFR_GL_IMPORT(PFNGLTEXTUREPARAMETERIPROC, glTextureParameteri); FFR_GL_IMPORT(PFNGLTEXTURESTORAGE2DPROC, glTextureStorage2D); +FFR_GL_IMPORT(PFNGLTEXTURESTORAGE3DPROC, glTextureStorage3D); FFR_GL_IMPORT(PFNGLUNIFORM1IPROC, glUniform1i); FFR_GL_IMPORT(PFNGLUNIFORM1FVPROC, glUniform1fv); FFR_GL_IMPORT(PFNGLUNIFORM2FVPROC, glUniform2fv); @@ -79,6 +92,8 @@ FFR_GL_IMPORT(PFNGLUNIFORMMATRIX3X4FVPROC, glUniformMatrix3x4fv); FFR_GL_IMPORT(PFNGLUNIFORMMATRIX4FVPROC, glUniformMatrix4fv); FFR_GL_IMPORT(PFNGLUNIFORMMATRIX4X3FVPROC, glUniformMatrix4x3fv); FFR_GL_IMPORT(PFNGLUNIFORMBLOCKBINDINGPROC, glUniformBlockBinding); +FFR_GL_IMPORT(PFNGLUNMAPBUFFERPROC, glUnmapBuffer); +FFR_GL_IMPORT(PFNGLUNMAPNAMEDBUFFERPROC, glUnmapNamedBuffer); FFR_GL_IMPORT(PFNGLUSEPROGRAMPROC, glUseProgram); FFR_GL_IMPORT(PFNGLVERTEXATTRIBDIVISORARBPROC, glVertexAttribDivisor); FFR_GL_IMPORT(PFNGLVERTEXATTRIBPOINTERPROC, glVertexAttribPointer); \ No newline at end of file diff --git a/src/renderer/material.cpp b/src/renderer/material.cpp index 40f5e247d..202e6dbe4 100644 --- a/src/renderer/material.cpp +++ b/src/renderer/material.cpp @@ -651,11 +651,16 @@ int texture(lua_State* L) return 0; } lua_pop(L, 1); + + + Texture* texture = material->getTexture(material->getTextureCount() - 1); + bool keep_data = false; + LuaWrapper::getOptionalField(L, 1, "keep_data", &keep_data); + if (keep_data) texture->addDataReference(); lua_getfield(L, 1, "srgb"); if (lua_isboolean(L, -1)) { const bool srgb = lua_toboolean(L, -1); - Texture* texture = material->getTexture(material->getTextureCount() - 1); texture->setSRGB(srgb); } else if (!lua_isnil(L, -1)) { diff --git a/src/renderer/pipeline.cpp b/src/renderer/pipeline.cpp index fc5c808cc..a1ca77a93 100644 --- a/src/renderer/pipeline.cpp +++ b/src/renderer/pipeline.cpp @@ -7,6 +7,7 @@ #include "engine/fs/file_system.h" #include "engine/fs/ifile_device.h" #include "engine/geometry.h" +#include "engine/job_system.h" #include "engine/log.h" #include "engine/lua_wrapper.h" #include "engine/mt/atomic.h" @@ -23,10 +24,13 @@ #include "render_scene.h" #include "shader.h" #include "shader_manager.h" +#include "terrain.h" #include "texture.h" #include "texture_manager.h" +#include #include + namespace Lumix { @@ -65,6 +69,10 @@ struct PipelineImpl LUMIX_FINAL : Pipeline m_draw2d.PushClipRectFullScreen(); m_draw2d.PushTextureID(font_atlas.TexID); + m_terrain_params_uniform = ffr::allocUniform("u_terrain_params", ffr::UniformType::VEC4, 1); + m_rel_camera_pos_uniform = ffr::allocUniform("u_rel_camera_pos", ffr::UniformType::VEC4, 1); + m_terrain_scale_uniform = ffr::allocUniform("u_terrain_scale", ffr::UniformType::VEC4, 1); + m_terrain_matrix_uniform = ffr::allocUniform("u_terrain_matrix", ffr::UniformType::MAT4, 1); m_model_uniform = ffr::allocUniform("u_model", ffr::UniformType::MAT4, 1); m_bones_uniform = ffr::allocUniform("u_bones", ffr::UniformType::MAT4, 196); m_canvas_size_uniform = ffr::allocUniform("u_canvas_size", ffr::UniformType::VEC2, 1); @@ -306,7 +314,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline Renderer::GlobalState state; state.camera_pos = Vec4(m_viewport.pos, 1); - const Matrix view = m_viewport.getView(); + const Matrix view = m_viewport.getViewRotation(); const Matrix projection = m_viewport.getProjection(ffr::isHomogenousDepth()); state.camera_projection = projection; state.camera_view = view; @@ -609,12 +617,95 @@ struct PipelineImpl LUMIX_FINAL : Pipeline } + static int renderTerrains(lua_State* L) + { + PROFILE_FUNCTION(); + const int pipeline_idx = lua_upvalueindex(1); + if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { + LuaWrapper::argError(L, pipeline_idx); + } + PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); + const char* define = LuaWrapper::checkArg(L, 1); + + CameraParams cp; + + lua_getfield(L, 2, "frustum"); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + luaL_error(L, "Frustum is not a table"); + } + float* points = cp.frustum.xs; + for (int i = 0; i < 32 + 24; ++i) { + lua_rawgeti(L, -1, i + 1); + if(!LuaWrapper::isType(L, -1)) { + lua_pop(L, 2); + luaL_error(L, "Frustum must contain exactly 24 floats"); + } + points[i] = LuaWrapper::toType(L, -1); + lua_pop(L, 1); + } + cp.frustum.setPlanesFromPoints(); + + if(!LuaWrapper::checkField(L, 2, "lod_multiplier", &cp.lod_multiplier)) { + luaL_error(L, "Missing lod_multiplier in camera params"); + } + + if(!LuaWrapper::checkField(L, 2, "position", &cp.pos)) { + luaL_error(L, "Missing position in camera params"); + } + + IAllocator& allocator = pipeline->m_renderer.getAllocator(); + RenderTerrainsCommand* cmd = LUMIX_NEW(allocator, RenderTerrainsCommand)(allocator); + + if (lua_gettop(L) > 3 && lua_istable(L, 3)) { + lua_pushnil(L); + while (lua_next(L, 3) != 0) { + if(lua_type(L, -1) != LUA_TNUMBER) { + g_log_error.log("Renderer") << "Incorrect global textures arguments of renderTerrains"; + LUMIX_DELETE(pipeline->m_renderer.getAllocator(), cmd); + lua_pop(L, 2); + return 0; + } + + if(lua_type(L, -2) != LUA_TSTRING) { + g_log_error.log("Renderer") << "Incorrect global textures arguments of renderTerrains"; + LUMIX_DELETE(pipeline->m_renderer.getAllocator(), cmd); + lua_pop(L, 2); + return 0; + } + + if (cmd->m_global_textures_count > lengthOf(cmd->m_global_textures)) { + g_log_error.log("Renderer") << "Too many textures in renderTerrains call"; + LUMIX_DELETE(pipeline->m_renderer.getAllocator(), cmd); + lua_pop(L, 2); + return 0; + } + + const char* uniform = lua_tostring(L, -2); + const int rb_idx = (int)lua_tointeger(L, -1); + auto& t = cmd->m_global_textures[cmd->m_global_textures_count]; + t.texture = pipeline->m_renderbuffers[rb_idx].handle; + t.uniform = ffr::allocUniform(uniform, ffr::UniformType::INT, 1); + ++cmd->m_global_textures_count; + + lua_pop(L, 1); + } + } + + cmd->m_pipeline = pipeline; + cmd->m_camera_params = cp; + cmd->m_shader_define = define; + pipeline->m_renderer.push(cmd); + return 0; + } + + static int renderMeshes(lua_State* L) { PROFILE_FUNCTION(); const int pipeline_idx = lua_upvalueindex(1); if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { - LuaWrapper::argError(L, 1); + LuaWrapper::argError(L, pipeline_idx); } PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); @@ -737,7 +828,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline const int pipeline_idx = lua_upvalueindex(1); if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { - LuaWrapper::argError(L, 1); + LuaWrapper::argError(L, pipeline_idx); } PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); const int indices_offset = LuaWrapper::checkArg(L, 1); @@ -891,7 +982,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline { const int pipeline_idx = lua_upvalueindex(1); if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { - LuaWrapper::argError(L, 1); + LuaWrapper::argError(L, pipeline_idx); } PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); @@ -959,7 +1050,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline { const int pipeline_idx = lua_upvalueindex(1); if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { - LuaWrapper::argError(L, 1); + LuaWrapper::argError(L, pipeline_idx); } PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); @@ -1039,7 +1130,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline { const int pipeline_idx = lua_upvalueindex(1); if (lua_type(L, pipeline_idx) != LUA_TLIGHTUSERDATA) { - LuaWrapper::argError(L, 1); + LuaWrapper::argError(L, pipeline_idx ); } PipelineImpl* pipeline = LuaWrapper::toType(L, pipeline_idx); @@ -1129,16 +1220,166 @@ struct PipelineImpl LUMIX_FINAL : Pipeline } } + struct RenderTerrainsCommand : Renderer::RenderCommandBase + { + RenderTerrainsCommand(IAllocator& allocator) + : m_allocator(allocator) + , m_instance_data(allocator) + , m_batches(allocator) + { + } + + void setup() override + { + PROFILE_FUNCTION(); + Array infos(m_allocator); + m_pipeline->m_scene->getTerrainInfos(m_camera_params.frustum, m_camera_params.pos, infos); + + if (infos.empty()) return; + + m_define_mask = m_shader_define.empty() + ? 0 + : 1 << m_pipeline->m_renderer.getShaderDefineIdx(m_shader_define); + + std::sort(infos.begin(), infos.end(), [](const TerrainInfo& a, const TerrainInfo& b) { + if (a.m_terrain == b.m_terrain) return a.m_index < b.m_index; + return a.m_terrain < b.m_terrain; + }); + + m_instance_data.resize(infos.size()); + Terrain* prev_terrain = nullptr; + int prev_idx = -1; + int prev_submesh = -1; + for (int i = 0, c = infos.size(); i < c; ++i) { + const TerrainInfo& info = infos[i]; + if (info.m_terrain != prev_terrain || prev_submesh != info.m_index) { + if (prev_terrain) { + Batch& b = m_batches.emplace(); + b.terrain = prev_terrain; + b.shader = infos[prev_idx].m_shader; + b.matrix = infos[prev_idx].m_world_matrix; + b.matrix.setTranslation(b.matrix.getTranslation() - m_camera_params.pos); + b.submesh = infos[prev_idx].m_index; + b.from = prev_idx; + b.to = i - 1; + } + prev_idx = i; + prev_terrain = info.m_terrain; + prev_submesh = info.m_index; + } + m_instance_data[i].size = info.m_size; + m_instance_data[i].quad_min = info.m_min; + m_instance_data[i].morph_consts = info.m_morph_const; + } + Batch& b = m_batches.emplace(); + b.terrain = prev_terrain; + b.shader = infos[prev_idx].m_shader; + b.matrix = infos[prev_idx].m_world_matrix; + b.matrix.setTranslation(b.matrix.getTranslation() - m_camera_params.pos); + b.submesh = infos[prev_idx].m_index; + b.from = prev_idx; + b.to = infos.size() - 1; + } + + void execute() override + { + if(m_instance_data.empty()) return; + + ffr::pushDebugGroup("terrains"); + Renderer::TransientSlice instance_buffer = m_pipeline->m_renderer.allocTransient(m_instance_data.byte_size()); + ffr::update(instance_buffer.buffer, m_instance_data.begin(), 0, m_instance_data.byte_size()); + + ffr::VertexDecl decl; + decl.addAttribute(3, ffr::AttributeType::FLOAT, false, false); + decl.addAttribute(2, ffr::AttributeType::FLOAT, false, false); + + ffr::VertexDecl instance_decl; + instance_decl.addAttribute(3, ffr::AttributeType::FLOAT, false, false); + instance_decl.addAttribute(1, ffr::AttributeType::FLOAT, false, false); + instance_decl.addAttribute(3, ffr::AttributeType::FLOAT, false, false); + + const Vec3 camera_pos = m_camera_params.pos; + + for (const Batch& batch : m_batches) { + Texture* detail_texture = batch.terrain->getDetailTexture(); + if (!detail_texture) continue; + Texture* splat_texture = batch.terrain->getSplatmap(); + if (!splat_texture) continue; + + const Matrix inv_world_matrix = batch.matrix.fastInverted(); + const Vec4 rel_cam_pos(inv_world_matrix.transformPoint(camera_pos) / batch.terrain->getXZScale(), 1); + const Vec4 terrain_scale(batch.terrain->getScale(), 0); + const Vec4 terrain_params(batch.terrain->getRootSize() + , (float)detail_texture->width + , (float)splat_texture->width + , 0); + ffr::setUniform4f(m_pipeline->m_terrain_params_uniform, &terrain_params.x); + ffr::setUniform4f(m_pipeline->m_rel_camera_pos_uniform, &rel_cam_pos.x); + ffr::setUniform4f(m_pipeline->m_terrain_scale_uniform, &terrain_scale.x); + ffr::setUniformMatrix4f(m_pipeline->m_terrain_matrix_uniform, &batch.matrix.m11); + + const ffr::ProgramHandle prg = batch.shader->getProgram(m_define_mask).handle; + ffr::useProgram(prg); + /* + for (int i = 0; i < m_global_textures_count; ++i) { + const auto& t = m_global_textures[i]; + ffr::bindTexture(i, t.texture); + ffr::setUniform1i(t.uniform, i); + } + */ + const Material* material = batch.terrain->m_material; + const int textures_count = material->getTextureCount(); + for (int i = 0; i < textures_count; ++i) { + ffr::bindTexture(i + 0, material->getTexture(i)->handle); + ffr::setUniform1i(material->getTextureUniform(i), i + 0); + } + + const Mesh& mesh = *batch.terrain->getMesh(); + ffr::setVertexBuffer(&decl, mesh.vertex_buffer_handle, 0, nullptr); + ffr::setInstanceBuffer(instance_decl, instance_buffer.buffer, instance_buffer.offset + batch.from * sizeof(m_instance_data[0]), 2); + ffr::setIndexBuffer(mesh.index_buffer_handle); + ffr::setState(u64(ffr::StateFlags::DEPTH_TEST) | batch.terrain->m_material->getRenderStates()); + const int submesh_indices_count = mesh.indices_count / 4; + ffr::drawTrianglesInstanced(batch.submesh * submesh_indices_count * sizeof(u16), submesh_indices_count , 1 + batch.to - batch.from); + } + ffr::popDebugGroup(); + } + + struct InstanceData + { + Vec3 quad_min; + float size; + Vec3 morph_consts; + }; + + struct Batch + { + Terrain* terrain; + Shader* shader; + Matrix matrix; + uint submesh; + uint from; + uint to; + }; + + IAllocator& m_allocator; + PipelineImpl* m_pipeline; + CameraParams m_camera_params; + StaticString<32> m_shader_define; + u32 m_define_mask; + Array m_instance_data; + Array m_batches; + struct { + ffr::TextureHandle texture; + ffr::UniformHandle uniform; + } m_global_textures[16]; + int m_global_textures_count = 0; + + }; + struct RenderMeshesCommand : Renderer::RenderCommandBase { - struct Mesh { - Matrix mtx; - Lumix::Mesh* mesh; // TODO - Entity owner; - }; - - void setup() override { if(!pipeline->m_scene) return; @@ -1150,26 +1391,49 @@ struct PipelineImpl LUMIX_FINAL : Pipeline RenderScene* scene = pipeline->getScene(); const Universe& universe = scene->getUniverse(); const Frustum frustum = camera_params.frustum;; - const Vec3 pos = camera_params.pos; + const Vec3 camera_pos = camera_params.pos; const float lod_multiplier = camera_params.lod_multiplier; Array> meshes(renderer.getAllocator()); - scene->getModelInstanceInfos(frustum, pos, lod_multiplier, layer_mask, meshes); + scene->getModelInstanceInfos(frustum, camera_pos, lod_multiplier, layer_mask, meshes); int count = 0; for(const auto& submeshes : meshes) count += submeshes.size(); - meshes_mem = renderer.allocate(sizeof(Mesh) * count); - this->meshes = (Mesh*)meshes_mem.data; - meshes_count = 0; - for(const auto& submeshes : meshes) { - for(const auto& mesh : submeshes) { - auto& m = this->meshes[meshes_count]; - m.mtx = universe.getMatrix(mesh.owner); - m.mesh = mesh.mesh; - m.owner = mesh.owner; - ++meshes_count; + m_meshes_mem = renderer.allocate((sizeof(Matrix) + sizeof(Mesh*) + sizeof(Entity)) * count); + m_meshes.matrices = (Matrix*)m_meshes_mem.data; + m_meshes.meshes = (Mesh**)&m_meshes.matrices[count]; + m_meshes.owners = (Entity*)&m_meshes.meshes[count]; + m_meshes.count = 0; + + JobSystem::JobDecl jobs[64]; + JobSystem::LambdaJob job_storage[64]; + + ASSERT(meshes.size() < lengthOf(jobs)); + + if (!meshes.empty()) { + volatile int counter = 0; + for(const auto& submeshes : meshes) { + const int idx = int(&submeshes - &meshes[0]); + const int offset = m_meshes.count; + JobSystem::fromLambda([idx, this, &meshes, &universe, camera_pos, offset](){ + const auto& submeshes = meshes[idx]; + int midx = 0; + Mesh** LUMIX_RESTRICT meshes = &m_meshes.meshes[offset]; + Matrix* LUMIX_RESTRICT matrices = &m_meshes.matrices[offset]; + Entity* LUMIX_RESTRICT owners = &m_meshes.owners[offset]; + + for(const auto& mesh : submeshes) { + matrices[midx] = universe.getRelativeMatrix(mesh.owner, camera_pos); + meshes[midx] = mesh.mesh; + owners[midx] = mesh.owner; + ++midx; + } + }, &job_storage[idx], &jobs[idx], nullptr); + m_meshes.count += submeshes.size(); } + JobSystem::runJobs(jobs, meshes.size(), &counter); + JobSystem::wait(&counter); } - MT::atomicAdd(&pipeline->m_stats.draw_call_count, meshes_count); + MT::atomicAdd(&pipeline->m_stats.draw_call_count, m_meshes.count); const Entity probe = scene->getNearestEnvironmentProbe(pipeline->m_viewport.pos); if (probe.isValid()) { @@ -1185,7 +1449,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline } - void renderSkinnedMesh(const Mesh& mesh, const Pose& pose, const Model& model, int model_uniform_loc, int bones_uniform_loc) const + void renderSkinnedMesh(const Matrix& matrix, const Mesh& mesh, const Pose& pose, const Model& model, int model_uniform_loc, int bones_uniform_loc) const { Matrix bone_mtx[196]; @@ -1201,8 +1465,8 @@ struct PipelineImpl LUMIX_FINAL : Pipeline } ffr::applyUniformMatrix4fv(bones_uniform_loc, pose.count, &bone_mtx[0].m11); - ffr::applyUniformMatrix4f(model_uniform_loc, &mesh.mtx.m11); - ffr::drawTriangles(mesh.mesh->indices_count); + ffr::applyUniformMatrix4f(model_uniform_loc, &matrix.m11); + ffr::drawTriangles(mesh.indices_count); } @@ -1223,123 +1487,112 @@ struct PipelineImpl LUMIX_FINAL : Pipeline ffr::setUniform1i(t.uniform, 2 + i); } - const Material* prev_material = nullptr; - const Lumix::Mesh* prev_mesh = nullptr; - const Shader::Program* program = nullptr; - int model_uniform_loc = -1; - int bones_uniform_loc = -1; - u8* instance_data = (u8*)pipeline->m_allocator.allocate(4 * 1024 * 1024); - int instance_offset = 0; - const Lumix::Mesh* instance_mesh = nullptr; - ffr::VertexDecl instance_decl; instance_decl.addAttribute(4, ffr::AttributeType::FLOAT, false, false); instance_decl.addAttribute(4, ffr::AttributeType::FLOAT, false, false); instance_decl.addAttribute(4, ffr::AttributeType::FLOAT, false, false); instance_decl.addAttribute(4, ffr::AttributeType::FLOAT, false, false); - auto finish_instances = [&](){ - PROFILE_BLOCK("finish_instances"); - const ffr::BufferHandle instance_buffer = pipeline->m_renderer.getTransientBuffer(); - ffr::update(instance_buffer, instance_data, 0, instance_offset); - { - PROFILE_BLOCK("isntance"); - ffr::setInstanceBuffer(instance_decl, instance_buffer, 0, instance_mesh->vertex_decl.attributes_count); - { - PROFILE_BLOCK("draw"); - ffr::drawTrianglesInstanced(instance_mesh->indices_count, instance_offset / sizeof(Matrix)); - instance_mesh = nullptr; - instance_offset = 0; - } - } - }; - Renderer& renderer = pipeline->m_renderer; const u32 instanced_define_mask = 1 << renderer.getShaderDefineIdx("INSTANCED"); const u32 skinned_define_mask = 1 << renderer.getShaderDefineIdx("SKINNED"); const ModelInstance* model_instances = pipeline->m_scene->getModelInstances(); - for (int i = 0, c = meshes_count; i < c; ++i) { - const Mesh& mesh = meshes[i]; + Mesh** LUMIX_RESTRICT meshes = m_meshes.meshes; + const Matrix* LUMIX_RESTRICT matrices = m_meshes.matrices; + const Entity* LUMIX_RESTRICT owners = m_meshes.owners; + int start_instance = -1; - if(mesh.mesh != prev_mesh) { - if(instance_mesh) { - finish_instances(); - } - if (mesh.mesh->type == Lumix::Mesh::RIGID_INSTANCED) { - instance_mesh = mesh.mesh; - } + for (int batch = 0, c = m_meshes.count; batch < c; batch += 8 * 1024) { + const Material* prev_material = nullptr; + const Lumix::Mesh* prev_mesh = nullptr; + const Shader::Program* program = nullptr; + int model_uniform_loc = -1; + int bones_uniform_loc = -1; - const Material* material = mesh.mesh->material; - if (!material->isReady()) continue; - if(material != prev_material) { - u32 final_define_mask = material->getDefineMask() | define_mask; - if(mesh.mesh->type == Lumix::Mesh::RIGID_INSTANCED) { - final_define_mask |= instanced_define_mask; - } + for (int i = batch, c = Math::minimum(batch + 8 * 1024, m_meshes.count); i < c; ++i) { + const Mesh* mesh = meshes[i]; + + if(mesh != prev_mesh) { + const Material* material = mesh->material; + if (!material->isReady()) continue; + if(material != prev_material) { + u32 final_define_mask = material->getDefineMask() | define_mask; + if(mesh->type == Mesh::RIGID_INSTANCED) { + final_define_mask |= instanced_define_mask; + } - const Shader::Program& prog = material->getShader()->getProgram(final_define_mask); - if (!prog.handle.isValid()) continue; + const Shader::Program& prog = material->getShader()->getProgram(final_define_mask); + if (!prog.handle.isValid()) continue; - program = &prog; - model_uniform_loc = ffr::getUniformLocation(prog.handle, pipeline->m_model_uniform); - bones_uniform_loc = ffr::getUniformLocation(prog.handle, pipeline->m_bones_uniform); - int textures_count = material->getTextureCount(); - for (int i = 0; i < textures_count; ++i) { - ffr::bindTexture(i + 2 + global_textures_count, material->getTexture(i)->handle); - ffr::setUniform1i(material->getTextureUniform(i), i + 2 + global_textures_count); + program = &prog; + model_uniform_loc = ffr::getUniformLocation(prog.handle, pipeline->m_model_uniform); + bones_uniform_loc = ffr::getUniformLocation(prog.handle, pipeline->m_bones_uniform); + const int textures_count = material->getTextureCount(); + for (int i = 0; i < textures_count; ++i) { + ffr::bindTexture(i + 2 + global_textures_count, material->getTexture(i)->handle); + ffr::setUniform1i(material->getTextureUniform(i), i + 2 + global_textures_count); + } + + const Vec4 material_params(material->getRoughness() + , material->getMetallic() + , 0 + , 1 + ); + + ffr::setUniform4f(pipeline->m_material_params_uniform, &material_params.x); + prev_material = material; + + ffr::setState(u64(ffr::StateFlags::DEPTH_TEST) | mesh->material->getRenderStates()); + ffr::useProgram(prog.handle); } - const Vec4 material_params(material->getRoughness() - , material->getMetallic() - , 0 - , 1 - ); - - ffr::setUniform4f(pipeline->m_material_params_uniform, &material_params.x); - prev_material = material; - - ffr::setState(u64(ffr::StateFlags::DEPTH_TEST) | mesh.mesh->material->getRenderStates()); - ffr::useProgram(prog.handle); - } - - prev_mesh = mesh.mesh; - int attribute_map[16]; - for (uint i = 0; i < mesh.mesh->vertex_decl.attributes_count; ++i) { - attribute_map[i] = program->attribute_by_semantics[(int)mesh.mesh->attributes_semantic[i]]; - } + prev_mesh = mesh; + int attribute_map[16]; + for (uint i = 0; i < mesh->vertex_decl.attributes_count; ++i) { + attribute_map[i] = program->attribute_by_semantics[(int)mesh->attributes_semantic[i]]; + } - ffr::setVertexBuffer(&mesh.mesh->vertex_decl, mesh.mesh->vertex_buffer_handle, 0, program->use_semantics ? attribute_map : nullptr); - ffr::setIndexBuffer(mesh.mesh->index_buffer_handle); - - } - - switch(mesh.mesh->type) { - case Lumix::Mesh::RIGID_INSTANCED: - memcpy(instance_data + instance_offset, &mesh.mtx, sizeof(mesh.mtx)); - instance_offset += sizeof(mesh.mtx); - break; - case Lumix::Mesh::RIGID: - ffr::applyUniformMatrix4f(model_uniform_loc, &mesh.mtx.m11); - ffr::drawTriangles(mesh.mesh->indices_count); - break; - case Lumix::Mesh::SKINNED: { - const ModelInstance& model_instance = model_instances[mesh.owner.index]; - renderSkinnedMesh(mesh, *model_instance.pose, *model_instance.model, model_uniform_loc, bones_uniform_loc); - break; + ffr::setVertexBuffer(&mesh->vertex_decl, mesh->vertex_buffer_handle, 0, program->use_semantics ? attribute_map : nullptr); + ffr::setIndexBuffer(mesh->index_buffer_handle); + } + + switch(mesh->type) { + case Mesh::RIGID_INSTANCED: { + const int start = i; + const Mesh* const instance_mesh = meshes[start]; + ++i; + while (meshes[i] == instance_mesh && i < c) { + ++i; + } + const int instances_count = i - start; + + PROFILE_BLOCK("finish_instances"); + const Renderer::TransientSlice instance_buffer = pipeline->m_renderer.allocTransient(instances_count * sizeof(Matrix)); + + ffr::update(instance_buffer.buffer, matrices + start, instance_buffer.offset, instance_buffer.size); + ffr::setInstanceBuffer(instance_decl, instance_buffer.buffer, instance_buffer.offset, instance_mesh->vertex_decl.attributes_count); + ffr::drawTrianglesInstanced(0, instance_mesh->indices_count, instances_count); + break; + } + case Mesh::RIGID: + ffr::applyUniformMatrix4f(model_uniform_loc, &matrices[i].m11); + ffr::drawTriangles(mesh->indices_count); + break; + case Mesh::SKINNED: { + const ModelInstance& model_instance = model_instances[owners[i].index]; + renderSkinnedMesh(matrices[i], *mesh, *model_instance.pose, *model_instance.model, model_uniform_loc, bones_uniform_loc); + break; + } + default: + ASSERT(false); + break; } - default: - ASSERT(false); - break; } } - if(instance_mesh) { - finish_instances(); - } - pipeline->m_allocator.deallocate(instance_data); - pipeline->m_renderer.free(meshes_mem); + pipeline->m_renderer.free(m_meshes_mem); ffr::popDebugGroup(); } @@ -1348,9 +1601,13 @@ struct PipelineImpl LUMIX_FINAL : Pipeline PipelineImpl* pipeline; ffr::TextureHandle irradiance_map; ffr::TextureHandle radiance_map; - Mesh* meshes; - int meshes_count; - Renderer::MemRef meshes_mem; + struct { + Entity* owners; + Matrix* matrices; + Mesh** meshes; + int count; + } m_meshes; + Renderer::MemRef m_meshes_mem; StaticString<32> shader_define; u32 define_mask; u64 layer_mask; @@ -1527,6 +1784,7 @@ struct PipelineImpl LUMIX_FINAL : Pipeline registerCFunction("getCameraParams", PipelineImpl::getCameraParams); registerCFunction("getShadowCameraParams", PipelineImpl::getShadowCameraParams); registerCFunction("renderMeshes", PipelineImpl::renderMeshes); + registerCFunction("renderTerrains", PipelineImpl::renderTerrains); registerCFunction("setRenderTargets", PipelineImpl::setRenderTargets); lua_pop(L, 1); // pop env @@ -1581,6 +1839,10 @@ struct PipelineImpl LUMIX_FINAL : Pipeline Array m_renderbuffers; Array m_shaders; + ffr::UniformHandle m_terrain_params_uniform; + ffr::UniformHandle m_rel_camera_pos_uniform; + ffr::UniformHandle m_terrain_scale_uniform; + ffr::UniformHandle m_terrain_matrix_uniform; ffr::UniformHandle m_model_uniform; ffr::UniformHandle m_bones_uniform; ffr::UniformHandle m_canvas_size_uniform; diff --git a/src/renderer/render_scene.cpp b/src/renderer/render_scene.cpp index 9f30e0821..61455bb19 100644 --- a/src/renderer/render_scene.cpp +++ b/src/renderer/render_scene.cpp @@ -3503,6 +3503,89 @@ bgfx::TextureHandle& handle = pipeline->getRenderbuffer(framebuffer_name, render } + + // TODO + enum { + BX_RADIXSORT_BITS = 11, + BX_RADIXSORT_HISTOGRAM_SIZE = 1 << BX_RADIXSORT_BITS, + BX_RADIXSORT_BIT_MASK = BX_RADIXSORT_HISTOGRAM_SIZE - 1 + }; + + + template + inline void radixSort(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size) + { + uint64_t* keys = _keys; + uint64_t* tempKeys = _tempKeys; + Ty* values = _values; + Ty* tempValues = _tempValues; + + uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; + uint16_t shift = 0; + uint32_t pass = 0; + for (; pass < 6; ++pass) + { + memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE); + + bool sorted = true; + { + uint64_t key = keys[0]; + uint64_t prevKey = key; + for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key) + { + key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + ++histogram[index]; + sorted &= prevKey <= key; + } + } + + if (sorted) + { + goto done; + } + + uint32_t offset = 0; + for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) + { + uint32_t count = histogram[ii]; + histogram[ii] = offset; + offset += count; + } + + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint64_t key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + uint32_t dest = histogram[index]++; + tempKeys[dest] = key; + tempValues[dest] = values[ii]; + } + + uint64_t* swapKeys = tempKeys; + tempKeys = keys; + keys = swapKeys; + + Ty* swapValues = tempValues; + tempValues = values; + values = swapValues; + + shift += BX_RADIXSORT_BITS; + } + +done: + if (0 != (pass&1) ) + { + // Odd number of passes needs to do copy to the destination. + memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) ); + for (uint32_t ii = 0; ii < _size; ++ii) + { + _values[ii] = _tempValues[ii]; + } + } + } + + void getModelInstanceInfos(const Frustum& frustum, const Vec3& lod_ref_point, float lod_multiplier, @@ -3558,14 +3641,27 @@ bgfx::TextureHandle& handle = pipeline->getRenderbuffer(framebuffer_name, render if (!subinfos.empty()) { PROFILE_BLOCK("Sort"); - MeshInstance* begin = &subinfos[0]; - MeshInstance* end = begin + subinfos.size(); + MeshInstance* const LUMIX_RESTRICT begin = &subinfos[0]; + MeshInstance* const LUMIX_RESTRICT end = begin + subinfos.size(); + Array keys(m_allocator); + Array tmpkeys(m_allocator); + keys.resize(subinfos.size()); + tmpkeys.resize(subinfos.size()); + for(int i = 0, c = subinfos.size(); i < c; ++i) { + keys[i] = (u64)subinfos[i].mesh; + } + + Array tmpinfos(m_allocator); + tmpinfos.resize(subinfos.size()); + + radixSort(&keys[0], &tmpkeys[0], &subinfos[0], &tmpinfos[0], keys.size()); +/* auto cmp = [](const MeshInstance& a, const MeshInstance& b) -> bool { if (a.mesh != b.mesh) return a.mesh < b.mesh; return (a.depth < b.depth); }; - std::sort(begin, end, cmp); + std::sort(begin, end, cmp);*/ } }, &job_storage[subresult_index], &jobs[subresult_index], nullptr); } diff --git a/src/renderer/renderer.cpp b/src/renderer/renderer.cpp index bb51f4184..1ba23b79e 100644 --- a/src/renderer/renderer.cpp +++ b/src/renderer/renderer.cpp @@ -58,6 +58,8 @@ namespace Lumix static const ComponentType MODEL_INSTANCE_TYPE = Reflection::getComponentType("renderable"); +enum { TRANSIENT_BUFFER_SIZE = 32 * 1024 * 1024 }; + struct GPUProfiler { @@ -182,6 +184,7 @@ struct RenderTask : MT::Task MT::Semaphore m_finished_semaphore; bool m_shutdown_requested = false; ffr::BufferHandle m_transient_buffer; + uint m_transient_buffer_offset; struct PreparedCommand { @@ -561,7 +564,7 @@ struct RendererImpl LUMIX_FINAL : public Renderer struct Cmd : RenderCommandBase { void setup() override {} void execute() override { - ffr::loadTexture(handle, memory.data, memory.size, flags, nullptr); + ffr::loadTexture(handle, memory.data, memory.size, flags); if(memory.own) { renderer->free(memory); } @@ -584,9 +587,13 @@ struct RendererImpl LUMIX_FINAL : public Renderer } - ffr::BufferHandle getTransientBuffer() override + TransientSlice allocTransient(uint size) override { - return m_render_task.m_transient_buffer; + TransientSlice slice; + slice.buffer = m_render_task.m_transient_buffer; + slice.offset = m_render_task.m_transient_buffer_offset; + slice.size = m_render_task.m_transient_buffer_offset + size > TRANSIENT_BUFFER_SIZE ? 0 : size; + return slice; } @@ -850,6 +857,7 @@ struct RendererImpl LUMIX_FINAL : public Renderer renderer->m_frame_semaphore.signal(); ffr::swapBuffers(); renderer->m_render_task.m_profiler.frame(); + renderer->m_render_task.m_transient_buffer_offset = 0; } RendererImpl* renderer; }; @@ -921,7 +929,8 @@ int RenderTask::task() ffr::createBuffer(m_global_state_uniforms, sizeof(Renderer::GlobalState), nullptr); ffr::bindUniformBuffer(0, m_global_state_uniforms, 0, sizeof(Renderer::GlobalState)); m_transient_buffer = ffr::allocBufferHandle(); - ffr::createBuffer(m_transient_buffer, 4 *1024 * 1024, nullptr); + m_transient_buffer_offset = 0; + ffr::createBuffer(m_transient_buffer, TRANSIENT_BUFFER_SIZE, nullptr); while (!m_shutdown_requested || !m_commands.isEmpty()) { Renderer::RenderCommandBase** rt_cmd = m_commands.pop(true); Renderer::RenderCommandBase* cmd = *rt_cmd; diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h index befaf07ec..c3f49d9b6 100644 --- a/src/renderer/renderer.h +++ b/src/renderer/renderer.h @@ -67,6 +67,13 @@ class LUMIX_RENDERER_API Renderer : public IPlugin bool is_end; }; + struct TransientSlice + { + ffr::BufferHandle buffer; + uint offset; + uint size; + }; + enum { MAX_SHADER_DEFINES = 32 }; public: virtual ~Renderer() {} @@ -95,7 +102,7 @@ class LUMIX_RENDERER_API Renderer : public IPlugin virtual MemRef copy(const void* data, uint size) = 0 ; virtual void free(const MemRef& memory) = 0; - virtual ffr::BufferHandle getTransientBuffer() = 0; + virtual TransientSlice allocTransient(uint size) = 0; virtual ffr::BufferHandle createBuffer(const MemRef& memory) = 0; virtual void destroy(ffr::BufferHandle buffer) = 0; diff --git a/src/renderer/terrain.cpp b/src/renderer/terrain.cpp index 550cf561f..ff5c2944a 100644 --- a/src/renderer/terrain.cpp +++ b/src/renderer/terrain.cpp @@ -28,7 +28,7 @@ static const float GRASS_QUAD_SIZE = 10.0f; static const float GRASS_QUAD_RADIUS = GRASS_QUAD_SIZE * 0.7072f; static const int GRID_SIZE = 16; static const ComponentType TERRAIN_HASH = Reflection::getComponentType("terrain"); -static const char* TEX_COLOR_UNIFORM = "u_texColor"; +static const char* TEX_COLOR_UNIFORM = "u_detail_albedomap"; struct Sample { @@ -874,7 +874,7 @@ RayCastModelHit Terrain::castRay(const Vec3& origin, const Vec3& dir) } -static void generateSubgrid(Array& samples, Array& indices, int& indices_offset, int start_x, int start_y) +static void generateSubgrid(Array& samples, Array& indices, int& indices_offset, int start_x, int start_y) { for (int j = start_y; j < start_y + 8; ++j) { @@ -907,11 +907,11 @@ static void generateSubgrid(Array& samples, Array& indices, int& void Terrain::generateGeometry() { - /*LUMIX_DELETE(m_allocator, m_mesh); + LUMIX_DELETE(m_allocator, m_mesh); m_mesh = nullptr; Array points(m_allocator); points.resize(GRID_SIZE * GRID_SIZE * 4); - Array indices(m_allocator); + Array indices(m_allocator); indices.resize(GRID_SIZE * GRID_SIZE * 6); int indices_offset = 0; generateSubgrid(points, indices, indices_offset, 0, 0); @@ -919,20 +919,20 @@ void Terrain::generateGeometry() generateSubgrid(points, indices, indices_offset, 0, 8); generateSubgrid(points, indices, indices_offset, 8, 8); - bgfx::VertexDecl vertex_def; - vertex_def.begin() - .add(bgfx::Attrib::Position, 3, bgfx::AttribType::Float) - .add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float) - .end(); - m_mesh = LUMIX_NEW(m_allocator, Mesh)(m_material, vertex_def, "terrain", m_allocator); - m_mesh->vertex_buffer_handle = bgfx::createVertexBuffer(bgfx::copy(&points[0], sizeof(points[0]) * points.size()), vertex_def); - auto* indices_mem = bgfx::copy(&indices[0], sizeof(indices[0]) * indices.size()); - m_mesh->index_buffer_handle = bgfx::createIndexBuffer(indices_mem); + ffr::VertexDecl vertex_def; + vertex_def.addAttribute(3, ffr::AttributeType::FLOAT, false, false); + vertex_def.addAttribute(2, ffr::AttributeType::FLOAT, false, false); + + const Mesh::AttributeSemantic semantics[] = { Mesh::AttributeSemantic::POSITION, Mesh::AttributeSemantic::TEXCOORD0 }; + m_mesh = LUMIX_NEW(m_allocator, Mesh)(m_material, vertex_def, "terrain", semantics, m_allocator); + + const Renderer::MemRef vb_data = m_renderer.copy(&points[0], points.byte_size()); + m_mesh->vertex_buffer_handle = m_renderer.createBuffer(vb_data); + + const Renderer::MemRef ib_data = m_renderer.copy(&indices[0], indices.byte_size()); + m_mesh->index_buffer_handle = m_renderer.createBuffer(ib_data); m_mesh->indices_count = indices.size(); m_mesh->flags.set(Mesh::Flags::INDICES_16_BIT); - */ - // TODO - ASSERT(false); } TerrainQuad* Terrain::generateQuadTree(float size) @@ -953,21 +953,21 @@ void Terrain::onMaterialLoaded(Resource::State, Resource::State new_state, Resou { m_detail_texture = m_material->getTextureByUniform(TEX_COLOR_UNIFORM); - m_heightmap = m_material->getTextureByUniform("u_texHeightmap"); + m_heightmap = m_material->getTextureByUniform("u_heightmap"); bool is_data_ready = true; if (m_heightmap && m_heightmap->getData() == nullptr) { m_heightmap->addDataReference(); is_data_ready = false; } - m_splatmap = m_material->getTextureByUniform("u_texSplatmap"); + m_splatmap = m_material->getTextureByUniform("u_splatmap"); if (m_splatmap && m_splatmap->getData() == nullptr) { m_splatmap->addDataReference(); is_data_ready = false; } - Texture* colormap = m_material->getTextureByUniform("u_texColormap"); + Texture* colormap = m_material->getTextureByUniform("u_colormap"); if (colormap && colormap->getData() == nullptr) { colormap->addDataReference(); diff --git a/src/renderer/texture.cpp b/src/renderer/texture.cpp index 7b22b34d3..e49cf37a0 100644 --- a/src/renderer/texture.cpp +++ b/src/renderer/texture.cpp @@ -316,7 +316,7 @@ void Texture::onDataUpdated(int x, int y, int w, int h) bool loadRaw(Texture& texture, FS::IFile& file, IAllocator& allocator) { -/* PROFILE_FUNCTION(); + PROFILE_FUNCTION(); size_t size = file.size(); texture.bytes_per_pixel = 2; texture.width = (int)sqrt(size / texture.bytes_per_pixel); @@ -337,15 +337,13 @@ bool loadRaw(Texture& texture, FS::IFile& file, IAllocator& allocator) dst_mem[i] = src_mem[i] / 65535.0f; } - texture.handle = ffr::createTexture(texture.width, texture.height, ffr::TextureFormat::R32F, texture.getFFRFlags(), &dst_mem[0]); + const Renderer::MemRef mem = texture.renderer.copy(dst_mem.begin(), dst_mem.byte_size()); + texture.handle = texture.renderer.createTexture(texture.width, texture.height, ffr::TextureFormat::R32F, texture.getFFRFlags(), mem); texture.depth = 1; texture.layers = 1; texture.mips = 1; texture.is_cubemap = false; - return texture.handle.isValid();*/ - // TODO - ASSERT(false); - return false; + return texture.handle.isValid(); }