Skip to content
Snippets Groups Projects
Commit d7298ec2 authored by Markus Wick
Browse files

Update the stream_buffer helper from Citra.

Please see https://github.com/citra-emu/citra/pull/3666 for more details.
parent 5926fbd3
No related branches found
No related tags found
No related merge requests found
...@@ -36,7 +36,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); ...@@ -36,7 +36,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window)
: emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
// Create sampler objects // Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) { for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create(); texture_samplers[i].Create();
...@@ -57,9 +58,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind ...@@ -57,9 +58,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
const std::string_view extension{ const std::string_view extension{
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
if (extension == "GL_ARB_buffer_storage") { if (extension == "GL_ARB_direct_state_access") {
has_ARB_buffer_storage = true;
} else if (extension == "GL_ARB_direct_state_access") {
has_ARB_direct_state_access = true; has_ARB_direct_state_access = true;
} else if (extension == "GL_ARB_separate_shader_objects") { } else if (extension == "GL_ARB_separate_shader_objects") {
has_ARB_separate_shader_objects = true; has_ARB_separate_shader_objects = true;
...@@ -86,16 +85,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind ...@@ -86,16 +85,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
hw_vao.Create(); hw_vao.Create();
stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); state.draw.vertex_buffer = stream_buffer.GetHandle();
stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
state.draw.vertex_buffer = stream_buffer->GetHandle();
shader_program_manager = std::make_unique<GLShader::ProgramManager>(); shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0; state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle; state.draw.vertex_array = hw_vao.handle;
state.Apply(); state.Apply();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
for (unsigned index = 0; index < uniform_buffers.size(); ++index) { for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
auto& buffer = uniform_buffers[index]; auto& buffer = uniform_buffers[index];
...@@ -111,13 +108,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind ...@@ -111,13 +108,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
} }
RasterizerOpenGL::~RasterizerOpenGL() { RasterizerOpenGL::~RasterizerOpenGL() {}
if (stream_buffer != nullptr) {
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
stream_buffer->Release();
}
}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
GLintptr buffer_offset) { GLintptr buffer_offset) {
...@@ -126,7 +117,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ...@@ -126,7 +117,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
state.draw.vertex_array = hw_vao.handle; state.draw.vertex_array = hw_vao.handle;
state.draw.vertex_buffer = stream_buffer->GetHandle(); state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply(); state.Apply();
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
...@@ -145,7 +136,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ...@@ -145,7 +136,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
// Bind the vertex array to the buffer at the current offset. // Bind the vertex array to the buffer at the current offset.
glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); glBindVertexBuffer(index, stream_buffer.GetHandle(), buffer_offset, vertex_array.stride);
ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
...@@ -205,7 +196,7 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { ...@@ -205,7 +196,7 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
// Helper function for uploading uniform data // Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
if (has_ARB_direct_state_access) { if (has_ARB_direct_state_access) {
glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); glCopyNamedBufferSubData(stream_buffer.GetHandle(), handle, offset, 0, size);
} else { } else {
glBindBuffer(GL_COPY_WRITE_BUFFER, handle); glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
...@@ -456,7 +447,7 @@ void RasterizerOpenGL::DrawArrays() { ...@@ -456,7 +447,7 @@ void RasterizerOpenGL::DrawArrays() {
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
state.draw.vertex_buffer = stream_buffer->GetHandle(); state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply(); state.Apply();
size_t buffer_size = CalculateVertexArraysSize(); size_t buffer_size = CalculateVertexArraysSize();
...@@ -471,8 +462,8 @@ void RasterizerOpenGL::DrawArrays() { ...@@ -471,8 +462,8 @@ void RasterizerOpenGL::DrawArrays() {
u8* buffer_ptr; u8* buffer_ptr;
GLintptr buffer_offset; GLintptr buffer_offset;
std::tie(buffer_ptr, buffer_offset) = std::tie(buffer_ptr, buffer_offset, std::ignore) =
stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
u8* offseted_buffer; u8* offseted_buffer;
std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
...@@ -500,7 +491,8 @@ void RasterizerOpenGL::DrawArrays() { ...@@ -500,7 +491,8 @@ void RasterizerOpenGL::DrawArrays() {
SetupShaders(offseted_buffer, buffer_offset); SetupShaders(offseted_buffer, buffer_offset);
stream_buffer->Unmap(); // TODO: Don't use buffer_size here, use the updated buffer_offset.
stream_buffer.Unmap(buffer_size);
shader_program_manager->ApplyTo(state); shader_program_manager->ApplyTo(state);
state.Apply(); state.Apply();
......
...@@ -139,7 +139,6 @@ private: ...@@ -139,7 +139,6 @@ private:
/// Syncs the blend state to match the guest state /// Syncs the blend state to match the guest state
void SyncBlendState(); void SyncBlendState();
bool has_ARB_buffer_storage = false;
bool has_ARB_direct_state_access = false; bool has_ARB_direct_state_access = false;
bool has_ARB_separate_shader_objects = false; bool has_ARB_separate_shader_objects = false;
bool has_ARB_vertex_attrib_binding = false; bool has_ARB_vertex_attrib_binding = false;
...@@ -160,7 +159,7 @@ private: ...@@ -160,7 +159,7 @@ private:
ssbos; ssbos;
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer; OGLStreamBuffer stream_buffer;
OGLBuffer uniform_buffer; OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer; OGLFramebuffer framebuffer;
......
...@@ -9,174 +9,91 @@ ...@@ -9,174 +9,91 @@
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_stream_buffer.h"
class OrphanBuffer : public OGLStreamBuffer { OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
public: : gl_target(target), buffer_size(size) {
explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} gl_buffer.Create();
~OrphanBuffer() override; glBindBuffer(gl_target, gl_buffer.handle);
private:
void Create(size_t size, size_t sync_subdivide) override;
void Release() override;
std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
void Unmap() override;
std::vector<u8> data;
};
class StorageBuffer : public OGLStreamBuffer {
public:
explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
~StorageBuffer() override;
private:
void Create(size_t size, size_t sync_subdivide) override;
void Release() override;
std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
void Unmap() override;
struct Fence {
OGLSync sync;
size_t offset;
};
std::deque<Fence> head;
std::deque<Fence> tail;
u8* mapped_ptr;
};
OGLStreamBuffer::OGLStreamBuffer(GLenum target) {
gl_target = target;
}
GLuint OGLStreamBuffer::GetHandle() const {
return gl_buffer.handle;
}
std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { GLsizeiptr allocate_size = size;
if (storage_buffer) { if (target == GL_ARRAY_BUFFER) {
return std::make_unique<StorageBuffer>(target); // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
// crash.
allocate_size *= 2;
} }
return std::make_unique<OrphanBuffer>(target);
}
OrphanBuffer::~OrphanBuffer() { if (GLAD_GL_ARB_buffer_storage) {
Release(); persistent = true;
coherent = prefer_coherent;
GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
} else {
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
}
} }
void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { OGLStreamBuffer::~OGLStreamBuffer() {
buffer_pos = 0; if (persistent) {
buffer_size = size;
data.resize(buffer_size);
if (gl_buffer.handle == 0) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle); glBindBuffer(gl_target, gl_buffer.handle);
glUnmapBuffer(gl_target);
} }
glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW);
}
void OrphanBuffer::Release() {
gl_buffer.Release(); gl_buffer.Release();
} }
std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { GLuint OGLStreamBuffer::GetHandle() const {
buffer_pos = Common::AlignUp(buffer_pos, alignment); return gl_buffer.handle;
if (buffer_pos + size > buffer_size) {
Create(std::max(buffer_size, size), 0);
}
mapped_size = size;
return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos));
}
void OrphanBuffer::Unmap() {
glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]);
buffer_pos += mapped_size;
}
StorageBuffer::~StorageBuffer() {
Release();
} }
void StorageBuffer::Create(size_t size, size_t sync_subdivide) { GLsizeiptr OGLStreamBuffer::GetSize() const {
if (gl_buffer.handle != 0) return buffer_size;
return;
buffer_pos = 0;
buffer_size = size;
buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);
glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
mapped_ptr = reinterpret_cast<u8*>(
glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size),
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
} }
void StorageBuffer::Release() { std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
if (gl_buffer.handle == 0)
return;
glUnmapBuffer(gl_target);
gl_buffer.Release();
head.clear();
tail.clear();
}
std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
ASSERT(size <= buffer_size); ASSERT(size <= buffer_size);
ASSERT(alignment <= buffer_size);
mapped_size = size;
OGLSync sync; if (alignment > 0) {
buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
buffer_pos = Common::AlignUp(buffer_pos, alignment);
size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
if (!head.empty() &&
(effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
ASSERT(head.back().sync.handle == 0);
head.back().sync.Create();
} }
bool invalidate = false;
if (buffer_pos + size > buffer_size) { if (buffer_pos + size > buffer_size) {
if (!tail.empty()) {
std::swap(sync, tail.back().sync);
tail.clear();
}
std::swap(tail, head);
buffer_pos = 0; buffer_pos = 0;
effective_offset = 0; invalidate = true;
}
while (!tail.empty() && buffer_pos + size > tail.front().offset) { if (persistent) {
std::swap(sync, tail.front().sync); glUnmapBuffer(gl_target);
tail.pop_front(); }
} }
if (sync.handle != 0) { if (invalidate | !persistent) {
glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
sync.Release(); (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
mapped_ptr = static_cast<u8*>(
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
mapped_offset = buffer_pos;
} }
if (head.empty() || effective_offset > head.back().offset) { return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
head.emplace_back(); }
head.back().offset = effective_offset;
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (!coherent && size > 0) {
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
} }
mapped_size = size; if (!persistent) {
return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); glUnmapBuffer(gl_target);
} }
void StorageBuffer::Unmap() { buffer_pos += size;
glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
static_cast<GLsizeiptr>(mapped_size));
buffer_pos += mapped_size;
} }
...@@ -2,35 +2,41 @@ ...@@ -2,35 +2,41 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #include <tuple>
#include <memory>
#include <glad/glad.h> #include <glad/glad.h>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
class OGLStreamBuffer : private NonCopyable { class OGLStreamBuffer : private NonCopyable {
public: public:
explicit OGLStreamBuffer(GLenum target); explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
virtual ~OGLStreamBuffer() = default; ~OGLStreamBuffer();
public:
static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
virtual void Create(size_t size, size_t sync_subdivide) = 0;
virtual void Release() {}
GLuint GetHandle() const; GLuint GetHandle() const;
GLsizeiptr GetSize() const;
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, the offset within the buffer,
* and the invalidation flag for previous chunks.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; void Unmap(GLsizeiptr size);
virtual void Unmap() = 0;
protected: private:
OGLBuffer gl_buffer; OGLBuffer gl_buffer;
GLenum gl_target; GLenum gl_target;
size_t buffer_pos = 0; bool coherent = false;
size_t buffer_size = 0; bool persistent = false;
size_t buffer_sync_subdivide = 0;
size_t mapped_size = 0; GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
}; };
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment