diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 65a88b06c10b57a2dcb605036b58e98761572e76..ad15ea54e73443cd97165e177f5f637db17a9df4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -28,6 +28,7 @@ Device::Device() {
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = TestComponentIndexingBug();
+    is_turing_plus = GLAD_GL_NV_mesh_shader;
 }
 
 Device::Device(std::nullptr_t) {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c937600298c8cfdbe19a4745abe4da943771f..1afe16779b55d4efd36224e29c5ed941d2e63b47 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
         return has_component_indexing_bug;
     }
 
+    bool IsTuringGPU() const { // Returns is_turing_plus, set from GLAD_GL_NV_mesh_shader.
+        return is_turing_plus;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestComponentIndexingBug();
@@ -43,6 +47,7 @@ private:
     u32 max_varyings{};
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
+    bool is_turing_plus{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8fe115aec1cae937d608807ce94d9425738444ff..97c55f2ecb56ee20be595e011c95dd634a27a879 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -82,7 +82,7 @@ struct DrawParameters {
 
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                                    ScreenInfo& info)
-    : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device},
+    : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
       global_cache{*this}, system{system}, screen_info{info},
       buffer_cache(*this, STREAM_BUFFER_SIZE) {
     OpenGLState::ApplyDefaultState();
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 9f81c15cb7ecbe8735453743e74a19712273991a..a1f91d6775a0e223de29aed7aec80e117f034e01 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -148,6 +148,14 @@ void OGLBuffer::Release() {
     handle = 0;
 }
 
+void OGLBuffer::MakePersistant(std::size_t buffer_size) {
+    // Storage is only allocated for an already created buffer and a non-zero size.
+    if (handle != 0 && buffer_size != 0) {
+        const GLbitfield access = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT;
+        glNamedBufferStorage(handle, static_cast<GLsizeiptr>(buffer_size), nullptr, access);
+    }
+}
+
 void OGLSync::Create() {
     if (handle != 0)
         return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 310ee2bf345c72c56fef78e6d2d80fd268a16325..f2873ef96aab31f16ac5b8dc9f64522ff8118e2b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -186,6 +186,9 @@ public:
     /// Deletes the internal OpenGL resource
     void Release();
 
+    /// Converts the buffer into a persistent storage buffer
+    void MakePersistant(std::size_t buffer_size);
+
     GLuint handle = 0;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e6f08a7640084c84839db98af6f7c39ee95cef78..bddb15cb111290580e8d3fc4f2727762c8027810 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
@@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
 }
 
 TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
-                                       VideoCore::RasterizerInterface& rasterizer)
+                                       VideoCore::RasterizerInterface& rasterizer,
+                                       const Device& device)
     : TextureCacheBase{system, rasterizer} {
+    support_info.depth_color_image_copies = !device.IsTuringGPU();
     src_framebuffer.Create();
     dst_framebuffer.Create();
 }
@@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams
 
 void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface,
                                    const VideoCommon::CopyParams& copy_params) {
+    if (!support_info.depth_color_image_copies) {
+        const auto& src_params = src_surface->GetSurfaceParams();
+        const auto& dst_params = dst_surface->GetSurfaceParams();
+        if (src_params.type != dst_params.type) {
+            // A fallback is needed
+            return;
+        }
+    }
     const auto src_handle = src_surface->GetTexture();
     const auto src_target = src_surface->GetTarget();
     const auto dst_handle = dst_surface->GetTexture();
@@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view,
                       is_linear ? GL_LINEAR : GL_NEAREST);
 }
 
+void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) {
+    // Copies mip level 0 of src_surface into dst_surface by staging the texels in a cached PBO:
+    // the source is packed into the buffer, then the destination is unpacked from it.
+    const auto& src_info = src_surface->GetSurfaceParams();
+    const auto& dst_info = dst_surface->GetSurfaceParams();
+    const auto src_tuple = GetFormatTuple(src_info.pixel_format, src_info.component_type);
+    const auto dst_tuple = GetFormatTuple(dst_info.pixel_format, dst_info.component_type);
+
+    const std::size_t src_bytes = src_surface->GetHostSizeInBytes();
+    const std::size_t dst_bytes = dst_surface->GetHostSizeInBytes();
+    // The staging buffer has to fit whichever of the two surfaces is larger.
+    const GLuint staging_pbo = FetchPBO(std::max(src_bytes, dst_bytes));
+
+    // Pack: with a pixel pack buffer bound, nullptr is offset 0 into that buffer.
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, staging_pbo);
+    if (src_tuple.compressed) {
+        glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(src_bytes),
+                                    nullptr);
+    } else {
+        glGetTextureImage(src_surface->GetTexture(), 0, src_tuple.format, src_tuple.type,
+                          static_cast<GLsizei>(src_bytes), nullptr);
+    }
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+    // Unpack: upload the staged bytes into the destination using the destination's format.
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging_pbo);
+
+    const auto width = static_cast<GLsizei>(dst_info.width);
+    const auto height = static_cast<GLsizei>(dst_info.height);
+    const auto depth = static_cast<GLsizei>(dst_info.depth);
+
+    if (!dst_tuple.compressed) {
+        switch (dst_info.target) {
+        case SurfaceTarget::Texture1D:
+            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dst_tuple.format,
+                                dst_tuple.type, nullptr);
+            break;
+        case SurfaceTarget::Texture2D:
+            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
+                                dst_tuple.format, dst_tuple.type, nullptr);
+            break;
+        // Volume, layered and cube targets all go through the same 3D upload call.
+        case SurfaceTarget::Texture3D:
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubeArray:
+        case SurfaceTarget::TextureCubemap:
+            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
+                                dst_tuple.format, dst_tuple.type, nullptr);
+            break;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                         static_cast<u32>(dst_info.target));
+            UNREACHABLE();
+        }
+    } else {
+        // Uploading into a compressed destination is not supported by this path.
+        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
+        UNREACHABLE();
+    }
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+    glTextureBarrier();
+}
+
+GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
+    // buffer_size is unsigned, so zero is the only invalid request; reject it up front.
+    if (buffer_size == 0) {
+        UNREACHABLE();
+        return 0;
+    }
+    const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
+    OGLBuffer& cp = copy_pbo_cache[l2];
+    if (cp.handle == 0) {
+        cp.Create();
+        cp.MakePersistant(static_cast<std::size_t>(1ULL << l2));
+    }
+    return cp.handle;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 0b333e9e3cc59eb72c5d1e8e195536f54d7ae029..f514f137c0b8704bdd5c6b116a87c3eb6661570a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -13,6 +13,7 @@
 
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
+#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/texture_cache/texture_cache.h"
 
@@ -129,7 +130,8 @@ private:
 
 class TextureCacheOpenGL final : public TextureCacheBase {
 public:
-    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                                const Device& device);
     ~TextureCacheOpenGL();
 
 protected:
@@ -141,9 +143,14 @@ protected:
     void ImageBlit(View src_view, View dst_view,
                    const Tegra::Engines::Fermi2D::Config& copy_config) override;
 
+    void BufferCopy(Surface src_surface, Surface dst_surface) override;
+
 private:
+    GLuint FetchPBO(std::size_t buffer_size);
+
     OGLFramebuffer src_framebuffer;
     OGLFramebuffer dst_framebuffer;
+    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 69ef7a2bd190d8a4f715840264c4226d0367e99a..e0d0e1f700078cc0c0f10a09a0da04db03b083cc 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -214,6 +214,13 @@ public:
     }
 
 protected:
+    // This structure is used for communicating with the backend about which behaviors it
+    // supports, to avoid assuming certain things about the hardware.
+    // The backend is RESPONSIBLE for filling these settings on creation.
+    struct Support {
+        bool depth_color_image_copies{}; // Defaults to false until the backend sets it.
+    } support_info;
+
     TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
         : system{system}, rasterizer{rasterizer} {
         for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
@@ -233,6 +240,10 @@ protected:
     virtual void ImageBlit(TView src_view, TView dst_view,
                            const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
 
+    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
+    // and reading it from a separate buffer.
+    virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0;
+
     void Register(TSurface surface) {
         std::lock_guard lock{mutex};
 
@@ -377,9 +388,14 @@ private:
                                               const SurfaceParams& params) {
         const auto gpu_addr = current_surface->GetGpuAddr();
         TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        std::vector<CopyParams> bricks = current_surface->BreakDown(params);
-        for (auto& brick : bricks) {
-            ImageCopy(current_surface, new_surface, brick);
+        const auto& cr_params = current_surface->GetSurfaceParams();
+        if (!support_info.depth_color_image_copies && cr_params.type != params.type) {
+            BufferCopy(current_surface, new_surface);
+        } else {
+            std::vector<CopyParams> bricks = current_surface->BreakDown(params);
+            for (auto& brick : bricks) {
+                ImageCopy(current_surface, new_surface, brick);
+            }
         }
         Unregister(current_surface);
         Register(new_surface);
@@ -505,7 +521,8 @@ private:
             auto topological_result = current_surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
                 std::vector<TSurface> overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      topological_result);
             }
             MatchStructureResult s_result = current_surface->MatchesStructure(params);
             if (s_result != MatchStructureResult::None &&
@@ -537,7 +554,8 @@ private:
         for (auto surface : overlaps) {
             auto topological_result = surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      topological_result);
             }
         }
 
@@ -555,7 +573,8 @@ private:
                         return *view;
                     }
                 }
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      MatchTopologyResult::FullMatch);
             }
             // Now we check if the candidate is a mipmap/layer of the overlap
             std::optional<TView> view =
@@ -578,13 +597,15 @@ private:
                         pair.first->EmplaceView(params, gpu_addr, candidate_size);
                     if (mirage_view)
                         return {pair.first, *mirage_view};
-                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
+                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                          MatchTopologyResult::FullMatch);
                 }
                 return {current_surface, *view};
             }
             // The next case is unsafe, so if we r in accurate GPU, just skip it
             if (Settings::values.use_accurate_gpu_emulation) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      MatchTopologyResult::FullMatch);
             }
             // This is the case the texture is a part of the parent.
             if (current_surface->MatchesSubTexture(params, gpu_addr)) {
@@ -601,7 +622,8 @@ private:
             }
         }
         // We failed all the tests, recycle the overlaps into a new texture.
-        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
+        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                              MatchTopologyResult::FullMatch);
     }
 
     std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,