diff --git a/src/common/alignment.h b/src/common/alignment.h
index 617b14d9b7746db5a21aa4e4a463fab788f721be..b3fbdfe2094074ef844692d741384c2728547dae 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -3,7 +3,12 @@
 #pragma once
 
 #include <cstddef>
+#include <cstdlib>
+#include <new>
 #include <type_traits>
+#include <utility>
+#include <malloc.h>
+#include <stdlib.h>
 
 namespace Common {
 
@@ -37,4 +42,80 @@ constexpr bool IsWordAligned(T value) {
     return (value & 0b11) == 0;
 }
 
+/// Stateless allocator that hands out storage aligned to at least Align bytes.
+/// Storage comes from the aligned forms of ::operator new / ::operator delete, so a failed
+/// allocation throws std::bad_alloc instead of returning a null pointer to the container.
+/// Align must be a power of two; alignof(T) is used instead when it is the larger value.
+template <typename T, std::size_t Align = 16>
+class AlignmentAllocator {
+    static_assert(Align != 0 && (Align & (Align - 1)) == 0, "Align must be a power of two");
+    static constexpr std::size_t alignment = Align > alignof(T) ? Align : alignof(T);
+
+public:
+    using value_type = T;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+
+    /// Lets containers obtain an equivalent allocator for another element type.
+    template <typename T2>
+    struct rebind {
+        using other = AlignmentAllocator<T2, Align>;
+    };
+
+    AlignmentAllocator() noexcept = default;
+
+    /// Converting constructor used after rebind; there is no state to copy.
+    template <typename T2>
+    AlignmentAllocator(const AlignmentAllocator<T2, Align>&) noexcept {}
+
+    /// Address-of helpers, kept under their original (misspelled) name for source compatibility.
+    pointer adress(reference r) {
+        return &r;
+    }
+    const_pointer adress(const_reference r) const {
+        return &r;
+    }
+
+    /// Allocates uninitialized storage for n objects; throws std::bad_alloc on overflow or failure.
+    pointer allocate(size_type n) {
+        if (n > max_size()) {
+            throw std::bad_array_new_length();
+        }
+        return static_cast<pointer>(::operator new(n * sizeof(T), std::align_val_t{alignment}));
+    }
+
+    /// Releases storage previously returned by allocate(); the element count is unused.
+    void deallocate(pointer p, size_type) noexcept {
+        ::operator delete(p, std::align_val_t{alignment});
+    }
+
+    /// Constructs a U at p from args, forwarding them so movable arguments are moved.
+    template <typename U, typename... Args>
+    void construct(U* p, Args&&... args) {
+        ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
+    }
+
+    /// Runs the destructor of *p without releasing its storage.
+    void destroy(pointer p) {
+        p->~value_type();
+    }
+
+    /// Largest n for which n * sizeof(T) still fits in size_type.
+    size_type max_size() const noexcept {
+        return static_cast<size_type>(-1) / sizeof(value_type);
+    }
+
+    /// Every instance is interchangeable: storage from one can be freed through another.
+    bool operator==(const AlignmentAllocator&) const noexcept {
+        return true;
+    }
+    bool operator!=(const AlignmentAllocator& other) const noexcept {
+        return !(*this == other);
+    }
+};
+
 } // namespace Common
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index 879957dcb19a2b9ff9929824b3f4cee606c7177d..d8ad540309546134c3bb862959aee343849c4236 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "core/hle/kernel/physical_memory.h"
 
 namespace Kernel {
 
@@ -77,7 +78,7 @@ struct CodeSet final {
     }
 
     /// The overall data that backs this code set.
-    std::vector<u8> memory;
+    Kernel::PhysicalMemory memory;
 
     /// The segments that comprise this code set.
     std::array<Segment, 3> segments;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
new file mode 100644
index 0000000000000000000000000000000000000000..dd49c75a279231e7e444ee910f288b8613746d31
--- /dev/null
+++ b/src/core/hle/kernel/physical_memory.h
@@ -0,0 +1,18 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace Kernel {
+
+/// Byte buffer whose storage is obtained through Common::AlignmentAllocator with an
+/// alignment of 256 bytes, so data() of a non-empty buffer is 256-byte aligned.
+using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 92169a97b076a83c57cfcaf430f2021d245a9208..e80a12ac3527748e045715adfc4e80ee6519e284 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() {
         ASSERT(region_address.Succeeded());
 
         const auto map_result = vm_manager.MapMemoryBlock(
-            *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
+            *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
             Memory::PAGE_SIZE, MemoryState::ThreadLocal);
         ASSERT(map_result.Succeeded());
 
@@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
-    const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
+    const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
 
     const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
                                 MemoryState memory_state) {
@@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) {
     // Allocate and map the main thread stack
     const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
     vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+        .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
                         0, main_thread_stack_size, MemoryState::Stack)
         .Unwrap();
 }
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index f15c5ee3622618057ea78bdcd5f71d4e3ed445fd..45a9e194229e1f77b84f78d0b69a8eabdc796cd4 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
     shared_memory->other_permissions = other_permissions;
 
     if (address == 0) {
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(size);
+        shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size);
         shared_memory->backing_block_offset = 0;
 
         // Refresh the address mappings for the current process.
@@ -59,7 +59,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
 }
 
 SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
-    KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size,
+    KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, u64 size,
     MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
     SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
 
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index c2b6155e1896623166a3b1bcbcec56cb1a21111a..01ca6dcd22ff3f52c1515480f805f97a3c55abf3 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -10,6 +10,7 @@
 
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/result.h"
 
@@ -62,12 +63,10 @@ public:
      * block.
      * @param name Optional object name, used for debugging purposes.
      */
-    static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel,
-                                                   std::shared_ptr<std::vector<u8>> heap_block,
-                                                   std::size_t offset, u64 size,
-                                                   MemoryPermission permissions,
-                                                   MemoryPermission other_permissions,
-                                                   std::string name = "Unknown Applet");
+    static SharedPtr<SharedMemory> CreateForApplet(
+        KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
+        u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
+        std::string name = "Unknown Applet");
 
     std::string GetTypeName() const override {
         return "SharedMemory";
@@ -135,7 +134,7 @@ private:
     ~SharedMemory() override;
 
     /// Backing memory for this shared memory block.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
     /// Offset into the backing block for this shared memory.
     std::size_t backing_block_offset = 0;
     /// Size of the memory block. Page-aligned.
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 26c4e5e674930969f2cd05f98e5c086dd21e7be8..1113c815ec309c8ce1530dd1eac6806d4f45e142 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
         return ERR_INVALID_STATE;
     }
 
-    backing_block = std::make_shared<std::vector<u8>>(size);
+    backing_block = std::make_shared<PhysicalMemory>(size);
 
     const auto map_state = owner_permissions == MemoryPermission::None
                                ? MemoryState::TransferMemoryIsolated
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index a140b1e2bb04cc168b0c3d0dcefcabde4c453cd1..6be9dc0946ede649995d006c10aa7a1412f01cde 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 
 union ResultCode;
 
@@ -82,7 +83,7 @@ private:
     ~TransferMemory() override;
 
     /// Memory block backing this instance.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
 
     /// The base address for the memory managed by this instance.
     VAddr base_address = 0;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 4f45fb03b3c01fc7eb6d498687c5423e9a400379..40cea1e7cc4eb1a4484cdec6be1fa07875cfd85c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <iterator>
 #include <utility>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/memory_hook.h"
@@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
 }
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
-                                                          std::shared_ptr<std::vector<u8>> block,
+                                                          std::shared_ptr<PhysicalMemory> block,
                                                           std::size_t offset, u64 size,
                                                           MemoryState state, VMAPermission perm) {
     ASSERT(block != nullptr);
@@ -260,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_memory = std::make_shared<PhysicalMemory>(size);
         heap_end = heap_region_base + size;
     } else {
         UnmapRange(heap_region_base, GetCurrentHeapSize());
@@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
             const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
             if (vma.state == MemoryState::Unmapped) {
                 const auto map_res =
-                    MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                    MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
                                    map_size, MemoryState::Heap, VMAPermission::ReadWrite);
                 result = map_res.Code();
                 if (result.IsError()) {
@@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
     if (result.IsError()) {
         for (const auto [map_address, map_size] : unmapped_regions) {
             const auto remap_res =
-                MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
                                map_size, MemoryState::Heap, VMAPermission::None);
             ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
         }
@@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     ASSERT_MSG(vma_offset + size <= vma->second.size,
                "Shared memory exceeds bounds of mapped block");
 
-    const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
+    const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
     const std::size_t backing_block_offset = vma->second.offset + vma_offset;
 
     CASCADE_RESULT(auto new_vma,
@@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     return RESULT_SUCCESS;
 }
 
-void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
+void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
     // If this ever proves to have a noticeable performance impact, allow users of the function to
     // specify a specific range of addresses to limit the scan to.
     for (const auto& p : vma_map) {
@@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
                                        right.backing_block->begin() + right.offset + right.size);
         } else {
             // Slow case: make a new memory block for left and right.
-            auto new_memory = std::make_shared<std::vector<u8>>();
+            auto new_memory = std::make_shared<PhysicalMemory>();
             new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
                                left.backing_block->begin() + left.offset + left.size);
             new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 0aecb749913919b028574cf6f382ae261eefac0f..b18cde6197ca3bb4824cf369aa3d96614cc368e1 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "common/memory_hook.h"
 #include "common/page_table.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
 
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
 
     // Settings for type = AllocatedMemoryBlock
     /// Memory block backing this VMA.
-    std::shared_ptr<std::vector<u8>> backing_block = nullptr;
+    std::shared_ptr<PhysicalMemory> backing_block = nullptr;
     /// Offset into the backing_memory the mapping starts from.
     std::size_t offset = 0;
 
@@ -348,7 +349,7 @@ public:
      * @param size Size of the mapping.
      * @param state MemoryState tag to attach to the VMA.
      */
-    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
+    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
                                         std::size_t offset, u64 size, MemoryState state,
                                         VMAPermission perm = VMAPermission::ReadWrite);
 
@@ -547,7 +548,7 @@ public:
      * Scans all VMAs and updates the page table range of any that use the given vector as backing
      * memory. This should be called after any operation that causes reallocation of the vector.
      */
-    void RefreshMemoryBlockMappings(const std::vector<u8>* block);
+    void RefreshMemoryBlockMappings(const PhysicalMemory* block);
 
     /// Dumps the address space layout to the log, for debugging
     void LogLayout() const;
@@ -777,7 +778,7 @@ private:
     // the entire virtual address space extents that bound the allocations, including any holes.
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
     // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
-    std::shared_ptr<std::vector<u8>> heap_memory;
+    std::shared_ptr<PhysicalMemory> heap_memory;
 
     // The end of the currently allocated heap. This is not an inclusive
     // end of the range. This is essentially 'base_address + current_size'.
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ad176f89dfcfec91f1e62a54989c5badebe4ff81..2a522136d0c715e9a7c48f7df12dc30f61be8924 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -77,7 +77,7 @@ enum class LoadState : u32 {
     Done = 1,
 };
 
-static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
+static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
                               std::size_t& offset) {
     ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
                "Shared fonts exceeds 17mb!");
@@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
     offset += transformed_font.size() * sizeof(u32);
 }
 
-static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output,
+static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
                               std::size_t& offset) {
     ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
     const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@@ -121,7 +121,7 @@ struct PL_U::Impl {
         return shared_font_regions.at(index);
     }
 
-    void BuildSharedFontsRawRegions(const std::vector<u8>& input) {
+    void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
         // As we can derive the xor key we can just populate the offsets
         // based on the shared memory dump
         unsigned cur_offset = 0;
@@ -144,7 +144,7 @@ struct PL_U::Impl {
     Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
 
     /// Backing memory for the shared font data
-    std::shared_ptr<std::vector<u8>> shared_font;
+    std::shared_ptr<Kernel::PhysicalMemory> shared_font;
 
     // Automatically populated based on shared_fonts dump or system archives.
     std::vector<FontRegion> shared_font_regions;
@@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
     // Rebuild shared fonts from data ncas
     if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
                        FileSys::ContentRecordType::Data)) {
-        impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
+        impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
         for (auto font : SHARED_FONTS) {
             const auto nca =
                 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
         }
 
     } else {
-        impl->shared_font = std::make_shared<std::vector<u8>>(
+        impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
             SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
 
         const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6d4b0237586db0949a0f95afc415c7c6cfcec95a..f1795fdd6213d172e575b0389d2889df85fb81f4 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
         }
     }
 
-    std::vector<u8> program_image(total_image_size);
+    Kernel::PhysicalMemory program_image(total_image_size);
     std::size_t current_image_position = 0;
 
     Kernel::CodeSet codeset;
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 70051c13ae308f0345d8a75e3bce74b2f0ec29ce..474b55cb13f207941c908185c168347b4b95694f 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
 
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
 
     const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
                                                const std::vector<u8>& data, u32 offset) {
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 6a0ca389b0fc590252e1f72dce3dd633d27b2c91..e92e2e06eab3100b683f8f0703c1d7d7a88a7d51 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     }
 
     // Build program image
-    std::vector<u8> program_image(PageAlignSize(nro_header.file_size));
+    Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
     std::memcpy(program_image.data(), data.data(), program_image.size());
     if (program_image.size() != PageAlignSize(nro_header.file_size)) {
         return {};
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 29311404a742d21e335da7d46f5321a8d6fa5b6d..70c90109f38503191f060cb9c8a6fa19ed1f9165 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Build program image
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
     for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
         std::vector<u8> data =
             file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);