diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a41e22f4a5ac2711b09441548aec3df22c76f482..821d2f8833da3dcf961612a34d51ee4e565140e4 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -4,6 +4,8 @@ add_library(core STATIC
     arm/unicorn/arm_unicorn.h
     core.cpp
     core.h
+    core_cpu.cpp
+    core_cpu.h
     core_timing.cpp
     core_timing.h
     file_sys/directory.h
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 57492213093ea7be9bd41fe2d9a12e69412d63d5..c0cc62f03a6ad8379931a24d1e8189492cdbd5dd 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -52,7 +52,7 @@ static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
     ARM_Interface::ThreadContext ctx{};
-    Core::CPU().SaveContext(ctx);
+    Core::CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
     return {};
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 9e2229d02ea9225185da62012af441ec4d7c7d3d..84ab876cc19e296fa0ba121e60e74211cee4e84b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -5,10 +5,6 @@
 #include <memory>
 #include <utility>
 #include "common/logging/log.h"
-#ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic.h"
-#endif
-#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/gdbstub/gdbstub.h"
@@ -31,11 +27,31 @@ namespace Core {
 
 System::~System() = default;
 
+/// Runs a CPU core while the system is powered on
+static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
+    while (Core::System().GetInstance().IsPoweredOn()) {
+        cpu_state->RunLoop(true);
+    }
+}
+
+Cpu& System::CurrentCpuCore() {
+    // If multicore is enabled, use host thread to figure out the current CPU core
+    if (Settings::values.use_multi_core) {
+        const auto& search = thread_to_cpu.find(std::this_thread::get_id());
+        ASSERT(search != thread_to_cpu.end());
+        ASSERT(search->second);
+        return *search->second;
+    }
+
+    // Otherwise, use single-threaded mode active_core variable
+    return *cpu_cores[active_core];
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
-    if (!cpu_core) {
-        return ResultStatus::ErrorNotInitialized;
-    }
+
+    // Update thread_to_cpu in case Core 0 is run from a different host thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
 
     if (GDBStub::IsServerEnabled()) {
         GDBStub::HandlePacket();
@@ -52,25 +68,14 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
         }
     }
 
-    // If we don't have a currently active thread then don't execute instructions,
-    // instead advance to the next event and try to yield to the next thread
-    if (Kernel::GetCurrentThread() == nullptr) {
-        NGLOG_TRACE(Core_ARM, "Idling");
-        CoreTiming::Idle();
-        CoreTiming::Advance();
-        PrepareReschedule();
-    } else {
-        CoreTiming::Advance();
-        if (tight_loop) {
-            cpu_core->Run();
-        } else {
-            cpu_core->Step();
+    for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
+        cpu_cores[active_core]->RunLoop(tight_loop);
+        if (Settings::values.use_multi_core) {
+            // Cores 1-3 are run on other threads in this mode
+            break;
         }
     }
 
-    HW::Update();
-    Reschedule();
-
     return status;
 }
 
@@ -133,21 +138,26 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
 }
 
 void System::PrepareReschedule() {
-    cpu_core->PrepareReschedule();
-    reschedule_pending = true;
+    CurrentCpuCore().PrepareReschedule();
 }
 
 PerfStats::Results System::GetAndResetPerfStats() {
     return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
 }
 
-void System::Reschedule() {
-    if (!reschedule_pending) {
-        return;
-    }
+const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->Scheduler();
+}
 
-    reschedule_pending = false;
-    Core::System::GetInstance().Scheduler().Reschedule();
+ARM_Interface& System::ArmInterface(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->ArmInterface();
+}
+
+Cpu& System::CpuCore(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return *cpu_cores[core_index];
 }
 
 System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
@@ -157,26 +167,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
 
     current_process = Kernel::Process::Create("main");
 
-    if (Settings::values.use_cpu_jit) {
-#ifdef ARCHITECTURE_x86_64
-        cpu_core = std::make_shared<ARM_Dynarmic>();
-#else
-        cpu_core = std::make_shared<ARM_Unicorn>();
-        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
-    } else {
-        cpu_core = std::make_shared<ARM_Unicorn>();
+    cpu_barrier = std::make_shared<CpuBarrier>();
+    for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index);
     }
 
     gpu_core = std::make_unique<Tegra::GPU>();
-
     telemetry_session = std::make_unique<Core::TelemetrySession>();
-
     service_manager = std::make_shared<Service::SM::ServiceManager>();
 
     HW::Init();
     Kernel::Init(system_mode);
-    scheduler = std::make_unique<Kernel::Scheduler>(cpu_core.get());
     Service::Init(service_manager);
     GDBStub::Init();
 
@@ -184,6 +185,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
         return ResultStatus::ErrorVideoCore;
     }
 
+    // Create threads for CPU cores 1-3, and build thread_to_cpu map
+    // CPU core 0 is run on the main thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
+    if (Settings::values.use_multi_core) {
+        for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+            cpu_core_threads[index] =
+                std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
+            thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+        }
+    }
+
     NGLOG_DEBUG(Core, "Initialized OK");
 
     // Reset counters and set time origin to current frame
@@ -207,15 +219,30 @@ void System::Shutdown() {
     VideoCore::Shutdown();
     GDBStub::Shutdown();
     Service::Shutdown();
-    scheduler.reset();
     Kernel::Shutdown();
     HW::Shutdown();
     service_manager.reset();
     telemetry_session.reset();
     gpu_core.reset();
-    cpu_core.reset();
+
+    // Close all CPU/threading state
+    cpu_barrier->NotifyEnd();
+    if (Settings::values.use_multi_core) {
+        for (auto& thread : cpu_core_threads) {
+            thread->join();
+            thread.reset();
+        }
+    }
+    thread_to_cpu.clear();
+    for (auto& cpu_core : cpu_cores) {
+        cpu_core.reset();
+    }
+    cpu_barrier.reset();
+
+    // Close core timing
     CoreTiming::Shutdown();
 
+    // Close app loader
     app_loader.reset();
 
     NGLOG_DEBUG(Core, "Shutdown OK");
diff --git a/src/core/core.h b/src/core/core.h
index f81cbfb3ccfc390447146552896d8267fea0b1d0..f90f085ad10976efef83b9a8120429514c9d41bb 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -4,9 +4,12 @@
 
 #pragma once
 
+#include <array>
 #include <memory>
 #include <string>
+#include <thread>
 #include "common/common_types.h"
+#include "core/core_cpu.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/loader/loader.h"
@@ -89,7 +92,7 @@ public:
      * @returns True if the emulated system is powered on, otherwise false.
      */
     bool IsPoweredOn() const {
-        return cpu_core != nullptr;
+        return cpu_barrier && cpu_barrier->IsAlive();
     }
 
     /**
@@ -103,24 +106,34 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule();
 
+    /// Gets and resets core performance statistics
     PerfStats::Results GetAndResetPerfStats();
 
-    /**
-     * Gets a reference to the emulated CPU.
-     * @returns A reference to the emulated CPU.
-     */
-    ARM_Interface& CPU() {
-        return *cpu_core;
+    /// Gets an ARM interface to the CPU core that is currently running
+    ARM_Interface& CurrentArmInterface() {
+        return CurrentCpuCore().ArmInterface();
     }
 
+    /// Gets an ARM interface to the CPU core with the specified index
+    ARM_Interface& ArmInterface(size_t core_index);
+
+    /// Gets a CPU interface to the CPU core with the specified index
+    Cpu& CpuCore(size_t core_index);
+
+    /// Gets the GPU interface
     Tegra::GPU& GPU() {
         return *gpu_core;
     }
 
-    Kernel::Scheduler& Scheduler() {
-        return *scheduler;
+    /// Gets the scheduler for the CPU core that is currently running
+    Kernel::Scheduler& CurrentScheduler() {
+        return *CurrentCpuCore().Scheduler();
     }
 
+    /// Gets the scheduler for the CPU core with the specified index
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
+
+    /// Gets the current process
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
         return current_process;
     }
@@ -155,6 +168,9 @@ public:
     }
 
 private:
+    /// Returns the currently running CPU core
+    Cpu& CurrentCpuCore();
+
     /**
      * Initialize the emulated system.
      * @param emu_window Pointer to the host-system window used for video output and keyboard input.
@@ -163,22 +179,15 @@ private:
      */
     ResultStatus Init(EmuWindow* emu_window, u32 system_mode);
 
-    /// Reschedule the core emulation
-    void Reschedule();
-
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
-
-    std::shared_ptr<ARM_Interface> cpu_core;
-    std::unique_ptr<Kernel::Scheduler> scheduler;
     std::unique_ptr<Tegra::GPU> gpu_core;
-
     std::shared_ptr<Tegra::DebugContext> debug_context;
-
     Kernel::SharedPtr<Kernel::Process> current_process;
-
-    /// When true, signals that a reschedule should happen
-    bool reschedule_pending{};
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
+    std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
+    size_t active_core{}; ///< Active core, only used in single thread mode
 
     /// Service manager
     std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -190,10 +199,13 @@ private:
 
     ResultStatus status = ResultStatus::Success;
     std::string status_details = "";
+
+    /// Map of guest threads to CPU cores
+    std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
 };
 
-inline ARM_Interface& CPU() {
-    return System::GetInstance().CPU();
+inline ARM_Interface& CurrentArmInterface() {
+    return System::GetInstance().CurrentArmInterface();
 }
 
 inline TelemetrySession& Telemetry() {
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..099f2bb1ab0a9fdda49a2e99ae3fdc5461bb1b03
--- /dev/null
+++ b/src/core/core_cpu.cpp
@@ -0,0 +1,119 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+
+#include "common/logging/log.h"
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic.h"
+#endif
+#include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core_cpu.h"
+#include "core/core_timing.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "core/settings.h"
+
+namespace Core {
+
+void CpuBarrier::NotifyEnd() {
+    std::unique_lock<std::mutex> lock(mutex);
+    end = true;
+    condition.notify_all();
+}
+
+bool CpuBarrier::Rendezvous() {
+    if (!Settings::values.use_multi_core) {
+        // Meaningless when running in single-core mode
+        return true;
+    }
+
+    if (!end) {
+        std::unique_lock<std::mutex> lock(mutex);
+
+        --cores_waiting;
+        if (!cores_waiting) {
+            cores_waiting = NUM_CPU_CORES;
+            condition.notify_all();
+            return true;
+        }
+
+        condition.wait(lock);
+        return true;
+    }
+
+    return false;
+}
+
+Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+    : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
+
+    if (Settings::values.use_cpu_jit) {
+#ifdef ARCHITECTURE_x86_64
+        arm_interface = std::make_shared<ARM_Dynarmic>();
+#else
+        cpu_core = std::make_shared<ARM_Unicorn>();
+        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif
+    } else {
+        arm_interface = std::make_shared<ARM_Unicorn>();
+    }
+
+    scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
+}
+
+void Cpu::RunLoop(bool tight_loop) {
+    // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
+    if (!cpu_barrier->Rendezvous()) {
+        // If rendezvous failed, session has been killed
+        return;
+    }
+
+    // If we don't have a currently active thread then don't execute instructions,
+    // instead advance to the next event and try to yield to the next thread
+    if (Kernel::GetCurrentThread() == nullptr) {
+        NGLOG_TRACE(Core, "Core-{} idling", core_index);
+
+        if (IsMainCore()) {
+            CoreTiming::Idle();
+            CoreTiming::Advance();
+        }
+
+        PrepareReschedule();
+    } else {
+        if (IsMainCore()) {
+            CoreTiming::Advance();
+        }
+
+        if (tight_loop) {
+            arm_interface->Run();
+        } else {
+            arm_interface->Step();
+        }
+    }
+
+    Reschedule();
+}
+
+void Cpu::SingleStep() {
+    return RunLoop(false);
+}
+
+void Cpu::PrepareReschedule() {
+    arm_interface->PrepareReschedule();
+    reschedule_pending = true;
+}
+
+void Cpu::Reschedule() {
+    if (!reschedule_pending) {
+        return;
+    }
+
+    reschedule_pending = false;
+    scheduler->Reschedule();
+}
+
+} // namespace Core
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..243f0b5e74c24e7df1fb1550f0fc966026ad5d45
--- /dev/null
+++ b/src/core/core_cpu.h
@@ -0,0 +1,78 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <string>
+#include "common/common_types.h"
+
+class ARM_Interface;
+
+namespace Kernel {
+class Scheduler;
+}
+
+namespace Core {
+
+constexpr unsigned NUM_CPU_CORES{4};
+
+class CpuBarrier {
+public:
+    bool IsAlive() const {
+        return !end;
+    }
+
+    void NotifyEnd();
+
+    bool Rendezvous();
+
+private:
+    unsigned cores_waiting{NUM_CPU_CORES};
+    std::mutex mutex;
+    std::condition_variable condition;
+    std::atomic<bool> end{};
+};
+
+class Cpu {
+public:
+    Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
+
+    void RunLoop(bool tight_loop = true);
+
+    void SingleStep();
+
+    void PrepareReschedule();
+
+    ARM_Interface& ArmInterface() {
+        return *arm_interface;
+    }
+
+    const ARM_Interface& ArmInterface() const {
+        return *arm_interface;
+    }
+
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler() const {
+        return scheduler;
+    }
+
+    bool IsMainCore() const {
+        return core_index == 0;
+    }
+
+private:
+    void Reschedule();
+
+    std::shared_ptr<ARM_Interface> arm_interface;
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::shared_ptr<Kernel::Scheduler> scheduler;
+
+    bool reschedule_pending{};
+    size_t core_index;
+};
+
+} // namespace Core
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 46606b992240ceb34235a9b8e5f48cb708500bdc..6c5a40ba8c03647919dd90d66eb17b98d4988983 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -598,11 +598,11 @@ static void ReadRegister() {
     }
 
     if (id <= SP_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetReg(static_cast<int>(id)));
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id)));
     } else if (id == PC_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetPC());
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetPC());
     } else if (id == CPSR_REGISTER) {
-        IntToGdbHex(reply, Core::CPU().GetCPSR());
+        IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR());
     } else {
         return SendReply("E01");
     }
@@ -618,16 +618,16 @@ static void ReadRegisters() {
     u8* bufptr = buffer;
 
     for (int reg = 0; reg <= SP_REGISTER; reg++) {
-        LongToGdbHex(bufptr + reg * 16, Core::CPU().GetReg(reg));
+        LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg));
     }
 
     bufptr += (32 * 16);
 
-    LongToGdbHex(bufptr, Core::CPU().GetPC());
+    LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC());
 
     bufptr += 16;
 
-    IntToGdbHex(bufptr, Core::CPU().GetCPSR());
+    IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR());
 
     bufptr += 8;
 
@@ -646,11 +646,11 @@ static void WriteRegister() {
     }
 
     if (id <= SP_REGISTER) {
-        Core::CPU().SetReg(id, GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr));
     } else if (id == PC_REGISTER) {
-        Core::CPU().SetPC(GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr));
     } else if (id == CPSR_REGISTER) {
-        Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr));
+        Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr));
     } else {
         return SendReply("E01");
     }
@@ -667,11 +667,11 @@ static void WriteRegisters() {
 
     for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
         if (reg <= SP_REGISTER) {
-            Core::CPU().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
         } else if (reg == PC_REGISTER) {
-            Core::CPU().SetPC(GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16));
         } else if (reg == CPSR_REGISTER) {
-            Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
         } else {
             UNIMPLEMENTED();
         }
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ff6a0941a2dc42d2a13e88da61fac0271233db41..9cb9e0e5c945121a9601ab29b6ff362306da5dee 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -9,6 +9,8 @@
 
 namespace Kernel {
 
+std::mutex Scheduler::scheduler_mutex;
+
 Scheduler::Scheduler(ARM_Interface* cpu_core) : cpu_core(cpu_core) {}
 
 Scheduler::~Scheduler() {
@@ -18,6 +20,7 @@ Scheduler::~Scheduler() {
 }
 
 bool Scheduler::HaveReadyThreads() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
     return ready_queue.get_first() != nullptr;
 }
 
@@ -90,6 +93,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 }
 
 void Scheduler::Reschedule() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     Thread* cur = GetCurrentThread();
     Thread* next = PopNextReadyThread();
 
@@ -105,26 +110,36 @@ void Scheduler::Reschedule() {
 }
 
 void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     thread_list.push_back(thread);
     ready_queue.prepare(priority);
 }
 
 void Scheduler::RemoveThread(Thread* thread) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
 }
 
 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     ASSERT(thread->status == THREADSTATUS_READY);
     ready_queue.push_back(priority, thread);
 }
 
 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     ASSERT(thread->status == THREADSTATUS_READY);
     ready_queue.remove(priority, thread);
 }
 
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     // If thread was ready, adjust queues
     if (thread->status == THREADSTATUS_READY)
         ready_queue.move(thread, thread->current_priority, priority);
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 27d0247d6550673e33a5019ae82e7112e36ed907..a3b5fb8ca60db9738cf1090d2feb88aee434d289 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <mutex>
 #include <vector>
 #include "common/common_types.h"
 #include "common/thread_queue_list.h"
@@ -68,6 +69,8 @@ private:
     SharedPtr<Thread> current_thread = nullptr;
 
     ARM_Interface* cpu_core;
+
+    static std::mutex scheduler_mutex;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 72b5c05f28a73302e7c146d2275b244d996923f4..1ae530c904599a2925258f231a043bdd4f446d59 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -401,8 +401,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
 /// Get which CPU core is executing the current thread
 static u32 GetCurrentProcessorNumber() {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, defaulting to processor 0");
-    return 0;
+    NGLOG_TRACE(Kernel_SVC, "called");
+    return GetCurrentThread()->processor_id;
 }
 
 static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,
@@ -485,22 +485,28 @@ static void ExitProcess() {
 
     Core::CurrentProcess()->status = ProcessStatus::Exited;
 
-    // Stop all the process threads that are currently waiting for objects.
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
-    for (auto& thread : thread_list) {
-        if (thread->owner_process != Core::CurrentProcess())
-            continue;
+    auto stop_threads = [](const std::vector<SharedPtr<Thread>>& thread_list) {
+        for (auto& thread : thread_list) {
+            if (thread->owner_process != Core::CurrentProcess())
+                continue;
 
-        if (thread == GetCurrentThread())
-            continue;
+            if (thread == GetCurrentThread())
+                continue;
 
-        // TODO(Subv): When are the other running/ready threads terminated?
-        ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
-                       thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
-                   "Exiting processes with non-waiting threads is currently unimplemented");
+            // TODO(Subv): When are the other running/ready threads terminated?
+            ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
+                           thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
+                       "Exiting processes with non-waiting threads is currently unimplemented");
 
-        thread->Stop();
-    }
+            thread->Stop();
+        }
+    };
+
+    auto& system = Core::System::GetInstance();
+    stop_threads(system.Scheduler(0)->GetThreadList());
+    stop_threads(system.Scheduler(1)->GetThreadList());
+    stop_threads(system.Scheduler(2)->GetThreadList());
+    stop_threads(system.Scheduler(3)->GetThreadList());
 
     // Kill the current thread
     GetCurrentThread()->Stop();
@@ -530,14 +536,9 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
 
     switch (processor_id) {
     case THREADPROCESSORID_0:
-        break;
     case THREADPROCESSORID_1:
     case THREADPROCESSORID_2:
     case THREADPROCESSORID_3:
-        // TODO(bunnei): Implement support for other processor IDs
-        NGLOG_ERROR(Kernel_SVC,
-                    "Newly created thread must run in another thread ({}), unimplemented.",
-                    processor_id);
         break;
     default:
         ASSERT_MSG(false, "Unsupported thread processor ID: {}", processor_id);
@@ -576,7 +577,7 @@ static ResultCode StartThread(Handle thread_handle) {
 
 /// Called when a thread exits
 static void ExitThread() {
-    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CPU().GetPC());
+    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
 
     ExitCurrentThread();
     Core::System::GetInstance().PrepareReschedule();
@@ -588,7 +589,7 @@ static void SleepThread(s64 nanoseconds) {
 
     // Don't attempt to yield execution if there are no available threads to run,
     // this way we avoid a useless reschedule to the idle thread.
-    if (nanoseconds == 0 && !Core::System::GetInstance().Scheduler().HaveReadyThreads())
+    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
         return;
 
     // Sleep current thread and check for next thread to schedule
@@ -624,7 +625,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
 
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    Core::System::GetInstance().PrepareReschedule();
+    Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
@@ -634,53 +635,60 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
                 condition_variable_addr, target);
 
     u32 processed = 0;
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
-
-    for (auto& thread : thread_list) {
-        if (thread->condvar_wait_address != condition_variable_addr)
-            continue;
-
-        // Only process up to 'target' threads, unless 'target' is -1, in which case process
-        // them all.
-        if (target != -1 && processed >= target)
-            break;
-
-        // If the mutex is not yet acquired, acquire it.
-        u32 mutex_val = Memory::Read32(thread->mutex_wait_address);
-
-        if (mutex_val == 0) {
-            // We were able to acquire the mutex, resume this thread.
-            Memory::Write32(thread->mutex_wait_address, thread->wait_handle);
-            ASSERT(thread->status == THREADSTATUS_WAIT_MUTEX);
-            thread->ResumeFromWait();
-
-            auto lock_owner = thread->lock_owner;
-            if (lock_owner)
-                lock_owner->RemoveMutexWaiter(thread);
-
-            thread->lock_owner = nullptr;
-            thread->mutex_wait_address = 0;
-            thread->condvar_wait_address = 0;
-            thread->wait_handle = 0;
-        } else {
-            // Couldn't acquire the mutex, block the thread.
-            Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            auto owner = g_handle_table.Get<Thread>(owner_handle);
-            ASSERT(owner);
-            ASSERT(thread->status != THREADSTATUS_RUNNING);
-            thread->status = THREADSTATUS_WAIT_MUTEX;
-            thread->wakeup_callback = nullptr;
 
-            // Signal that the mutex now has a waiting thread.
-            Memory::Write32(thread->mutex_wait_address, mutex_val | Mutex::MutexHasWaitersFlag);
-
-            owner->AddMutexWaiter(thread);
-
-            Core::System::GetInstance().PrepareReschedule();
+    auto signal_process_wide_key = [&](size_t core_index) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        for (auto& thread : scheduler->GetThreadList()) {
+            if (thread->condvar_wait_address != condition_variable_addr)
+                continue;
+
+            // Only process up to 'target' threads, unless 'target' is -1, in which case process
+            // them all.
+            if (target != -1 && processed >= target)
+                break;
+
+            // If the mutex is not yet acquired, acquire it.
+            u32 mutex_val = Memory::Read32(thread->mutex_wait_address);
+
+            if (mutex_val == 0) {
+                // We were able to acquire the mutex, resume this thread.
+                Memory::Write32(thread->mutex_wait_address, thread->wait_handle);
+                ASSERT(thread->status == THREADSTATUS_WAIT_MUTEX);
+                thread->ResumeFromWait();
+
+                auto lock_owner = thread->lock_owner;
+                if (lock_owner)
+                    lock_owner->RemoveMutexWaiter(thread);
+
+                thread->lock_owner = nullptr;
+                thread->mutex_wait_address = 0;
+                thread->condvar_wait_address = 0;
+                thread->wait_handle = 0;
+            } else {
+                // Couldn't acquire the mutex, block the thread.
+                Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
+                auto owner = g_handle_table.Get<Thread>(owner_handle);
+                ASSERT(owner);
+                ASSERT(thread->status != THREADSTATUS_RUNNING);
+                thread->status = THREADSTATUS_WAIT_MUTEX;
+                thread->wakeup_callback = nullptr;
+
+                // Signal that the mutex now has a waiting thread.
+                Memory::Write32(thread->mutex_wait_address, mutex_val | Mutex::MutexHasWaitersFlag);
+
+                owner->AddMutexWaiter(thread);
+
+                Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
+            }
+
+            ++processed;
         }
+    };
 
-        ++processed;
-    }
+    signal_process_wide_key(0);
+    signal_process_wide_key(1);
+    signal_process_wide_key(2);
+    signal_process_wide_key(3);
 
     return RESULT_SUCCESS;
 }
@@ -718,16 +726,31 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetThreadCoreMask(Handle handle, u32* mask, u64* unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}", handle);
-    *mask = 0x0;
-    *unknown = 0xf;
+static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    *core = thread->ideal_core;
+    *mask = thread->affinity_mask;
+
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetThreadCoreMask(Handle handle, u32 mask, u64 unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, mask=0x{:08X}, unknown=0x{:X}",
-                  handle, mask, unknown);
+static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:08X}, core=0x{:X}", thread_handle,
+                mask, core);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    thread->ChangeCore(core, mask);
+
     return RESULT_SUCCESS;
 }
 
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index c86ad3e04e503c31dfba301d7aea5938f9085672..40aa88cc1fd8d1613eed383dd3887c46baecd8e1 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -13,14 +13,14 @@
 
 namespace Kernel {
 
-#define PARAM(n) Core::CPU().GetReg(n)
+#define PARAM(n) Core::CurrentArmInterface().GetReg(n)
 
 /**
  * HLE a function return from the current ARM userland process
  * @param res Result to return
  */
 static inline void FuncReturn(u64 res) {
-    Core::CPU().SetReg(0, res);
+    Core::CurrentArmInterface().SetReg(0, res);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -45,7 +45,7 @@ template <ResultCode func(u32*, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, (u32)PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -53,7 +53,7 @@ template <ResultCode func(u32*, u64)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -66,7 +66,7 @@ template <ResultCode func(u64*, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -85,8 +85,8 @@ void SvcWrap() {
     u32 param_1 = 0;
     u64 param_2 = 0;
     ResultCode retval = func((u32)(PARAM(2) & 0xFFFFFFFF), &param_1, &param_2);
-    Core::CPU().SetReg(1, param_1);
-    Core::CPU().SetReg(2, param_2);
+    Core::CurrentArmInterface().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(2, param_2);
     FuncReturn(retval.raw);
 }
 
@@ -120,7 +120,7 @@ template <ResultCode func(u32*, u64, u64, s64)>
 void SvcWrap() {
     u32 param_1 = 0;
     ResultCode retval = func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3));
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval.raw);
 }
 
@@ -133,7 +133,7 @@ template <ResultCode func(u64*, u64, u64, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -143,7 +143,7 @@ void SvcWrap() {
     u32 retval =
         func(&param_1, PARAM(1), PARAM(2), PARAM(3), (u32)PARAM(4), (s32)(PARAM(5) & 0xFFFFFFFF))
             .raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -166,7 +166,7 @@ template <ResultCode func(u32*, u64, u64, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1), PARAM(2), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -175,7 +175,7 @@ void SvcWrap() {
     u32 param_1 = 0;
     u32 retval =
         func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 1bd5d9ebf66af3c3a8415c61da3219f35704a3f2..46fcdefb8616d30ca2a3623d2b6ccf7b162130fd 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -64,7 +64,7 @@ void Thread::Stop() {
     // Clean up thread from ready queue
     // This is only needed when the thread is termintated forcefully (SVC TerminateProcess)
     if (status == THREADSTATUS_READY) {
-        Core::System::GetInstance().Scheduler().UnscheduleThread(this, current_priority);
+        scheduler->UnscheduleThread(this, current_priority);
     }
 
     status = THREADSTATUS_DEAD;
@@ -92,7 +92,7 @@ void WaitCurrentThread_Sleep() {
 void ExitCurrentThread() {
     Thread* thread = GetCurrentThread();
     thread->Stop();
-    Core::System::GetInstance().Scheduler().RemoveThread(thread);
+    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
 }
 
 /**
@@ -154,6 +154,18 @@ void Thread::CancelWakeupTimer() {
     CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
 }
 
+static boost::optional<s32> GetNextProcessorId(u64 mask) {
+    for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
+        if (mask & (1ULL << index)) {
+            if (!Core::System().GetInstance().Scheduler(index)->GetCurrentThread()) {
+                // Core is enabled and not running any threads, use this one
+                return index;
+            }
+        }
+    }
+    return {};
+}
+
 void Thread::ResumeFromWait() {
     ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
 
@@ -188,8 +200,37 @@ void Thread::ResumeFromWait() {
     wakeup_callback = nullptr;
 
     status = THREADSTATUS_READY;
-    Core::System::GetInstance().Scheduler().ScheduleThread(this, current_priority);
-    Core::System::GetInstance().PrepareReschedule();
+
+    boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 &&
+        Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    ASSERT(*new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+
+    Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
 }
 
 /**
@@ -259,8 +300,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
 
     SharedPtr<Thread> thread(new Thread);
 
-    Core::System::GetInstance().Scheduler().AddThread(thread, priority);
-
     thread->thread_id = NewThreadId();
     thread->status = THREADSTATUS_DORMANT;
     thread->entry_point = entry_point;
@@ -268,6 +307,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->nominal_priority = thread->current_priority = priority;
     thread->last_running_ticks = CoreTiming::GetTicks();
     thread->processor_id = processor_id;
+    thread->ideal_core = processor_id;
+    thread->affinity_mask = 1ULL << processor_id;
     thread->wait_objects.clear();
     thread->mutex_wait_address = 0;
     thread->condvar_wait_address = 0;
@@ -275,6 +316,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->name = std::move(name);
     thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
     thread->owner_process = owner_process;
+    thread->scheduler = Core::System().GetInstance().Scheduler(processor_id);
+    thread->scheduler->AddThread(thread, priority);
 
     // Find the next available TLS index, and mark it as used
     auto& tls_slots = owner_process->tls_slots;
@@ -337,7 +380,7 @@ void Thread::SetPriority(u32 priority) {
 }
 
 void Thread::BoostPriority(u32 priority) {
-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, priority);
+    scheduler->SetThreadPriority(this, priority);
     current_priority = priority;
 }
 
@@ -406,7 +449,7 @@ void Thread::UpdatePriority() {
     if (new_priority == current_priority)
         return;
 
-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, new_priority);
+    scheduler->SetThreadPriority(this, new_priority);
 
     current_priority = new_priority;
 
@@ -415,13 +458,54 @@ void Thread::UpdatePriority() {
         lock_owner->UpdatePriority();
 }
 
+void Thread::ChangeCore(u32 core, u64 mask) {
+    ideal_core = core;
+    mask = mask;
+
+    if (status != THREADSTATUS_READY) {
+        return;
+    }
+
+    boost::optional<s32> new_processor_id{GetNextProcessorId(mask)};
+
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 &&
+        Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    ASSERT(new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+
+    Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
  * Gets the current thread
  */
 Thread* GetCurrentThread() {
-    return Core::System::GetInstance().Scheduler().GetCurrentThread();
+    return Core::System::GetInstance().CurrentScheduler().GetCurrentThread();
 }
 
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index e0a3c093459556ff2b5978de32d0205161d52ccf..1d2da6d50a1f7c0bf59960fcd399514bb4e18da6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -56,6 +57,7 @@ enum class ThreadWakeupReason {
 namespace Kernel {
 
 class Process;
+class Scheduler;
 
 class Thread final : public WaitObject {
 public:
@@ -118,6 +120,9 @@ public:
     /// Recalculates the current priority taking into account priority inheritance.
     void UpdatePriority();
 
+    /// Changes the core that the thread is running or scheduled to run on.
+    void ChangeCore(u32 core, u64 mask);
+
     /**
      * Gets the thread's thread ID
      * @return The thread's ID
@@ -240,6 +245,11 @@ public:
     // available. In case of a timeout, the object will be nullptr.
     std::function<WakeupCallback> wakeup_callback;
 
+    std::shared_ptr<Scheduler> scheduler;
+
+    u32 ideal_core{0xFFFFFFFF};
+    u64 affinity_mask{0x1};
+
 private:
     Thread();
     ~Thread() override;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 2f0044c1130866c7629a82ea56a13e3e59f5732c..676e5b28231d2244ef7078443c1aadeff808c984 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -104,8 +104,15 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    Core::CPU().MapBackingMemory(target, size, block->data() + offset,
-                                 VMAPermission::ReadWriteExecute);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
 
     final_vma.type = VMAType::AllocatedMemoryBlock;
     final_vma.permissions = VMAPermission::ReadWrite;
@@ -126,7 +133,11 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    Core::CPU().MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
 
     final_vma.type = VMAType::BackingMemory;
     final_vma.permissions = VMAPermission::ReadWrite;
@@ -184,7 +195,11 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
 
     ASSERT(FindVMA(target)->second.size >= size);
 
-    Core::CPU().UnmapMemory(target, size);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).UnmapMemory(target, size);
+    system.ArmInterface(1).UnmapMemory(target, size);
+    system.ArmInterface(2).UnmapMemory(target, size);
+    system.ArmInterface(3).UnmapMemory(target, size);
 
     return RESULT_SUCCESS;
 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index db821146303080e5931eb0e2c78b353ebbdc98e5..3b81acd631c6720e94fa8756026bf51bcea964b1 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -28,8 +28,13 @@ static PageTable* current_page_table = nullptr;
 
 void SetCurrentPageTable(PageTable* page_table) {
     current_page_table = page_table;
-    if (Core::System::GetInstance().IsPoweredOn()) {
-        Core::CPU().PageTableChanged();
+
+    auto& system = Core::System::GetInstance();
+    if (system.IsPoweredOn()) {
+        system.ArmInterface(0).PageTableChanged();
+        system.ArmInterface(1).PageTableChanged();
+        system.ArmInterface(2).PageTableChanged();
+        system.ArmInterface(3).PageTableChanged();
     }
 }
 
diff --git a/src/core/settings.h b/src/core/settings.h
index cfec63c21056bbd5b739118fcf5c9cd0a1b3fe50..a7f1e5fa0d489fa1379de4f5ec0b38585e1a33f2 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -121,6 +121,7 @@ struct Values {
 
     // Core
     bool use_cpu_jit;
+    bool use_multi_core;
 
     // Data Storage
     bool use_virtual_sd;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 02c52bb55d8097ca507335061c82500bf99f930b..a60aa1143b7119a3ceff7831160ff25b0d58ee08 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -155,6 +155,8 @@ TelemetrySession::TelemetrySession() {
 
     // Log user configuration information
     AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
+    AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
+             Settings::values.use_multi_core);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
              Settings::values.resolution_factor);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8843f207805b9fa50f99dcf52568ad7c2063a11c..8316db7082201c19a935ecacaaa6fd30e78c6e96 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -78,6 +78,7 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("Core");
     Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
@@ -177,6 +178,7 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("Core");
     qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
+    qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 2d73fc5aabb8c5bfaaf53f42eea7cad66801fde9..baa5586673d373c90763e4181b8f4df923428966 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -20,6 +20,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
     this->setConfiguration();
 
     ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_multi_core->setEnabled(!Core::System::GetInstance().IsPoweredOn());
     ui->use_docked_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
 }
 
@@ -30,6 +31,7 @@ void ConfigureGeneral::setConfiguration() {
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
     ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
+    ui->use_multi_core->setChecked(Settings::values.use_multi_core);
     ui->use_docked_mode->setChecked(Settings::values.use_docked_mode);
 }
 
@@ -40,6 +42,7 @@ void ConfigureGeneral::applyConfiguration() {
         ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
 
     Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
+    Settings::values.use_multi_core = ui->use_multi_core->isChecked();
     Settings::values.use_docked_mode = ui->use_docked_mode->isChecked();
     Settings::Apply();
 }
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index 1775c4d40cfd09718181c275eeadfca695a6405f..233adbe27a5ba57ec057d18b1da6eef04ba00232 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -58,6 +58,13 @@
             </property>
            </widget>
           </item>
+          <item>
+           <widget class="QCheckBox" name="use_multi_core">
+            <property name="text">
+             <string>Enable multi-core</string>
+            </property>
+           </widget>
+          </item>
          </layout>
         </item>
        </layout>
diff --git a/src/yuzu/debugger/registers.cpp b/src/yuzu/debugger/registers.cpp
index 06e2d1647dbea88e941887a58c84e5246435bd31..178cc65a79d9c6335b1fe0788a70ead0c6b2e867 100644
--- a/src/yuzu/debugger/registers.cpp
+++ b/src/yuzu/debugger/registers.cpp
@@ -63,7 +63,7 @@ void RegistersWidget::OnDebugModeEntered() {
 
     for (int i = 0; i < core_registers->childCount(); ++i)
         core_registers->child(i)->setText(
-            1, QString("0x%1").arg(Core::CPU().GetReg(i), 8, 16, QLatin1Char('0')));
+            1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
 
     UpdateCPSRValues();
 }
@@ -122,7 +122,7 @@ void RegistersWidget::CreateCPSRChildren() {
 }
 
 void RegistersWidget::UpdateCPSRValues() {
-    const u32 cpsr_val = Core::CPU().GetCPSR();
+    const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
 
     cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
     cpsr->child(0)->setText(
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index acc4c2e0be219199157f444eea13667d5ecd8e72..8b074db5a4c9b63e421b44a34ee42f2e619f1967 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -51,13 +51,21 @@ std::size_t WaitTreeItem::Row() const {
 }
 
 std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() {
-    const auto& threads = Core::System::GetInstance().Scheduler().GetThreadList();
     std::vector<std::unique_ptr<WaitTreeThread>> item_list;
-    item_list.reserve(threads.size());
-    for (std::size_t i = 0; i < threads.size(); ++i) {
-        item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
-        item_list.back()->row = i;
-    }
+    std::size_t row = 0;
+    auto add_threads = [&](const std::vector<Kernel::SharedPtr<Kernel::Thread>>& threads) {
+        for (std::size_t i = 0; i < threads.size(); ++i) {
+            item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
+            item_list.back()->row = row;
+            ++row;
+        }
+    };
+
+    add_threads(Core::System::GetInstance().Scheduler(0)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(1)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(2)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(3)->GetThreadList());
+
     return item_list;
 }
 
@@ -240,6 +248,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     }
 
     list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
+    list.push_back(std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.ideal_core)));
+    list.push_back(
+        std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.affinity_mask)));
     list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadId())));
     list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                       .arg(thread.current_priority)
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 675f9cafa8375b353f4913a6cd25610c09c36869..ee6e4d658fcdd4a33964177fbc2fa90d37f879f6 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -91,6 +91,7 @@ void Config::ReadValues() {
 
     // Core
     Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
+    Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
     Settings::values.resolution_factor =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 02254403d827e544444b2d31b0dcc4f5eeb5ac48..1c438c3f5b66486e9a9a8940777775a3275c31c0 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -80,6 +80,10 @@ touch_device=
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_cpu_jit =
 
+# Whether to use multi-core for CPU emulation
+# 0 (default): Disabled, 1: Enabled
+use_multi_core=
+
 [Renderer]
 # Whether to use software or hardware rendering.
 # 0: Software, 1 (default): Hardware