diff --git a/src/citra_qt/debugger/wait_tree.cpp b/src/citra_qt/debugger/wait_tree.cpp
index 1d2de5185fbe32f8e3020e5f81670cbc377e54eb..b6ecf381946a577016a76f3ef5e829003f3d7235 100644
--- a/src/citra_qt/debugger/wait_tree.cpp
+++ b/src/citra_qt/debugger/wait_tree.cpp
@@ -153,7 +153,8 @@ QString WaitTreeThread::GetText() const {
     case THREADSTATUS_WAIT_SLEEP:
         status = tr("sleeping");
         break;
-    case THREADSTATUS_WAIT_SYNCH:
+    case THREADSTATUS_WAIT_SYNCH_ALL:
+    case THREADSTATUS_WAIT_SYNCH_ANY:
         status = tr("waiting for objects");
         break;
     case THREADSTATUS_DORMANT:
@@ -180,7 +181,8 @@ QColor WaitTreeThread::GetColor() const {
         return QColor(Qt::GlobalColor::darkRed);
     case THREADSTATUS_WAIT_SLEEP:
         return QColor(Qt::GlobalColor::darkYellow);
-    case THREADSTATUS_WAIT_SYNCH:
+    case THREADSTATUS_WAIT_SYNCH_ALL:
+    case THREADSTATUS_WAIT_SYNCH_ANY:
         return QColor(Qt::GlobalColor::red);
     case THREADSTATUS_DORMANT:
         return QColor(Qt::GlobalColor::darkCyan);
@@ -228,7 +230,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     } else {
         list.push_back(std::make_unique<WaitTreeMutexList>(thread.held_mutexes));
     }
-    if (thread.status == THREADSTATUS_WAIT_SYNCH) {
+    if (thread.status == THREADSTATUS_WAIT_SYNCH_ANY ||
+        thread.status == THREADSTATUS_WAIT_SYNCH_ALL) {
         list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects,
                                                             thread.IsSleepingOnWaitAll()));
     }
diff --git a/src/core/hle/kernel/event.cpp b/src/core/hle/kernel/event.cpp
index 3e116e3dfced073d420382e80592f5421fc4df67..e1f42af051a0f227f94a55f3b70b69c49e3ae84b 100644
--- a/src/core/hle/kernel/event.cpp
+++ b/src/core/hle/kernel/event.cpp
@@ -30,12 +30,12 @@ SharedPtr<Event> Event::Create(ResetType reset_type, std::string name) {
     return evt;
 }
 
-bool Event::ShouldWait() {
+bool Event::ShouldWait(Thread* thread) const {
     return !signaled;
 }
 
-void Event::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void Event::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 
     // Release the event if it's not sticky...
     if (reset_type != ResetType::Sticky)
diff --git a/src/core/hle/kernel/event.h b/src/core/hle/kernel/event.h
index 8dcd23edb8b922e0c6cbb1e3cf9e9f5c94f8695b..39452bf338b7551b0737085f1cd50d31dad0f196 100644
--- a/src/core/hle/kernel/event.h
+++ b/src/core/hle/kernel/event.h
@@ -35,8 +35,8 @@ public:
     bool signaled;    ///< Whether the event has already been signaled
     std::string name; ///< Name of event (optional)
 
-    bool ShouldWait() override;
-    void Acquire() override;
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
 
     void Signal();
     void Clear();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 1db8e102f0c403870250128a49b06280cc954386..f599916f0d1a23173f776ac1d65b7b3477aa647a 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <boost/range/algorithm_ext/erase.hpp>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/config_mem.h"
@@ -28,32 +27,39 @@ void WaitObject::AddWaitingThread(SharedPtr<Thread> thread) {
 
 void WaitObject::RemoveWaitingThread(Thread* thread) {
     auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
+    // If a thread passed multiple handles to the same object,
+    // the kernel might attempt to remove the thread from the object's
+    // waiting threads list multiple times.
     if (itr != waiting_threads.end())
         waiting_threads.erase(itr);
 }
 
 SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() {
-    // Remove the threads that are ready or already running from our waitlist
-    boost::range::remove_erase_if(waiting_threads, [](const SharedPtr<Thread>& thread) {
-        return thread->status == THREADSTATUS_RUNNING || thread->status == THREADSTATUS_READY ||
-               thread->status == THREADSTATUS_DEAD;
-    });
-
-    // TODO(Subv): This call should be performed inside the loop below to check if an object can be
-    // acquired by a particular thread. This is useful for things like recursive locking of Mutexes.
-    if (ShouldWait())
-        return nullptr;
-
     Thread* candidate = nullptr;
     s32 candidate_priority = THREADPRIO_LOWEST + 1;
 
     for (const auto& thread : waiting_threads) {
+        // The list of waiting threads must not contain threads that are not waiting to be awakened.
+        ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
+                       thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
+                   "Inconsistent thread statuses in waiting_threads");
+
         if (thread->current_priority >= candidate_priority)
             continue;
 
-        bool ready_to_run =
-            std::none_of(thread->wait_objects.begin(), thread->wait_objects.end(),
-                         [](const SharedPtr<WaitObject>& object) { return object->ShouldWait(); });
+        if (ShouldWait(thread.get()))
+            continue;
+
+        // A thread is ready to run if it's either in THREADSTATUS_WAIT_SYNCH_ANY or
+        // in THREADSTATUS_WAIT_SYNCH_ALL and the rest of the objects it is waiting on are ready.
+        bool ready_to_run = true;
+        if (thread->status == THREADSTATUS_WAIT_SYNCH_ALL) {
+            ready_to_run = std::none_of(thread->wait_objects.begin(), thread->wait_objects.end(),
+                                        [&thread](const SharedPtr<WaitObject>& object) {
+                                            return object->ShouldWait(thread.get());
+                                        });
+        }
+
         if (ready_to_run) {
             candidate = thread.get();
             candidate_priority = thread->current_priority;
@@ -66,7 +72,7 @@ SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() {
 void WaitObject::WakeupAllWaitingThreads() {
     while (auto thread = GetHighestPriorityReadyThread()) {
         if (!thread->IsSleepingOnWaitAll()) {
-            Acquire();
+            Acquire(thread.get());
             // Set the output index of the WaitSynchronizationN call to the index of this object.
             if (thread->wait_set_output) {
                 thread->SetWaitSynchronizationOutput(thread->GetWaitObjectIndex(this));
@@ -74,18 +80,17 @@ void WaitObject::WakeupAllWaitingThreads() {
             }
         } else {
             for (auto& object : thread->wait_objects) {
-                object->Acquire();
-                object->RemoveWaitingThread(thread.get());
+                object->Acquire(thread.get());
             }
             // Note: This case doesn't update the output index of WaitSynchronizationN.
-            // Clear the thread's waitlist
-            thread->wait_objects.clear();
         }
 
+        for (auto& object : thread->wait_objects)
+            object->RemoveWaitingThread(thread.get());
+        thread->wait_objects.clear();
+
         thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
         thread->ResumeFromWait();
-        // Note: Removing the thread from the object's waitlist will be
-        // done by GetHighestPriorityReadyThread.
     }
 }
 
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 9503e7d0445310af2f4716cec622af89f3b85fe0..05097824b5bf66b8873b346aff14733f24bca543 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -132,25 +132,26 @@ using SharedPtr = boost::intrusive_ptr<T>;
 class WaitObject : public Object {
 public:
     /**
-     * Check if the current thread should wait until the object is available
+     * Check if the specified thread should wait until the object is available
+     * @param thread The thread about which we're deciding.
      * @return True if the current thread should wait due to this object being unavailable
      */
-    virtual bool ShouldWait() = 0;
+    virtual bool ShouldWait(Thread* thread) const = 0;
 
-    /// Acquire/lock the object if it is available
-    virtual void Acquire() = 0;
+    /// Acquire/lock the object for the specified thread if it is available
+    virtual void Acquire(Thread* thread) = 0;
 
     /**
      * Add a thread to wait on this object
      * @param thread Pointer to thread to add
      */
-    void AddWaitingThread(SharedPtr<Thread> thread);
+    virtual void AddWaitingThread(SharedPtr<Thread> thread);
 
     /**
      * Removes a thread from waiting on this object (e.g. if it was resumed already)
      * @param thread Pointer to thread to remove
      */
-    void RemoveWaitingThread(Thread* thread);
+    virtual void RemoveWaitingThread(Thread* thread);
 
     /**
      * Wake up all threads waiting on this object that can be awoken, in priority order,
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 736944bae0acc5351d087874e4cd7a87c4f68d3f..cef961289917f5d57153fc5df9452d32b972513c 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -6,26 +6,18 @@
 #include <vector>
 #include <boost/range/algorithm_ext/erase.hpp>
 #include "common/assert.h"
+#include "core/core.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Kernel {
 
-/**
- * Resumes a thread waiting for the specified mutex
- * @param mutex The mutex that some thread is waiting on
- */
-static void ResumeWaitingThread(Mutex* mutex) {
-    // Reset mutex lock thread handle, nothing is waiting
-    mutex->lock_count = 0;
-    mutex->holding_thread = nullptr;
-    mutex->WakeupAllWaitingThreads();
-}
-
 void ReleaseThreadMutexes(Thread* thread) {
     for (auto& mtx : thread->held_mutexes) {
-        ResumeWaitingThread(mtx.get());
+        mtx->lock_count = 0;
+        mtx->holding_thread = nullptr;
+        mtx->WakeupAllWaitingThreads();
     }
     thread->held_mutexes.clear();
 }
@@ -40,52 +32,74 @@ SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) {
     mutex->name = std::move(name);
     mutex->holding_thread = nullptr;
 
-    // Acquire mutex with current thread if initialized as locked...
+    // Acquire mutex with current thread if initialized as locked
     if (initial_locked)
-        mutex->Acquire();
+        mutex->Acquire(GetCurrentThread());
 
     return mutex;
 }
 
-bool Mutex::ShouldWait() {
-    auto thread = GetCurrentThread();
-    bool wait = lock_count > 0 && holding_thread != thread;
-
-    // If the holding thread of the mutex is lower priority than this thread, that thread should
-    // temporarily inherit this thread's priority
-    if (wait && thread->current_priority < holding_thread->current_priority)
-        holding_thread->BoostPriority(thread->current_priority);
-
-    return wait;
-}
-
-void Mutex::Acquire() {
-    Acquire(GetCurrentThread());
+bool Mutex::ShouldWait(Thread* thread) const {
+    return lock_count > 0 && thread != holding_thread;
 }
 
-void Mutex::Acquire(SharedPtr<Thread> thread) {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void Mutex::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 
-    // Actually "acquire" the mutex only if we don't already have it...
+    // Actually "acquire" the mutex only if we don't already have it
     if (lock_count == 0) {
+        priority = thread->current_priority;
         thread->held_mutexes.insert(this);
-        holding_thread = std::move(thread);
+        holding_thread = thread;
+        thread->UpdatePriority();
+        Core::System::GetInstance().PrepareReschedule();
     }
 
     lock_count++;
 }
 
 void Mutex::Release() {
-    // Only release if the mutex is held...
+    // Only release if the mutex is held
     if (lock_count > 0) {
         lock_count--;
 
-        // Yield to the next thread only if we've fully released the mutex...
+        // Yield to the next thread only if we've fully released the mutex
         if (lock_count == 0) {
             holding_thread->held_mutexes.erase(this);
-            ResumeWaitingThread(this);
+            holding_thread->UpdatePriority();
+            holding_thread = nullptr;
+            WakeupAllWaitingThreads();
+            Core::System::GetInstance().PrepareReschedule();
         }
     }
 }
 
+void Mutex::AddWaitingThread(SharedPtr<Thread> thread) {
+    WaitObject::AddWaitingThread(thread);
+    thread->pending_mutexes.insert(this);
+    UpdatePriority();
+}
+
+void Mutex::RemoveWaitingThread(Thread* thread) {
+    WaitObject::RemoveWaitingThread(thread);
+    thread->pending_mutexes.erase(this);
+    UpdatePriority();
+}
+
+void Mutex::UpdatePriority() {
+    if (!holding_thread)
+        return;
+
+    s32 best_priority = THREADPRIO_LOWEST;
+    for (auto& waiter : GetWaitingThreads()) {
+        if (waiter->current_priority < best_priority)
+            best_priority = waiter->current_priority;
+    }
+
+    if (best_priority != priority) {
+        priority = best_priority;
+        holding_thread->UpdatePriority();
+    }
+}
+
 } // namespace
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index 53c3dc1f1f373cc8107d3cc14114bf562e0b5dea..c57adf4002b0dbce1a8f52a88fd8b49016bbc810 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -35,17 +35,22 @@ public:
     }
 
     int lock_count;                   ///< Number of times the mutex has been acquired
+    u32 priority;                     ///< The priority of the mutex, used for priority inheritance.
     std::string name;                 ///< Name of mutex (optional)
     SharedPtr<Thread> holding_thread; ///< Thread that has acquired the mutex
 
-    bool ShouldWait() override;
-    void Acquire() override;
-
     /**
-     * Acquires the specified mutex for the specified thread
-     * @param thread Thread that will acquire the mutex
+     * Elevate the mutex priority to the best priority
+     * among the priorities of all its waiting threads.
      */
-    void Acquire(SharedPtr<Thread> thread);
+    void UpdatePriority();
+
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
+
+    void AddWaitingThread(SharedPtr<Thread> thread) override;
+    void RemoveWaitingThread(Thread* thread) override;
+
     void Release();
 
 private:
diff --git a/src/core/hle/kernel/semaphore.cpp b/src/core/hle/kernel/semaphore.cpp
index bf760078051760122a4e96939deeacbef09e7cbf..5e6139265dd1224a04505150c75c9066995e1994 100644
--- a/src/core/hle/kernel/semaphore.cpp
+++ b/src/core/hle/kernel/semaphore.cpp
@@ -30,12 +30,12 @@ ResultVal<SharedPtr<Semaphore>> Semaphore::Create(s32 initial_count, s32 max_cou
     return MakeResult<SharedPtr<Semaphore>>(std::move(semaphore));
 }
 
-bool Semaphore::ShouldWait() {
+bool Semaphore::ShouldWait(Thread* thread) const {
     return available_count <= 0;
 }
 
-void Semaphore::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void Semaphore::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
     --available_count;
 }
 
diff --git a/src/core/hle/kernel/semaphore.h b/src/core/hle/kernel/semaphore.h
index e01908a2576edb14c97072694de64778dba195b1..cde94f7ccd20997cfcd60a5f86080e1ab5e4b317 100644
--- a/src/core/hle/kernel/semaphore.h
+++ b/src/core/hle/kernel/semaphore.h
@@ -39,8 +39,8 @@ public:
     s32 available_count; ///< Number of free slots left in the semaphore
     std::string name;    ///< Name of semaphore (optional)
 
-    bool ShouldWait() override;
-    void Acquire() override;
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
 
     /**
      * Releases a certain number of slots from a semaphore.
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index 6c19aa7c09df582c990930176a18237f6de95abe..fd3bbbcad36ca2101c00ad6d5ead4c039c269ef0 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -14,13 +14,13 @@ namespace Kernel {
 ServerPort::ServerPort() {}
 ServerPort::~ServerPort() {}
 
-bool ServerPort::ShouldWait() {
+bool ServerPort::ShouldWait(Thread* thread) const {
     // If there are no pending sessions, we wait until a new one is added.
     return pending_sessions.size() == 0;
 }
 
-void ServerPort::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void ServerPort::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }
 
 std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index b0f8df62c504b7297b1f36292358f540457c0f0a..6f8bdb6a900f3b172d11dd76666d91796cdcb631 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -53,8 +53,8 @@ public:
     /// ServerSessions created from this port inherit a reference to this handler.
     std::shared_ptr<Service::SessionRequestHandler> hle_handler;
 
-    bool ShouldWait() override;
-    void Acquire() override;
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
 
 private:
     ServerPort();
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 146458c1ca21f223255a66340378054fa94a6cba..9447ff236a78845076ac8e2d9fdc80fc32eac3ed 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -29,12 +29,12 @@ ResultVal<SharedPtr<ServerSession>> ServerSession::Create(
     return MakeResult<SharedPtr<ServerSession>>(std::move(server_session));
 }
 
-bool ServerSession::ShouldWait() {
+bool ServerSession::ShouldWait(Thread* thread) const {
     return !signaled;
 }
 
-void ServerSession::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void ServerSession::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
     signaled = false;
 }
 
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index 458284a5db60489d845d243224569ee6fa1154d6..c088b9a199d7caa1a167c8a500a17be166bdebf9 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -57,9 +57,9 @@ public:
      */
     ResultCode HandleSyncRequest();
 
-    bool ShouldWait() override;
+    bool ShouldWait(Thread* thread) const override;
 
-    void Acquire() override;
+    void Acquire(Thread* thread) override;
 
     std::string name; ///< The name of this session (optional)
     bool signaled;    ///< Whether there's new data available to this ServerSession
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 5fb95dadaa5f7cca816b88ce41db7a81696198f9..9109bd10baa6fd20253d87a98a005b77e87a60dc 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -27,12 +27,12 @@ namespace Kernel {
 /// Event type for the thread wake up event
 static int ThreadWakeupEventType;
 
-bool Thread::ShouldWait() {
+bool Thread::ShouldWait(Thread* thread) const {
     return status != THREADSTATUS_DEAD;
 }
 
-void Thread::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void Thread::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }
 
 // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, allowing
@@ -72,7 +72,8 @@ Thread* GetCurrentThread() {
  * @return True if the thread is waiting, false otherwise
  */
 static bool CheckWait_WaitObject(const Thread* thread, WaitObject* wait_object) {
-    if (thread->status != THREADSTATUS_WAIT_SYNCH)
+    if (thread->status != THREADSTATUS_WAIT_SYNCH_ALL &&
+        thread->status != THREADSTATUS_WAIT_SYNCH_ANY)
         return false;
 
     auto itr = std::find(thread->wait_objects.begin(), thread->wait_objects.end(), wait_object);
@@ -90,9 +91,6 @@ static bool CheckWait_AddressArbiter(const Thread* thread, VAddr wait_address) {
 }
 
 void Thread::Stop() {
-    // Release all the mutexes that this thread holds
-    ReleaseThreadMutexes(this);
-
     // Cancel any outstanding wakeup events for this thread
     CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
     wakeup_callback_handle_table.Close(callback_handle);
@@ -114,6 +112,9 @@ void Thread::Stop() {
     }
     wait_objects.clear();
 
+    // Release all the mutexes that this thread holds
+    ReleaseThreadMutexes(this);
+
     // Mark the TLS slot in the thread's page as free.
     u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
     u32 tls_slot =
@@ -199,8 +200,8 @@ static void SwitchContext(Thread* new_thread) {
 
     // Load context of new thread
     if (new_thread) {
-        DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY,
-                         "Thread must be ready to become running.");
+        ASSERT_MSG(new_thread->status == THREADSTATUS_READY,
+                   "Thread must be ready to become running.");
 
         // Cancel any outstanding wakeup events for this thread
         CoreTiming::UnscheduleEvent(ThreadWakeupEventType, new_thread->callback_handle);
@@ -253,7 +254,7 @@ void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wa
     Thread* thread = GetCurrentThread();
     thread->wait_set_output = wait_set_output;
     thread->wait_objects = std::move(wait_objects);
-    thread->status = THREADSTATUS_WAIT_SYNCH;
+    thread->status = THREADSTATUS_WAIT_SYNCH_ANY;
 }
 
 void WaitCurrentThread_ArbitrateAddress(VAddr wait_address) {
@@ -281,7 +282,8 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
         return;
     }
 
-    if (thread->status == THREADSTATUS_WAIT_SYNCH || thread->status == THREADSTATUS_WAIT_ARB) {
+    if (thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
+        thread->status == THREADSTATUS_WAIT_SYNCH_ALL || thread->status == THREADSTATUS_WAIT_ARB) {
         thread->wait_set_output = false;
         // Remove the thread from each of its waiting objects' waitlists
         for (auto& object : thread->wait_objects)
@@ -305,8 +307,11 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
 }
 
 void Thread::ResumeFromWait() {
+    ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
+
     switch (status) {
-    case THREADSTATUS_WAIT_SYNCH:
+    case THREADSTATUS_WAIT_SYNCH_ALL:
+    case THREADSTATUS_WAIT_SYNCH_ANY:
     case THREADSTATUS_WAIT_ARB:
     case THREADSTATUS_WAIT_SLEEP:
         break;
@@ -515,8 +520,21 @@ void Thread::SetPriority(s32 priority) {
     nominal_priority = current_priority = priority;
 }
 
+void Thread::UpdatePriority() {
+    s32 best_priority = nominal_priority;
+    for (auto& mutex : held_mutexes) {
+        if (mutex->priority < best_priority)
+            best_priority = mutex->priority;
+    }
+    BoostPriority(best_priority);
+}
+
 void Thread::BoostPriority(s32 priority) {
-    ready_queue.move(this, current_priority, priority);
+    // If thread was ready, adjust queues
+    if (status == THREADSTATUS_READY)
+        ready_queue.move(this, current_priority, priority);
+    else
+        ready_queue.prepare(priority);
     current_priority = priority;
 }
 
@@ -563,6 +581,12 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
     context.cpu_registers[1] = output;
 }
 
+s32 Thread::GetWaitObjectIndex(WaitObject* object) const {
+    ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
+    auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
+    return std::distance(match, wait_objects.rend()) - 1;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c77ac644d504c28f04e28078339a89d0f41129cf..af72b76eab2847b3f01821cb500a865f0fc2ad0b 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -31,13 +31,14 @@ enum ThreadProcessorId : s32 {
 };
 
 enum ThreadStatus {
-    THREADSTATUS_RUNNING,    ///< Currently running
-    THREADSTATUS_READY,      ///< Ready to run
-    THREADSTATUS_WAIT_ARB,   ///< Waiting on an address arbiter
-    THREADSTATUS_WAIT_SLEEP, ///< Waiting due to a SleepThread SVC
-    THREADSTATUS_WAIT_SYNCH, ///< Waiting due to a WaitSynchronization SVC
-    THREADSTATUS_DORMANT,    ///< Created but not yet made ready
-    THREADSTATUS_DEAD        ///< Run to completion, or forcefully terminated
+    THREADSTATUS_RUNNING,        ///< Currently running
+    THREADSTATUS_READY,          ///< Ready to run
+    THREADSTATUS_WAIT_ARB,       ///< Waiting on an address arbiter
+    THREADSTATUS_WAIT_SLEEP,     ///< Waiting due to a SleepThread SVC
+    THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
+    THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true
+    THREADSTATUS_DORMANT,        ///< Created but not yet made ready
+    THREADSTATUS_DEAD            ///< Run to completion, or forcefully terminated
 };
 
 namespace Kernel {
@@ -72,8 +73,8 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait() override;
-    void Acquire() override;
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
 
     /**
      * Gets the thread's current priority
@@ -89,6 +90,12 @@ public:
      */
     void SetPriority(s32 priority);
 
+    /**
+     * Boost's a thread's priority to the best priority among the thread's held mutexes.
+     * This prevents priority inversion via priority inheritance.
+     */
+    void UpdatePriority();
+
     /**
      * Temporarily boosts the thread's priority until the next time it is scheduled
      * @param priority The new priority
@@ -128,13 +135,14 @@ public:
 
     /**
      * Retrieves the index that this particular object occupies in the list of objects
-     * that the thread passed to WaitSynchronizationN.
+     * that the thread passed to WaitSynchronizationN, starting the search from the last element.
      * It is used to set the output value of WaitSynchronizationN when the thread is awakened.
+     * When a thread wakes up due to an object signal, the kernel will use the index of the last
+     * matching object in the wait objects list in case of having multiple instances of the same
+     * object in the list.
      * @param object Object to query the index of.
      */
-    s32 GetWaitObjectIndex(const WaitObject* object) const {
-        return wait_objects_index.at(object->GetObjectId());
-    }
+    s32 GetWaitObjectIndex(WaitObject* object) const;
 
     /**
      * Stops a thread, invalidating it from further use
@@ -152,10 +160,10 @@ public:
     /**
      * Returns whether this thread is waiting for all the objects in
      * its wait list to become ready, as a result of a WaitSynchronizationN call
-     * with wait_all = true, or a ReplyAndReceive call.
+     * with wait_all = true.
      */
     bool IsSleepingOnWaitAll() const {
-        return !wait_objects.empty();
+        return status == THREADSTATUS_WAIT_SYNCH_ALL;
     }
 
     ARM_Interface::ThreadContext context;
@@ -178,15 +186,15 @@ public:
     /// Mutexes currently held by this thread, which will be released when it exits.
     boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;
 
+    /// Mutexes that this thread is currently waiting for.
+    boost::container::flat_set<SharedPtr<Mutex>> pending_mutexes;
+
     SharedPtr<Process> owner_process; ///< Process that owns this thread
 
-    /// Objects that the thread is waiting on.
-    /// This is only populated when the thread should wait for all the objects to become ready.
+    /// Objects that the thread is waiting on, in the same order as they were
+    // passed to WaitSynchronization1/N.
     std::vector<SharedPtr<WaitObject>> wait_objects;
 
-    /// Mapping of Object ids to their position in the last waitlist that this object waited on.
-    boost::container::flat_map<int, s32> wait_objects_index;
-
     VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address
 
     /// True if the WaitSynchronizationN output parameter should be set on thread wakeup.
diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp
index b50cf520dffb2d5848f8fb8cc7d90342f1d57983..8f2bc4c7f1c5c555aa762613782218c5b25fa3cc 100644
--- a/src/core/hle/kernel/timer.cpp
+++ b/src/core/hle/kernel/timer.cpp
@@ -39,12 +39,12 @@ SharedPtr<Timer> Timer::Create(ResetType reset_type, std::string name) {
     return timer;
 }
 
-bool Timer::ShouldWait() {
+bool Timer::ShouldWait(Thread* thread) const {
     return !signaled;
 }
 
-void Timer::Acquire() {
-    ASSERT_MSG(!ShouldWait(), "object unavailable!");
+void Timer::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 
     if (reset_type == ResetType::OneShot)
         signaled = false;
diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h
index 18ea0236b70ed2ef1e596dc13623052b7f55798c..2e3b31b23d4f78f337fc94e39d1b68c86f680b07 100644
--- a/src/core/hle/kernel/timer.h
+++ b/src/core/hle/kernel/timer.h
@@ -39,8 +39,8 @@ public:
     u64 initial_delay;  ///< The delay until the timer fires for the first time
     u64 interval_delay; ///< The delay until the timer fires after the first time
 
-    bool ShouldWait() override;
-    void Acquire() override;
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
 
     /**
      * Starts the timer, with the specified initial delay and interval.
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 2ca270de3dfb57fa9a6ff77a6062f02b531b4e54..855f3af82211dabde875f1b9fb8af7b0343572b4 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -248,6 +248,8 @@ static ResultCode SendSyncRequest(Kernel::Handle handle) {
 
     LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s)", handle, session->GetName().c_str());
 
+    Core::System::GetInstance().PrepareReschedule();
+
     // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server
     // responds and cause a reschedule.
     return session->SendSyncRequest();
@@ -270,27 +272,27 @@ static ResultCode WaitSynchronization1(Kernel::Handle handle, s64 nano_seconds)
     LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle,
               object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds);
 
-    if (object->ShouldWait()) {
+    if (object->ShouldWait(thread)) {
 
         if (nano_seconds == 0)
             return ERR_SYNC_TIMEOUT;
 
+        thread->wait_objects = {object};
         object->AddWaitingThread(thread);
-        // TODO(Subv): Perform things like update the mutex lock owner's priority to
-        // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
-        // but it should be moved to a function that is called from here.
-        thread->status = THREADSTATUS_WAIT_SYNCH;
+        thread->status = THREADSTATUS_WAIT_SYNCH_ANY;
 
         // Create an event to wake the thread up after the specified nanosecond delay has passed
         thread->WakeAfterDelay(nano_seconds);
 
+        Core::System::GetInstance().PrepareReschedule();
+
         // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread
         // resumes due to a signal in its wait objects.
         // Otherwise we retain the default value of timeout.
         return ERR_SYNC_TIMEOUT;
     }
 
-    object->Acquire();
+    object->Acquire(thread);
 
     return RESULT_SUCCESS;
 }
@@ -324,19 +326,14 @@ static ResultCode WaitSynchronizationN(s32* out, Kernel::Handle* handles, s32 ha
         objects[i] = object;
     }
 
-    // Clear the mapping of wait object indices.
-    // We don't want any lingering state in this map.
-    // It will be repopulated later in the wait_all = false case.
-    thread->wait_objects_index.clear();
-
     if (wait_all) {
         bool all_available =
             std::all_of(objects.begin(), objects.end(),
-                        [](const ObjectPtr& object) { return !object->ShouldWait(); });
+                        [thread](const ObjectPtr& object) { return !object->ShouldWait(thread); });
         if (all_available) {
             // We can acquire all objects right now, do so.
             for (auto& object : objects)
-                object->Acquire();
+                object->Acquire(thread);
             // Note: In this case, the `out` parameter is not set,
             // and retains whatever value it had before.
             return RESULT_SUCCESS;
@@ -350,22 +347,20 @@ static ResultCode WaitSynchronizationN(s32* out, Kernel::Handle* handles, s32 ha
             return ERR_SYNC_TIMEOUT;
 
         // Put the thread to sleep
-        thread->status = THREADSTATUS_WAIT_SYNCH;
+        thread->status = THREADSTATUS_WAIT_SYNCH_ALL;
 
         // Add the thread to each of the objects' waiting threads.
         for (auto& object : objects) {
             object->AddWaitingThread(thread);
-            // TODO(Subv): Perform things like update the mutex lock owner's priority to
-            // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
-            // but it should be moved to a function that is called from here.
         }
 
-        // Set the thread's waitlist to the list of objects passed to WaitSynchronizationN
         thread->wait_objects = std::move(objects);
 
         // Create an event to wake the thread up after the specified nanosecond delay has passed
         thread->WakeAfterDelay(nano_seconds);
 
+        Core::System::GetInstance().PrepareReschedule();
+
         // This value gets set to -1 by default in this case, it is not modified after this.
         *out = -1;
         // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to
@@ -373,13 +368,14 @@ static ResultCode WaitSynchronizationN(s32* out, Kernel::Handle* handles, s32 ha
         return ERR_SYNC_TIMEOUT;
     } else {
         // Find the first object that is acquirable in the provided list of objects
-        auto itr = std::find_if(objects.begin(), objects.end(),
-                                [](const ObjectPtr& object) { return !object->ShouldWait(); });
+        auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) {
+            return !object->ShouldWait(thread);
+        });
 
         if (itr != objects.end()) {
             // We found a ready object, acquire it and set the result value
             Kernel::WaitObject* object = itr->get();
-            object->Acquire();
+            object->Acquire(thread);
             *out = std::distance(objects.begin(), itr);
             return RESULT_SUCCESS;
         }
@@ -392,28 +388,24 @@ static ResultCode WaitSynchronizationN(s32* out, Kernel::Handle* handles, s32 ha
             return ERR_SYNC_TIMEOUT;
 
         // Put the thread to sleep
-        thread->status = THREADSTATUS_WAIT_SYNCH;
-
-        // Clear the thread's waitlist, we won't use it for wait_all = false
-        thread->wait_objects.clear();
+        thread->status = THREADSTATUS_WAIT_SYNCH_ANY;
 
         // Add the thread to each of the objects' waiting threads.
         for (size_t i = 0; i < objects.size(); ++i) {
             Kernel::WaitObject* object = objects[i].get();
-            // Set the index of this object in the mapping of Objects -> index for this thread.
-            thread->wait_objects_index[object->GetObjectId()] = static_cast<int>(i);
             object->AddWaitingThread(thread);
-            // TODO(Subv): Perform things like update the mutex lock owner's priority to
-            // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
-            // but it should be moved to a function that is called from here.
         }
 
+        thread->wait_objects = std::move(objects);
+
         // Note: If no handles and no timeout were given, then the thread will deadlock, this is
         // consistent with hardware behavior.
 
         // Create an event to wake the thread up after the specified nanosecond delay has passed
         thread->WakeAfterDelay(nano_seconds);
 
+        Core::System::GetInstance().PrepareReschedule();
+
         // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a
         // signal in one of its wait objects.
         // Otherwise we retain the default value of timeout, and -1 in the out parameter
@@ -448,6 +440,9 @@ static ResultCode ArbitrateAddress(Kernel::Handle handle, u32 address, u32 type,
     auto res = arbiter->ArbitrateAddress(static_cast<Kernel::ArbitrationType>(type), address, value,
                                          nanoseconds);
 
+    // TODO(Subv): Identify in which specific cases this call should cause a reschedule.
+    Core::System::GetInstance().PrepareReschedule();
+
     return res;
 }
 
@@ -574,6 +569,8 @@ static ResultCode CreateThread(Kernel::Handle* out_handle, s32 priority, u32 ent
 
     CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread)));
 
+    Core::System::GetInstance().PrepareReschedule();
+
     LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, "
                           "threadpriority=0x%08X, processorid=0x%08X : created handle=0x%08X",
               entry_point, name.c_str(), arg, stack_top, priority, processor_id, *out_handle);
@@ -586,6 +583,7 @@ static void ExitThread() {
     LOG_TRACE(Kernel_SVC, "called, pc=0x%08X", Core::CPU().GetPC());
 
     Kernel::ExitCurrentThread();
+    Core::System::GetInstance().PrepareReschedule();
 }
 
 /// Gets the priority for the specified thread
@@ -605,6 +603,13 @@ static ResultCode SetThreadPriority(Kernel::Handle handle, s32 priority) {
         return ERR_INVALID_HANDLE;
 
     thread->SetPriority(priority);
+    thread->UpdatePriority();
+
+    // Update the mutexes that this thread is waiting for
+    for (auto& mutex : thread->pending_mutexes)
+        mutex->UpdatePriority();
+
+    Core::System::GetInstance().PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
@@ -849,6 +854,8 @@ static void SleepThread(s64 nanoseconds) {
 
     // Create an event to wake the thread up after the specified nanosecond delay has passed
     Kernel::GetCurrentThread()->WakeAfterDelay(nanoseconds);
+
+    Core::System::GetInstance().PrepareReschedule();
 }
 
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
@@ -1184,8 +1191,6 @@ void CallSVC(u32 immediate) {
     if (info) {
         if (info->func) {
             info->func();
-            //  TODO(Subv): Not all service functions should cause a reschedule in all cases.
-            Core::System::GetInstance().PrepareReschedule();
         } else {
             LOG_ERROR(Kernel_SVC, "unimplemented SVC function %s(..)", info->name);
         }