diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index ab97c8d2d911a418b721692d26e9242422b9af86..5186d2b448f869fdcddb104c099532646ee6bd97 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -21,6 +21,7 @@
 #include "common/vector_math.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/debug_utils/debug_utils.h"
 
 QImage LoadTexture(u8* src, const Pica::DebugUtils::TextureInfo& info) {
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 80b32eaff27b450218a2cd853e648fbb81aadce0..c30e75933aa04f78efefd59f886d967f81dd4988 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -18,6 +18,7 @@
 #include "core/hw/gpu.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/utils.h"
 
 GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context,
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index b0bc782dfa2c4fe99d58771ec63026b5f1ae29d0..e064987442af1e01501891bd20537acd951832c4 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -22,7 +22,7 @@
 #include "nihstro/float24.h"
 
 #include "video_core/pica.h"
-
+#include "video_core/pica_state.h"
 
 GraphicsTracingWidget::GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context,
                                              QWidget* parent)
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index a5a5fe6b0389a56e76ff86d901a2d1984e752735..a11c61667524b264af75b00548576d7c336d147e 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -19,6 +19,8 @@
 #include "citra_qt/debugger/graphics_vertex_shader.h"
 #include "citra_qt/util/util.h"
 
+#include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/shader/shader.h"
 
 using nihstro::OpCode;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4b5d298f365905aefe209afa2d07f38350f6c05d..76cfd4f7de79d3ee65ac40a328fbf983b7260c4b 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -33,6 +33,7 @@ set(HEADERS
             command_processor.h
             gpu_debugger.h
             pica.h
+            pica_state.h
             pica_types.h
             primitive_assembly.h
             rasterizer.h
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index a385589d2d9589d3831f32b7e7ed03b049d99349..3d503486ea9167175170ee8944bedab28b3b32cd 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -6,6 +6,7 @@
 
 #include "video_core/clipper.h"
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/rasterizer.h"
 #include "video_core/shader/shader_interpreter.h"
 
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 73fdfbe9c4db12dd3ffc5e8b7032d1386f6b6125..2f92be66119406b79467487ba2f3ce1118bf04ee 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -15,6 +15,7 @@
 #include "video_core/clipper.h"
 #include "video_core/command_processor.h"
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/primitive_assembly.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
@@ -73,6 +74,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
             break;
 
+        case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232):
+            if (regs.vs_default_attributes_setup.index == 15) {
+                // Reset immediate primitive state
+                g_state.immediate.primitive_assembler.Reconfigure(regs.triangle_topology);
+                g_state.immediate.attribute_id = 0;
+            }
+            break;
+
         // Load default vertex input attributes
         case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
         case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
@@ -108,11 +117,48 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                           attribute.w.ToFloat32());
 
                 // TODO: Verify that this actually modifies the register!
-                setup.index = setup.index + 1;
+                if (setup.index < 15) {
+                    setup.index++;
+                } else {
+                    // Put each attribute into an immediate input buffer.
+                    // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is
+                    // sent to the primitive assembler.
+
+                    auto& immediate_input = g_state.immediate.input;
+                    auto& immediate_attribute_id = g_state.immediate.attribute_id;
+                    const auto& attribute_config = regs.vertex_attributes;
+
+                    immediate_input.attr[immediate_attribute_id++] = attribute;
+
+                    if (immediate_attribute_id >= attribute_config.GetNumTotalAttributes()) {
+                        immediate_attribute_id = 0;
+
+                        Shader::UnitState<false> shader_unit;
+                        Shader::Setup(shader_unit);
+
+                        // Send to vertex shader
+                        Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, attribute_config.GetNumTotalAttributes());
+
+                        // Send to renderer
+                        using Pica::Shader::OutputVertex;
+                        auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
+                            VideoCore::g_renderer->rasterizer->AddTriangle(v0, v1, v2);
+                        };
+
+                        g_state.immediate.primitive_assembler.SubmitVertex(output, AddTriangle);
+                    }
+                }
             }
             break;
         }
 
+        case PICA_REG_INDEX(gpu_mode):
+            if (regs.gpu_mode == Regs::GPUMode::Configuring && regs.vs_default_attributes_setup.index == 15) {
+                // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
+                VideoCore::g_renderer->rasterizer->DrawTriangles();
+            }
+            break;
+
         case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
         case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
         {
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 6e6fd7335f14b14bfa3ddb35b4a6123f2ce19bb8..6e21caa782b694862d6f888659271be8c3a19e19 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -28,6 +28,7 @@
 #include "core/settings.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/renderer_base.h"
 #include "video_core/utils.h"
 #include "video_core/video_core.h"
@@ -113,7 +114,7 @@ void GeometryDumper::Dump() {
 }
 
 
-void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
+void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
 {
     struct StuffToWrite {
         u8* pointer;
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 85762f5b487cbeccb2f53ef6736accbe6f4ad696..795160a32737e0100ba49e7825b8959152086e8d 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -17,6 +17,7 @@
 #include "core/tracer/recorder.h"
 
 #include "video_core/pica.h"
+#include "video_core/shader/shader.h"
 
 namespace Pica {
 
@@ -182,7 +183,7 @@ private:
 };
 
 void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
-                const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes);
+                const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes);
 
 
 // Utility class to log Pica commands.
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index d7b31384a0a002fad3137e60c3e2418f3822103f..32ad72674a4ebf2c658ed9b52ce873e48ee1adb6 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -6,6 +6,7 @@
 #include <unordered_map>
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/shader/shader.h"
 
 namespace Pica {
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 9077b1725c967732cc68408e84805a34601727ca..2e0c33201628e1730e455f3ef12f2265acfe82a6 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -1089,7 +1089,16 @@ struct Regs {
         }
     } command_buffer;
 
-    INSERT_PADDING_WORDS(0x20);
+    INSERT_PADDING_WORDS(0x07);
+
+    enum class GPUMode : u32 { // Values held by the gpu_mode register (asserted at 0x245)
+        Drawing = 0,
+        Configuring = 1 // A write of this value can trigger drawing of immediate-mode triangles
+    };
+
+    GPUMode gpu_mode;
+
+    INSERT_PADDING_WORDS(0x18);
 
     enum class TriangleTopology : u32 {
         List   = 0,
@@ -1278,6 +1287,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
 ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
 ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
 ASSERT_REG_POSITION(command_buffer, 0x238);
+ASSERT_REG_POSITION(gpu_mode, 0x245);
 ASSERT_REG_POSITION(triangle_topology, 0x25e);
 ASSERT_REG_POSITION(restart_primitive, 0x25f);
 ASSERT_REG_POSITION(gs, 0x280);
@@ -1292,64 +1302,10 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st
 static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
 static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
 
-/// Struct used to describe current Pica state
-struct State {
-    /// Pica registers
-    Regs regs;
-
-    /// Vertex shader memory
-    struct ShaderSetup {
-        struct {
-            // The float uniforms are accessed by the shader JIT using SSE instructions, and are
-            // therefore required to be 16-byte aligned.
-            Math::Vec4<float24> MEMORY_ALIGNED16(f[96]);
-
-            std::array<bool, 16> b;
-            std::array<Math::Vec4<u8>, 4> i;
-        } uniforms;
-
-        Math::Vec4<float24> default_attributes[16];
-
-        std::array<u32, 1024> program_code;
-        std::array<u32, 1024> swizzle_data;
-    };
-
-    ShaderSetup vs;
-    ShaderSetup gs;
-
-    struct {
-        union LutEntry {
-            // Used for raw access
-            u32 raw;
-
-            // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
-            BitField< 0, 12, u32> value;
-
-            // Used by HW for efficient interpolation, Citra does not use these
-            BitField<12, 12, u32> difference;
-
-            float ToFloat() {
-                return static_cast<float>(value) / 4095.f;
-            }
-        };
-
-        std::array<std::array<LutEntry, 256>, 24> luts;
-    } lighting;
-
-    /// Current Pica command list
-    struct {
-        const u32* head_ptr;
-        const u32* current_ptr;
-        u32 length;
-    } cmd_list;
-};
-
 /// Initialize Pica state
 void Init();
 
 /// Shutdown Pica state
 void Shutdown();
 
-extern State g_state; ///< Current Pica state
-
 } // namespace
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7616bc551c1d77450887006bed01e2d3f7e805c
--- /dev/null
+++ b/src/video_core/pica_state.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/pica.h"
+#include "video_core/primitive_assembly.h"
+#include "video_core/shader/shader.h"
+
+namespace Pica {
+
+/// Struct holding the complete current Pica state (exposed globally as g_state)
+struct State {
+    /// Pica registers
+    Regs regs;
+
+    Shader::ShaderSetup vs;
+    Shader::ShaderSetup gs;
+
+    struct {
+        union LutEntry {
+            // Used for raw access
+            u32 raw;
+
+            // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
+            BitField< 0, 12, u32> value;
+
+            // Used by HW for efficient interpolation; Citra does not use this field
+            BitField<12, 12, u32> difference;
+
+            float ToFloat() const { // Read-only: maps the 12-bit value onto [0, 1]
+                return static_cast<float>(value) / 4095.f;
+            }
+        };
+
+        std::array<std::array<LutEntry, 256>, 24> luts;
+    } lighting;
+
+    /// Current Pica command list
+    struct {
+        const u32* head_ptr;
+        const u32* current_ptr;
+        u32 length;
+    } cmd_list;
+
+    /// State of immediate mode rendering (vertices submitted attribute by attribute)
+    struct ImmediateModeState {
+        Shader::InputVertex input;
+        // Starts with a placeholder List topology; Reconfigure() installs the real one later
+        PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
+        int attribute_id = 0; // Index of the next slot in input.attr to be filled
+
+        ImmediateModeState() : primitive_assembler(Regs::TriangleTopology::List) {}
+    } immediate;
+};
+
+extern State g_state; ///< Current Pica state
+
+} // namespace
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index d5a0a96a4f9b1f39406fd1dd0a014c78ece08b81..0061690f1324620f00acd2313aa8a009b82b7498 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -53,6 +53,18 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl
     }
 }
 
+template <typename VertexType>
+void PrimitiveAssembler<VertexType>::Reset() {
+    strip_ready = false; // clear the strip flag
+    buffer_index = 0;    // rewind the vertex buffer index
+}
+
+template <typename VertexType>
+void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) {
+    this->topology = topology; // parameter shadows the member, hence the explicit this->
+    Reset();                   // independent of topology, so the order does not matter
+}
+
 // explicitly instantiate use cases
 template
 struct PrimitiveAssembler<Shader::OutputVertex>;
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index 52d0ec8ff4db3520c69711f845216f474716c7b2..cc6e5fde54a3432104ebb4423e365725ce92be82 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -30,6 +30,16 @@ struct PrimitiveAssembler {
      */
     void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler);
 
+    /**
+     * Resets the assembler's internal state (vertex buffer index and strip-ready flag).
+     */
+    void Reset();
+
+    /**
+     * Resets the internal state and switches the assembler to the given triangle topology.
+     */
+    void Reconfigure(Regs::TriangleTopology topology);
+
 private:
     Regs::TriangleTopology topology;
 
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index ecfdbc9e851cba5bda0756ce582e4876bd817187..dd1604a38e770594ca0a7b55e69a90b798af0b4d 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -15,6 +15,7 @@
 #include "core/hw/gpu.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/rasterizer.h"
 #include "video_core/utils.h"
 #include "video_core/debug_utils/debug_utils.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b7d19bf94363ba908ab03c27694cd06242b64a5f..0f864b617e69ca9b7af8d013291dcc9d2cba7ed7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -19,6 +19,7 @@
 #include "core/hw/gpu.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/utils.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fef5f533155bbef15a9c3f78558df47f3344a72d..fc85aa3fff74d76fb5a2f7bdc4caf140c2e69ef2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -14,6 +14,7 @@
 #include "common/hash.h"
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 44c234ed86993c4dc52986d37bf9a908ae4aac54..5e8930476cd652aa9307ce3c5f81048693083953 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -14,6 +14,7 @@
 
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/video_core.h"
 
 #include "shader.h"
@@ -145,7 +146,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
     return ret;
 }
 
-DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
+DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
     UnitState<true> state;
 
     state.program_counter = config.main_offset;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index f068cd93f24e605bc87d48cb192648eb19013986..1be4e37346eab4ee9bc7221a6d3b27e82b55c788 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -77,6 +77,22 @@ struct OutputVertex {
 static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
 static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
 
+/// Shader memory for one shader stage (State keeps one instance each for "vs" and "gs")
+struct ShaderSetup {
+    struct {
+        // The float uniforms are accessed by the shader JIT using SSE instructions, and are
+        // therefore required to be 16-byte aligned. Do not reorder without checking the JIT.
+        Math::Vec4<float24> MEMORY_ALIGNED16(f[96]);
+
+        std::array<bool, 16> b;          // 16 boolean uniforms
+        std::array<Math::Vec4<u8>, 4> i; // 4 four-component u8 uniforms (presumably integer uniforms)
+    } uniforms;
+
+    Math::Vec4<float24> default_attributes[16];
+
+    std::array<u32, 1024> program_code; // 1024 words; presumably the shader binary - confirm
+    std::array<u32, 1024> swizzle_data; // 1024 words; presumably operand swizzle patterns - confirm
+};
 
 // Helper structure used to keep track of data useful for inspection of shader emulation
 template<bool full_debugging>
@@ -347,7 +363,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
  * @param setup Setup object for the shader pipeline
  * @return Debug information for this shader with regards to the given vertex
  */
-DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup);
+DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
 
 } // namespace Shader
 
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index aeced71b0be2352122661efa1eba688bced189a2..79fcc56b9bb7cb850d7d6ed503fa5687821db1e2 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -7,6 +7,7 @@
 #include <nihstro/shader_bytecode.h>
 
 #include "video_core/pica.h"
+#include "video_core/pica_state.h"
 #include "video_core/shader/shader.h"
 #include "video_core/shader/shader_interpreter.h"
 
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 4249675a5d619a0bad49f354640f84bdc0c0a06c..5083d7e54b37d3797fa0c0f4bd8d0c6b206ed467 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -11,6 +11,8 @@
 #include "shader.h"
 #include "shader_jit_x64.h"
 
+#include "video_core/pica_state.h"
+
 namespace Pica {
 
 namespace Shader {