Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Suyu
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
many-archive
Suyu
Commits
4555b637
There was an error fetching the commit references. Please try again later.
Commit
4555b637
authored
6 years ago
by
bunnei
Browse files
Options
Downloads
Patches
Plain Diff
gpu_thread: Improve synchronization by using CoreTiming.
parent
62860dc0
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/video_core/gpu_asynch.cpp
+1
-1
1 addition, 1 deletion
src/video_core/gpu_asynch.cpp
src/video_core/gpu_thread.cpp
+33
-10
33 additions, 10 deletions
src/video_core/gpu_thread.cpp
src/video_core/gpu_thread.h
+31
-40
31 additions, 40 deletions
src/video_core/gpu_thread.h
with
65 additions
and
51 deletions
src/video_core/gpu_asynch.cpp
+
1
−
1
View file @
4555b637
...
...
@@ -9,7 +9,7 @@
namespace
VideoCommon
{
GPUAsynch
::
GPUAsynch
(
Core
::
System
&
system
,
VideoCore
::
RendererBase
&
renderer
)
:
Tegra
::
GPU
(
system
,
renderer
),
gpu_thread
{
renderer
,
*
dma_pusher
}
{}
:
Tegra
::
GPU
(
system
,
renderer
),
gpu_thread
{
system
,
renderer
,
*
dma_pusher
}
{}
GPUAsynch
::~
GPUAsynch
()
=
default
;
...
...
This diff is collapsed.
Click to expand it.
src/video_core/gpu_thread.cpp
+
33
−
10
View file @
4555b637
...
...
@@ -4,6 +4,9 @@
#include
"common/assert.h"
#include
"common/microprofile.h"
#include
"core/core.h"
#include
"core/core_timing.h"
#include
"core/core_timing_util.h"
#include
"core/frontend/scope_acquire_window_context.h"
#include
"video_core/dma_pusher.h"
#include
"video_core/gpu.h"
...
...
@@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher
.
Push
(
std
::
move
(
submit_list
->
entries
));
dma_pusher
.
DispatchCalls
();
}
else
if
(
const
auto
data
=
std
::
get_if
<
SwapBuffersCommand
>
(
&
next
.
data
))
{
state
.
DecrementFramesCounter
();
renderer
.
SwapBuffers
(
std
::
move
(
data
->
framebuffer
));
}
else
if
(
const
auto
data
=
std
::
get_if
<
FlushRegionCommand
>
(
&
next
.
data
))
{
renderer
.
Rasterizer
().
FlushRegion
(
data
->
addr
,
data
->
size
);
...
...
@@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
}
else
{
UNREACHABLE
();
}
state
.
signaled_fence
=
next
.
fence
;
state
.
TrySynchronize
();
}
}
}
ThreadManager
::
ThreadManager
(
VideoCore
::
RendererBase
&
renderer
,
Tegra
::
DmaPusher
&
dma_pusher
)
:
renderer
{
renderer
},
thread
{
RunThread
,
std
::
ref
(
renderer
),
std
::
ref
(
dma_pusher
),
std
::
ref
(
state
)}
{}
ThreadManager
::
ThreadManager
(
Core
::
System
&
system
,
VideoCore
::
RendererBase
&
renderer
,
Tegra
::
DmaPusher
&
dma_pusher
)
:
system
{
system
},
thread
{
RunThread
,
std
::
ref
(
renderer
),
std
::
ref
(
dma_pusher
),
std
::
ref
(
state
)}
{
synchronization_event
=
system
.
CoreTiming
().
RegisterEvent
(
"GPUThreadSynch"
,
[
this
](
u64
fence
,
int
)
{
state
.
WaitForSynchronization
(
fence
);
});
}
ThreadManager
::~
ThreadManager
()
{
// Notify GPU thread that a shutdown is pending
...
...
@@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() {
}
void
ThreadManager
::
SubmitList
(
Tegra
::
CommandList
&&
entries
)
{
PushCommand
(
SubmitListCommand
(
std
::
move
(
entries
)));
const
u64
fence
{
PushCommand
(
SubmitListCommand
(
std
::
move
(
entries
)))};
const
s64
synchronization_ticks
{
Core
::
Timing
::
usToCycles
(
9000
)};
system
.
CoreTiming
().
ScheduleEvent
(
synchronization_ticks
,
synchronization_event
,
fence
);
}
void
ThreadManager
::
SwapBuffers
(
std
::
optional
<
std
::
reference_wrapper
<
const
Tegra
::
FramebufferConfig
>>
framebuffer
)
{
state
.
IncrementFramesCounter
();
PushCommand
(
SwapBuffersCommand
(
std
::
move
(
framebuffer
)));
state
.
WaitForFrames
();
}
void
ThreadManager
::
FlushRegion
(
CacheAddr
addr
,
u64
size
)
{
...
...
@@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void
ThreadManager
::
InvalidateRegion
(
CacheAddr
addr
,
u64
size
)
{
if
(
state
.
queue
.
Empty
())
{
// It's quicker to invalidate a single region on the CPU if the queue is already empty
r
enderer
.
Rasterizer
().
InvalidateRegion
(
addr
,
size
);
system
.
R
enderer
()
.
Rasterizer
().
InvalidateRegion
(
addr
,
size
);
}
else
{
PushCommand
(
InvalidateRegionCommand
(
addr
,
size
));
}
...
...
@@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion
(
addr
,
size
);
}
void
ThreadManager
::
PushCommand
(
CommandData
&&
command_data
)
{
state
.
queue
.
Push
(
CommandDataContainer
(
std
::
move
(
command_data
)));
u64
ThreadManager
::
PushCommand
(
CommandData
&&
command_data
)
{
const
u64
fence
{
++
state
.
last_fence
};
state
.
queue
.
Push
(
CommandDataContainer
(
std
::
move
(
command_data
),
fence
));
state
.
SignalCommands
();
return
fence
;
}
MICROPROFILE_DEFINE
(
GPU_wait
,
"GPU"
,
"Wait for the GPU"
,
MP_RGB
(
128
,
128
,
192
));
void
SynchState
::
WaitForSynchronization
(
u64
fence
)
{
if
(
signaled_fence
>=
fence
)
{
return
;
}
// Wait for the GPU to be idle (all commands to be executed)
{
MICROPROFILE_SCOPE
(
GPU_wait
);
std
::
unique_lock
<
std
::
mutex
>
lock
{
synchronization_mutex
};
synchronization_condition
.
wait
(
lock
,
[
this
,
fence
]
{
return
signaled_fence
>=
fence
;
});
}
}
}
// namespace VideoCommon::GPUThread
This diff is collapsed.
Click to expand it.
src/video_core/gpu_thread.h
+
31
−
40
View file @
4555b637
...
...
@@ -19,9 +19,12 @@ struct FramebufferConfig;
class
DmaPusher
;
}
// namespace Tegra
namespace
VideoCore
{
class
RendererBase
;
}
// namespace VideoCore
namespace
Core
{
class
System
;
namespace
Timing
{
struct
EventType
;
}
// namespace Timing
}
// namespace Core
namespace
VideoCommon
::
GPUThread
{
...
...
@@ -75,63 +78,47 @@ using CommandData =
struct
CommandDataContainer
{
CommandDataContainer
()
=
default
;
CommandDataContainer
(
CommandData
&&
data
)
:
data
{
std
::
move
(
data
)}
{}
CommandDataContainer
(
CommandData
&&
data
,
u64
next_fence
)
:
data
{
std
::
move
(
data
)},
fence
{
next_fence
}
{}
CommandDataContainer
&
operator
=
(
const
CommandDataContainer
&
t
)
{
data
=
std
::
move
(
t
.
data
);
fence
=
t
.
fence
;
return
*
this
;
}
CommandData
data
;
u64
fence
{};
};
/// Struct used to synchronize the GPU thread
struct
SynchState
final
{
std
::
atomic_bool
is_running
{
true
};
std
::
atomic_int
queued_frame_count
{};
std
::
mutex
frames
_mutex
;
std
::
mutex
synchronization
_mutex
;
std
::
mutex
commands_mutex
;
std
::
condition_variable
commands_condition
;
std
::
condition_variable
frames
_condition
;
std
::
condition_variable
synchronization
_condition
;
void
IncrementFramesCounter
()
{
std
::
lock_guard
lock
{
frames_mutex
};
++
queued_frame_count
;
/// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
/// synchronized. This is entirely empirical.
bool
IsSynchronized
()
const
{
constexpr
std
::
size_t
max_queue_gap
{
5
};
return
queue
.
Size
()
<=
max_queue_gap
;
}
void
DecrementFramesCounter
()
{
{
std
::
lock_guard
lock
{
frames_mutex
};
--
queued_frame_count
;
if
(
queued_frame_count
)
{
return
;
}
void
TrySynchronize
()
{
if
(
IsSynchronized
())
{
std
::
lock_guard
<
std
::
mutex
>
lock
{
synchronization_mutex
};
synchronization_condition
.
notify_one
();
}
frames_condition
.
notify_one
();
}
void
WaitForFrames
()
{
{
std
::
lock_guard
lock
{
frames_mutex
};
if
(
!
queued_frame_count
)
{
return
;
}
}
// Wait for the GPU to be idle (all commands to be executed)
{
std
::
unique_lock
lock
{
frames_mutex
};
frames_condition
.
wait
(
lock
,
[
this
]
{
return
!
queued_frame_count
;
});
}
}
void
WaitForSynchronization
(
u64
fence
);
void
SignalCommands
()
{
{
std
::
unique_lock
lock
{
commands_mutex
};
if
(
queue
.
Empty
())
{
return
;
}
if
(
queue
.
Empty
())
{
return
;
}
commands_condition
.
notify_one
();
...
...
@@ -144,12 +131,15 @@ struct SynchState final {
using
CommandQueue
=
Common
::
SPSCQueue
<
CommandDataContainer
>
;
CommandQueue
queue
;
u64
last_fence
{};
std
::
atomic
<
u64
>
signaled_fence
{};
};
/// Class used to manage the GPU thread
class
ThreadManager
final
{
public:
explicit
ThreadManager
(
VideoCore
::
RendererBase
&
renderer
,
Tegra
::
DmaPusher
&
dma_pusher
);
explicit
ThreadManager
(
Core
::
System
&
system
,
VideoCore
::
RendererBase
&
renderer
,
Tegra
::
DmaPusher
&
dma_pusher
);
~
ThreadManager
();
/// Push GPU command entries to be processed
...
...
@@ -170,11 +160,12 @@ public:
private:
/// Pushes a command to be executed by the GPU thread
void
PushCommand
(
CommandData
&&
command_data
);
u64
PushCommand
(
CommandData
&&
command_data
);
private:
SynchState
state
;
VideoCore
::
RendererBase
&
renderer
;
Core
::
System
&
system
;
Core
::
Timing
::
EventType
*
synchronization_event
{};
std
::
thread
thread
;
std
::
thread
::
id
thread_id
;
};
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment