通常情况下,UE4的渲染操作是在渲染线程中执行的,这样可以提高系统的效率以及CPU的利用率。当然,在某些情况下渲染操作也可能在game线程中执行,但一般情况下渲染线程会比game线程晚一帧执行。下面对渲染线程的调用流程做一个简单的介绍。
1 线程创建
将代码定位到
int32 FEngineLoop::PreInit(const TCHAR* CmdLine)
其中有一行代码为
// initialize task graph sub-system with potential multiple threads
FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores());
这行代码的作用就是在引擎加载阶段创建线程,具体过程请继续往下看。
// Statics in FTaskGraphInterface
// Creates the task graph implementation object.
// The result of `new` is intentionally not stored here: as the comment below explains, the
// constructor itself records the singleton pointer.
// @param NumThreads  value forwarded to the FTaskGraphImplementation constructor.
void FTaskGraphInterface::Startup(int32 NumThreads)
{
// TaskGraphImplementationSingleton is actually set in the constructor because find work will be called before this returns.
new FTaskGraphImplementation(NumThreads);
}
在上述代码中,FTaskGraphInterface是一个单例(singleton)接口,而FTaskGraphImplementation是FTaskGraphInterface的实现类,上面的new语句就是在创建这个单例。下面进入FTaskGraphImplementation,来看一下这个单例实现类的构造函数。
/**
* Constructor - initializes the data structures, sets the singleton pointer and creates the internal threads.
* @param (unnamed int32) Thread count passed by the caller. NOTE(review): the parameter has no name and is never read in this body; the worker count is taken from FPlatformMisc::NumberOfWorkerThreadsToSpawn() instead.
* Original description of the parameter: total number of threads in the system, including named threads, unnamed threads, internal threads and external threads. Must be at least 1 + the number of named threads.
**/
FTaskGraphImplementation(int32)
{
// Snapshot the ENamedThreads priority-set flags as booleans.
bCreatedHiPriorityThreads = !!ENamedThreads::bHasHighPriorityThreads;
bCreatedBackgroundPriorityThreads = !!ENamedThreads::bHasBackgroundThreads;
// NOTE(review): MaxTaskThreads is assigned here and in the branch below but is not read again in this constructor.
int32 MaxTaskThreads = MAX_THREADS;
int32 NumTaskThreads = FPlatformMisc::NumberOfWorkerThreadsToSpawn();
// if we don't want any performance-based threads, then force the task graph to not create any worker threads, and run in game thread
if (!FPlatformProcess::SupportsMultithreading())
{
// this is the logic that used to be spread over a couple of places, that will make the rest of this function disable a worker thread
// @todo: it could probably be made simpler/clearer
// this - 1 tells the below code there is no rendering thread
MaxTaskThreads = 1;
NumTaskThreads = 1;
LastExternalThread = (ENamedThreads::Type)(ENamedThreads::ActualRenderingThread - 1);
// No extra priority sets in this mode: clear both the local flags and the ENamedThreads flags.
bCreatedHiPriorityThreads = false;
bCreatedBackgroundPriorityThreads = false;
ENamedThreads::bHasBackgroundThreads = 0;
ENamedThreads::bHasHighPriorityThreads = 0;
}
else
{
// Multithreaded case: the named threads run up to and including ActualRenderingThread.
LastExternalThread = ENamedThreads::ActualRenderingThread;
}
// Named threads occupy indices [0, LastExternalThread]; task threads follow them.
NumNamedThreads = LastExternalThread + 1;
// One normal-priority set, plus one set each for high-priority and background threads when enabled.
NumTaskThreadSets = 1 + bCreatedHiPriorityThreads + bCreatedBackgroundPriorityThreads;
// if we don't have enough threads to allow all of the sets asked for, then we can't create what was asked for.
check(NumTaskThreadSets == 1 || FMath::Min<int32>(NumTaskThreads * NumTaskThreadSets + NumNamedThreads, MAX_THREADS) == NumTaskThreads * NumTaskThreadSets + NumNamedThreads);
// Total = named threads + task threads for every set, clamped to MAX_THREADS but never below NumNamedThreads + 1.
NumThreads = FMath::Max<int32>(FMath::Min<int32>(NumTaskThreads * NumTaskThreadSets + NumNamedThreads, MAX_THREADS), NumNamedThreads + 1);
// Cap number of extra threads to the platform worker thread count
// if we don't have enough threads to allow all of the sets asked for, then we can't create what was asked for.
check(NumTaskThreadSets == 1 || FMath::Min(NumThreads, NumNamedThreads + NumTaskThreads * NumTaskThreadSets) == NumThreads);
NumThreads = FMath::Min(NumThreads, NumNamedThreads + NumTaskThreads * NumTaskThreadSets);
NumTaskThreadsPerSet = (NumThreads - NumNamedThreads) / NumTaskThreadSets;
check((NumThreads - NumNamedThreads) % NumTaskThreadSets == 0); // should be equal numbers of threads per priority set
UE_LOG(LogTaskGraph, Log, TEXT("Started task graph with %d named threads and %d total threads with %d sets of task threads."), NumNamedThreads, NumThreads, NumTaskThreadSets);
check(NumThreads - NumNamedThreads >= 1); // need at least one pure worker thread
check(NumThreads <= MAX_THREADS);
check(!ReentrancyCheck.GetValue()); // reentrant?
ReentrancyCheck.Increment(); // just checking for reentrancy
// TLS slot handed to every worker's Setup() call below.
PerThreadIDTLSSlot = FPlatformTLS::AllocTlsSlot();
// First pass: create a worker object for every thread index (named and unnamed alike).
for (int32 ThreadIndex = 0; ThreadIndex < NumThreads; ThreadIndex++)
{
check(!WorkerThreads[ThreadIndex].bAttached); // reentrant?
// Indices at or beyond NumNamedThreads are unnamed ("any thread") task threads.
bool bAnyTaskThread = ThreadIndex >= NumNamedThreads;
if (bAnyTaskThread)
{
WorkerThreads[ThreadIndex].TaskGraphWorker = new FTaskThreadAnyThread(ThreadIndexToPriorityIndex(ThreadIndex));
}
else
{
WorkerThreads[ThreadIndex].TaskGraphWorker = new FNamedTaskThread;
}
WorkerThreads[ThreadIndex].TaskGraphWorker->Setup(ENamedThreads::Type(ThreadIndex), PerThreadIDTLSSlot, &WorkerThreads[ThreadIndex]);
}
TaskGraphImplementationSingleton = this; // now reentrancy is ok
// Second pass: create a runnable thread only for the unnamed task threads (indices after LastExternalThread).
for (int32 ThreadIndex = LastExternalThread + 1; ThreadIndex < NumThreads; ThreadIndex++)
{
FString Name;
int32 Priority = ThreadIndexToPriorityIndex(ThreadIndex);
EThreadPriority ThreadPri;
uint64 Affinity = FPlatformAffinity::GetTaskGraphThreadMask();
// Priority index 1 -> "HP" thread, 2 -> "BP" thread, anything else -> "NP" thread.
if (Priority == 1)
{
Name = FString::Printf(TEXT("TaskGraphThreadHP %d"), ThreadIndex - (LastExternalThread + 1));
ThreadPri = TPri_SlightlyBelowNormal; // we want even hi priority tasks below the normal threads
}
else if (Priority == 2)
{
Name = FString::Printf(TEXT("TaskGraphThreadBP %d"), ThreadIndex - (LastExternalThread + 1));
ThreadPri = TPri_Lowest;
// If the platform defines FPlatformAffinity::GetTaskGraphBackgroundTaskMask then use it
if ( FPlatformAffinity::GetTaskGraphBackgroundTaskMask() != 0xFFFFFFFFFFFFFFFF )
{
Affinity = FPlatformAffinity::GetTaskGraphBackgroundTaskMask();
}
}
else
{
Name = FString::Printf(TEXT("TaskGraphThreadNP %d"), ThreadIndex - (LastExternalThread + 1));
ThreadPri = TPri_BelowNormal; // we want normal tasks below normal threads like the game thread
}
// Stack size depends on the build configuration: 1 MiB with the editor, 384 KiB for shipping/test builds, 512 KiB otherwise.
#if WITH_EDITOR
uint32 StackSize = 1024 * 1024;
#elif ( UE_BUILD_SHIPPING || UE_BUILD_TEST )
uint32 StackSize = 384 * 1024;
#else
uint32 StackSize = 512 * 1024;
#endif
WorkerThreads[ThreadIndex].RunnableThread = FRunnableThread::Create(&Thread(ThreadIndex), *Name, StackSize, ThreadPri, Affinity); // these are below normal threads so that they sleep when the named threads are active
WorkerThreads[ThreadIndex].bAttached = true;
}
}
纵观以上代码,说白了就是先计算需要创建的线程个数,然后逐个创建线程的过程。线程分为FTaskThreadAnyThread和FNamedTaskThread两种,创建的代码如下:
if (bAnyTaskThread)
{
WorkerThreads[ThreadIndex].TaskGraphWorker = new FTaskThreadAnyThread(ThreadIndexToPriorityIndex(ThreadIndex));
}
else
{
WorkerThreads[ThreadIndex].TaskGraphWorker = new FNamedTaskThread;
}
WorkerThreads[ThreadIndex].TaskGraphWorker->Setup(ENamedThreads::Type(ThreadIndex), PerThreadIDTLSSlot, &WorkerThreads[ThreadIndex]);
创建好的任务线程对象保存在WorkerThreads[]中,而且这些任务线程对象均继承于FRunnable接口。FRunnable是UE4自定义的“可在线程上运行的对象”的抽象,它本身并不是真正的线程,线程真正的创建在以下代码中执行。
WorkerThreads[ThreadIndex].RunnableThread = FRunnableThread::Create(&Thread(ThreadIndex), *Name, StackSize, ThreadPri, Affinity); // these are below normal threads so that they sleep when the named threads are active
以上线程的创建并未包括渲染线程,渲染线程的创建如下:
// Starts the rendering thread. If an RHI thread (or RHI task threads) is requested, the related
// global state is set up first; then the rendering thread and its heartbeat thread are created.
void StartRenderingThread()
{
// Incremented at the end of each call; used below when building the thread names.
static uint32 ThreadCount = 0;
// Threaded rendering must be requested but not already running.
check(!GIsThreadedRendering && GUseThreadedRendering);
// None of the RHI-thread globals may already be set.
check(!GRHIThread_InternalUseOnly && !GIsRunningRHIInSeparateThread_InternalUseOnly && !GIsRunningRHIInDedicatedThread_InternalUseOnly && !GIsRunningRHIInTaskThread_InternalUseOnly);
if (GUseRHIThread_InternalUseOnly)
{
// Dedicated RHI thread path: flush the immediate command list, then start the RHI thread unless it is already processing tasks.
FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread))
{
FRHIThread::Get().Start();
}
DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks);
// Dispatch an FOwnershipOfRHIThreadTask and block the game thread until it completes.
// NOTE(review): the task body is not visible here; judging by its name it presumably hands ownership of the RHI thread over — confirm.
FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread));
QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread);
FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local);
// Publish the RHI thread handle, the mode flags and the thread id.
GRHIThread_InternalUseOnly = FRHIThread::Get().Thread;
check(GRHIThread_InternalUseOnly);
GIsRunningRHIInDedicatedThread_InternalUseOnly = true;
GIsRunningRHIInSeparateThread_InternalUseOnly = true;
GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID();
GRHICommandList.LatchBypass();
}
else if (GUseRHITaskThreads_InternalUseOnly)
{
// RHI work runs on task threads: only the mode flags are set, no dedicated thread is started here.
GIsRunningRHIInSeparateThread_InternalUseOnly = true;
GIsRunningRHIInTaskThread_InternalUseOnly = true;
}
// Turn on the threaded rendering flag.
GIsThreadedRendering = true;
// Create the rendering thread.
GRenderingThreadRunnable = new FRenderingThread();
GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask());
// Wait for render thread to have taskgraph bound before we dispatch any tasks for it.
((FRenderingThread*)GRenderingThreadRunnable)->TaskGraphBoundSyncEvent->Wait();
// register
IConsoleManager::Get().RegisterThreadPropagation(GRenderingThread->GetThreadID(), &FConsoleRenderThreadPropagation::GetSingleton());
// ensure the thread has actually started and is idling
FRenderCommandFence Fence;
Fence.BeginFence();
Fence.Wait();
GRunRenderingThreadHeartbeat = true;
// Create the rendering thread heartbeat
GRenderingThreadRunnableHeartbeat = new FRenderingThreadTickHeartbeat();
GRenderingThreadHeartbeat = FRunnableThread::Create(GRenderingThreadRunnableHeartbeat, *FString::Printf(TEXT("RTHeartBeat %d"), ThreadCount), 16 * 1024, TPri_AboveNormal, FPlatformAffinity::GetRTHeartBeatMask());
ThreadCount++;
}
该过程也是在以下代码中调用。
int32 FEngineLoop::PreInit(const TCHAR* CmdLine)
2 线程调用
渲染线程创建完成之后便可以对其进行使用了,下面以FPrimitiveSceneProxy::SetSelection_GameThread为例进行讲解,完整代码如下所示。
// Game-thread entry point that forwards a selection change to the rendering thread.
// @param bInParentSelected        passed on as the first argument of SetSelection_RenderThread.
// @param bInIndividuallySelected  passed on as the second argument of SetSelection_RenderThread.
void FPrimitiveSceneProxy::SetSelection_GameThread(const bool bInParentSelected, const bool bInIndividuallySelected)
{
// Must be called from the game thread.
check(IsInGameThread());
// Enqueue a message to the rendering thread containing the interaction to add.
// The three (type, name, value) triples below become members of the generated command;
// the braced code is the command body and calls SetSelection_RenderThread with the captured values.
ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER(
SetNewSelection,
FPrimitiveSceneProxy*,PrimitiveSceneProxy,this,
const bool,bNewParentSelection,bInParentSelected,
const bool,bNewIndividuallySelected,bInIndividuallySelected,
{
PrimitiveSceneProxy->SetSelection_RenderThread(bNewParentSelection,bNewIndividuallySelected);
});
}
该段代码的意思是在game线程下向渲染线程传递SetSelection_RenderThread指令,该过程是通过ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER宏来实现的,展开这个宏可以看到如下代码。
// Enqueues a render command that captures three parameters.
// Expands to two steps: _DECLARE defines the command class EURCMacro_<TypeName>,
// then _CREATE constructs it from the three ParamValue arguments and dispatches (or directly runs) it.
#define ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER(TypeName,ParamType1,ParamName1,ParamValue1,ParamType2,ParamName2,ParamValue2,ParamType3,ParamName3,ParamValue3,Code) \
ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_DECLARE(TypeName,ParamType1,ParamName1,ParamValue1,ParamType2,ParamName2,ParamValue2,ParamType3,ParamName3,ParamValue3,Code) \
ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_CREATE(TypeName,ParamType1,ParamValue1,ParamType2,ParamValue2,ParamType3,ParamValue3)
// Forwards to the _DECLARE_OPTTYPENAME variant with an empty OptTypename argument
// (note the two consecutive commas before Code).
#define ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_DECLARE(TypeName,ParamType1,ParamName1,ParamValue1,ParamType2,ParamName2,ParamValue2,ParamType3,ParamName3,ParamValue3,Code) \
ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_DECLARE_OPTTYPENAME(TypeName,ParamType1,ParamName1,ParamValue1,ParamType2,ParamName2,ParamValue2,ParamType3,ParamName3,ParamValue3,,Code)
/**
* Declares a rendering command type with 3 parameters.
*
* Generates class EURCMacro_<TypeName>, derived from FRenderCommand, which:
*  - stores the three parameters as private members named ParamName1..3;
*  - takes them in its constructor as In<ParamName> arguments typed via TCallTraits<ParamType>::ParamType;
*  - gets its DoTask() body from TASK_FUNCTION(Code) and its GetStatId() from TASKNAME_FUNCTION(TypeName).
* OptTypename is pasted in front of each TCallTraits<...>::ParamType; the plain _DECLARE macro passes it empty.
* The ParamValue arguments are accepted but not used by this macro.
*/
#define ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_DECLARE_OPTTYPENAME(TypeName,ParamType1,ParamName1,ParamValue1,ParamType2,ParamName2,ParamValue2,ParamType3,ParamName3,ParamValue3,OptTypename,Code) \
class EURCMacro_##TypeName : public FRenderCommand \
{ \
public: \
EURCMacro_##TypeName(OptTypename TCallTraits<ParamType1>::ParamType In##ParamName1,OptTypename TCallTraits<ParamType2>::ParamType In##ParamName2,OptTypename TCallTraits<ParamType3>::ParamType In##ParamName3): \
ParamName1(In##ParamName1), \
ParamName2(In##ParamName2), \
ParamName3(In##ParamName3) \
{} \
TASK_FUNCTION(Code) \
TASKNAME_FUNCTION(TypeName) \
private: \
ParamType1 ParamName1; \
ParamType2 ParamName2; \
ParamType3 ParamName3; \
};
// Defines the DoTask() member of a render command.
// A local reference named RHICmdList (obtained from GetImmediateCommandList_ForRenderCommand())
// is declared before Code, so the user-supplied Code can refer to it.
#define TASK_FUNCTION(Code) \
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) \
{ \
FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); \
Code; \
}
// Defines the GetStatId() member of a render command; the stat is declared from TypeName
// in the STATGROUP_RenderThreadCommands group.
#define TASKNAME_FUNCTION(TypeName) \
FORCEINLINE TStatId GetStatId() const \
{ \
RETURN_QUICK_DECLARE_CYCLE_STAT(TypeName, STATGROUP_RenderThreadCommands); \
}
// Creates and submits an instance of the command class EURCMacro_<TypeName>.
//  - If ShouldExecuteOnRenderThread() is true: wraps the command in a TGraphTask and dispatches it
//    with the three parameter values as constructor arguments.
//  - Otherwise: constructs the command on the stack and calls DoTask() immediately, passing
//    ENamedThreads::GameThread as the current thread and an empty FGraphEventRef.
// The ParamType arguments are accepted but not used by this macro.
#define ENQUEUE_UNIQUE_RENDER_COMMAND_THREEPARAMETER_CREATE(TypeName,ParamType1,ParamValue1,ParamType2,ParamValue2,ParamType3,ParamValue3) \
{ \
LogRenderCommand(TypeName); \
if(ShouldExecuteOnRenderThread()) \
{ \
CheckNotBlockedOnRenderThread(); \
TGraphTask<EURCMacro_##TypeName>::CreateTask().ConstructAndDispatchWhenReady(ParamValue1,ParamValue2,ParamValue3); \
} \
else \
{ \
EURCMacro_##TypeName TempCommand(ParamValue1,ParamValue2,ParamValue3); \
FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId()); \
TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef() ); \
} \
}
即创建了一个FRenderCommand的子类来保存传入渲染线程的参数,以及一个DoTask实现,用来执行SetSelection_RenderThread指令。然后通过ConstructAndDispatchWhenReady来将渲染线程的操作作为一个任务压入渲染线程的队列中,等待执行。其实现如下:
/**
 * Constructs the task object in place inside the owner's storage, then hands it to Setup().
 * @param Args  constructor arguments, forwarded unchanged to TTask.
 * @return the graph event returned by Owner->Setup().
 */
template<typename... T>
FGraphEventRef ConstructAndDispatchWhenReady(T&&... Args)
{
	// Placement-new: the task lives in Owner->TaskStorage.
	void* const TaskMemory = (void *)&Owner->TaskStorage;
	new (TaskMemory) TTask(Forward<T>(Args)...);

	// Let the owner register its prerequisites.
	return Owner->Setup(Prerequisites, CurrentThreadIfKnown);
}
/**
 * Registers the prerequisites of this task and returns its completion event.
 * @param Prerequisites         optional list of events this task depends on; may be NULL.
 * @param CurrentThreadIfKnown  passed through to SetupPrereqs().
 * @return a reference to Subsequents, taken before the prerequisites are set up.
 */
FGraphEventRef Setup(const FGraphEventArray* Prerequisites = NULL, ENamedThreads::Type CurrentThreadIfKnown = ENamedThreads::AnyThread)
{
	// Grab our own reference first: very important so that this doesn't get destroyed before we return.
	FGraphEventRef CompletionEvent(Subsequents);

	const bool bUnlockNow = true;
	SetupPrereqs(Prerequisites, CurrentThreadIfKnown, bUnlockNow);

	return CompletionEvent;
}
void SetupPrereqs(const FGraphEventArray* Prerequisites, ENamedThreads::Type CurrentThreadIfKnown, bool bUnlock)
{
checkThreadGraph(!TaskConstructed);
TaskConstructed = true;
TTask& Task = *(TTask*)&TaskStorage;
SetThreadToExecuteOn(Task.GetDesiredThread());
int32 AlreadyCompletedPrerequisites = 0;
if (Prerequisites)
{
for (int32 Index = 0; Index < Prerequisites->Num(); Index++)
{
check((*Prerequisites)[Index]);
if (!(*Prerequisites)[Index]->AddSubsequent(this))
{
AlreadyCompletedPrerequisites++;
}
}
}
PrerequisitesComplete(CurrentThreadIfKnown, AlreadyCompletedPrerequisites, bUnlock);
}
/**
 * Releases prerequisite counts and queues the task once none remain outstanding.
 * @param CurrentThread                passed to QueueTask() when the task becomes ready.
 * @param NumAlreadyFinishedPrequistes number of prerequisites that had already completed.
 * @param bUnlock                      when true, one extra count is released for the "lock" set up in the constructor.
 */
void PrerequisitesComplete(ENamedThreads::Type CurrentThread, int32 NumAlreadyFinishedPrequistes, bool bUnlock = true)
{
	checkThreadGraph(LifeStage.Increment() == int32(LS_PrequisitesSetup));

	int32 NumToRelease = NumAlreadyFinishedPrequistes;
	if (bUnlock)
	{
		NumToRelease += 1; // the +1 is for the "lock" we set up in the constructor
	}

	// NOTE(review): presumably Subtract() returns the value before subtraction, so equality means the counter reached zero — confirm.
	const bool bNothingOutstanding = (NumberOfPrerequistitesOutstanding.Subtract(NumToRelease) == NumToRelease);
	if (!bNothingOutstanding)
	{
		return;
	}
	QueueTask(CurrentThread);
}
// Hands this task to the task graph for execution on ThreadToExecuteOn.
// @param CurrentThreadIfKnown  forwarded to FTaskGraphInterface::QueueTask().
void QueueTask(ENamedThreads::Type CurrentThreadIfKnown)
{
// The check expression also advances LifeStage.
// NOTE(review): if checkThreadGraph compiles out, the Increment() is skipped too — confirm this is intended.
checkThreadGraph(LifeStage.Increment() == int32(LS_Queued));
FTaskGraphInterface::Get().QueueTask(this, ThreadToExecuteOn, CurrentThreadIfKnown);
}
// Routes a task to the queue it should execute from.
// - Tasks targeting AnyThread go to the IncomingAnyThreadTasks queue of the matching priority
//   (when multithreading is supported); otherwise they are redirected to the game thread.
// - Tasks targeting a named thread are enqueued on that thread's queue, using the
//   "from this thread" or "from other thread" path depending on the caller's thread.
// @param Task                    task to queue.
// @param ThreadToExecuteOn       target thread (thread index plus queue/priority bits).
// @param InCurrentThreadIfKnown  caller's thread if known; AnyThread means "look it up".
virtual void QueueTask(FBaseGraphTask* Task, ENamedThreads::Type ThreadToExecuteOn, ENamedThreads::Type InCurrentThreadIfKnown = ENamedThreads::AnyThread) final override
{
TASKGRAPH_SCOPE_CYCLE_COUNTER(2, STAT_TaskGraph_QueueTask);
if (ENamedThreads::GetThreadIndex(ThreadToExecuteOn) == ENamedThreads::AnyThread)
{
TASKGRAPH_SCOPE_CYCLE_COUNTER(3, STAT_TaskGraph_QueueTask_AnyThread);
if (FPlatformProcess::SupportsMultithreading())
{
// Both the task priority and the thread-priority index are read from the task's own ThreadToExecuteOn.
uint32 TaskPriority = ENamedThreads::GetTaskPriority(Task->ThreadToExecuteOn);
int32 Priority = ENamedThreads::GetThreadPriorityIndex(Task->ThreadToExecuteOn);
// If the requested priority set was not created, fall back to the normal-priority set.
if (Priority == (ENamedThreads::BackgroundThreadPriority >> ENamedThreads::ThreadPriorityShift) && (!bCreatedBackgroundPriorityThreads || !ENamedThreads::bHasBackgroundThreads))
{
Priority = ENamedThreads::NormalThreadPriority >> ENamedThreads::ThreadPriorityShift; // we don't have background threads, promote to normal
TaskPriority = ENamedThreads::NormalTaskPriority >> ENamedThreads::TaskPriorityShift; // demote to normal task pri
}
else if (Priority == (ENamedThreads::HighThreadPriority >> ENamedThreads::ThreadPriorityShift) && (!bCreatedHiPriorityThreads || !ENamedThreads::bHasHighPriorityThreads))
{
Priority = ENamedThreads::NormalThreadPriority >> ENamedThreads::ThreadPriorityShift; // we don't have hi priority threads, demote to normal
TaskPriority = ENamedThreads::HighTaskPriority >> ENamedThreads::TaskPriorityShift; // promote to hi task pri
}
check(Priority >= 0 && Priority < MAX_THREAD_PRIORITIES);
{
TASKGRAPH_SCOPE_CYCLE_COUNTER(4, STAT_TaskGraph_QueueTask_IncomingAnyThreadTasks_Push);
// A non-negative result from Push() is passed to StartTaskThread() as the index of the thread to start.
int32 IndexToStart = IncomingAnyThreadTasks[Priority].Push(Task, TaskPriority);
if (IndexToStart >= 0)
{
StartTaskThread(Priority, IndexToStart);
}
}
return;
}
else
{
// No multithreading: run "any thread" tasks on the game thread via the named-thread path below.
ThreadToExecuteOn = ENamedThreads::GameThread;
}
}
// Named-thread path: work out which thread the caller is on.
ENamedThreads::Type CurrentThreadIfKnown;
if (ENamedThreads::GetThreadIndex(InCurrentThreadIfKnown) == ENamedThreads::AnyThread)
{
CurrentThreadIfKnown = GetCurrentThread();
}
else
{
CurrentThreadIfKnown = ENamedThreads::GetThreadIndex(InCurrentThreadIfKnown);
checkThreadGraph(CurrentThreadIfKnown == ENamedThreads::GetThreadIndex(GetCurrentThread()));
}
{
// Split the target into queue index and thread index (the queue index must be read before the thread index overwrites ThreadToExecuteOn).
int32 QueueToExecuteOn = ENamedThreads::GetQueueIndex(ThreadToExecuteOn);
ThreadToExecuteOn = ENamedThreads::GetThreadIndex(ThreadToExecuteOn);
FTaskThreadBase* Target = &Thread(ThreadToExecuteOn);
if (ThreadToExecuteOn == ENamedThreads::GetThreadIndex(CurrentThreadIfKnown))
{
Target->EnqueueFromThisThread(QueueToExecuteOn, Task);
}
else
{
Target->EnqueueFromOtherThread(QueueToExecuteOn, Task);
}
}
}
/**
 * Pushes a task onto one of this thread's queues from a different thread.
 * @param QueueIndex  which of this thread's queues receives the task.
 * @param Task        task to enqueue.
 * @return true if the push reported a thread to start and StallRestartEvent was triggered; false otherwise.
 */
virtual bool EnqueueFromOtherThread(int32 QueueIndex, FBaseGraphTask* Task) override
{
	TestRandomizedThreads();
	checkThreadGraph(Task && Queue(QueueIndex).StallRestartEvent); // make sure we are started up

	// A non-zero task priority selects index 0, a zero priority selects index 1.
	uint32 PriIndex = 1;
	if (ENamedThreads::GetTaskPriority(Task->ThreadToExecuteOn))
	{
		PriIndex = 0;
	}

	const int32 ThreadToStart = Queue(QueueIndex).StallQueue.Push(Task, PriIndex);
	if (ThreadToStart < 0)
	{
		// Nothing to start.
		return false;
	}

	checkThreadGraph(ThreadToStart == 0);
	TASKGRAPH_SCOPE_CYCLE_COUNTER(1, STAT_TaskGraph_EnqueueFromOtherThread_Trigger);
	Queue(QueueIndex).StallRestartEvent->Trigger();
	return true;
}
以上是自己对渲染线程使用的一个简单总结,因为没有跟特别深,相关代码也没全部过一遍,只能算是一个局部的个人理解,之后的使用过程中会进一步地优化这部分内容,希望其他UE4使用者也可以多交流下这部分的知识~