diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 736d7eb377f27..ad2323c004809 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3638,7 +3638,7 @@ void CommandListCoreFamily::writeKernelTimestamp(uint64_t baseAdd } uint64_t address = ptrOffset(baseAddr, offset); - uint32_t registerOffset = isGlobalTimestamp ? RegisterOffsets::globalTimestampLdw : RegisterOffsets::gpThreadTimeRegAddressOffsetLow; + uint32_t registerOffset = isGlobalTimestamp ? RegisterOffsets::globalTimestampLdw : NEO::ContextTimestampRegister::getRegisterOffsetLow(); writeTimestamp(commandContainer, registerOffset, address, maskLsb, workloadPartition, postSyncCmdBuffer, copyOperation); pushTimestampPatch(outTimeStampSyncCmds, offset, postSyncCmd); adjustWriteKernelTimestamp(address, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation, isGlobalTimestamp); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl index 78a5afd436453..b1a2e94d1cd3b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -31,7 +31,7 @@ void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t a postSyncCmdBuffer = &postSyncCmd; } - uint32_t registerOffset = globalTimestamp ? RegisterOffsets::globalTimestampUn : RegisterOffsets::gpThreadTimeRegAddressOffsetHigh; + uint32_t registerOffset = globalTimestamp ? 
RegisterOffsets::globalTimestampUn : NEO::ContextTimestampRegister::getRegisterOffsetHigh(); writeTimestamp(commandContainer, registerOffset, highAddress, false, workloadPartition, postSyncCmdBuffer, copyOperation); pushTimestampPatch(outTimeStampSyncCmds, highAddress - baseAddress, postSyncCmd); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index dd5466fe0fa0c..7934dd29095af 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -3481,11 +3481,11 @@ HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommand EXPECT_NE(cmdList.end(), ++itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); EXPECT_NE(cmdList.end(), ++itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetHigh()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)); } @@ -3595,11 +3595,11 @@ HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandF EXPECT_NE(cmdList.end(), ++itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); EXPECT_NE(cmdList.end(), 
++itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetHigh()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index c9530f721b64b..e383c01f3d1ee 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -390,7 +390,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix validateTimestampRegisters(cmdList, begin, RegisterOffsets::globalTimestampLdw, globalStartAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextStartAddress, true, true); @@ -425,7 +425,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix validateTimestampRegisters(cmdList, begin, RegisterOffsets::globalTimestampLdw, globalEndAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextEndAddress, true, true); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index aadfb53115dfd..2781924a0c918 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -762,7 +762,7 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryC itor++; EXPECT_NE(cmdList.end(), itor); cmd 
= genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); @@ -774,7 +774,7 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryC itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); itor++; EXPECT_EQ(cmdList.end(), itor); } @@ -1654,7 +1654,7 @@ HWTEST_F(StagingBuffersFixture, givenAppendMemoryCopyWithStagingAndProfilingThen validateTimestampRegisters(parsedCmdList, begin, RegisterOffsets::globalTimestampLdw, globalStartAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextStartAddress, false, true); @@ -1663,7 +1663,7 @@ HWTEST_F(StagingBuffersFixture, givenAppendMemoryCopyWithStagingAndProfilingThen validateTimestampRegisters(parsedCmdList, itorWalkers[1], RegisterOffsets::globalTimestampLdw, globalEndAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextEndAddress, false, true); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 9c57c51019b63..9b0e749aced7e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -864,15 +864,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, 
startCmdList, RegisterOffsets::globalTimestampLdw, globalStartAddress, RegisterOffsets::globalTimestampUn, globalStartAddressHigh, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh, + ContextTimestampRegister::getRegisterOffsetLow(), contextStartAddress, + ContextTimestampRegister::getRegisterOffsetHigh(), contextStartAddressHigh, true, true); } else { validateTimestampRegisters(cmdList, startCmdList, RegisterOffsets::globalTimestampLdw, globalStartAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextStartAddress, true, true); } @@ -884,15 +884,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, startCmdList, RegisterOffsets::globalTimestampLdw, globalEndAddress, RegisterOffsets::globalTimestampUn, globalEndAddressHigh, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh, + ContextTimestampRegister::getRegisterOffsetLow(), contextEndAddress, + ContextTimestampRegister::getRegisterOffsetHigh(), contextEndAddressHigh, true, true); } else { validateTimestampRegisters(cmdList, startCmdList, RegisterOffsets::globalTimestampLdw, globalEndAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, + ContextTimestampRegister::getRegisterOffsetLow(), contextEndAddress, true, true); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index c87492a522330..920140dd8e402 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -470,7 +470,7 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryC itor++; EXPECT_NE(cmdList.end(), itor); cmd = 
genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextStartOffset)); itor++; itor = find(itor, cmdList.end()); @@ -481,7 +481,7 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemoryC itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + ContextTimestampRegister::getRegisterOffsetLow()); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextEndOffset)); itor++; EXPECT_EQ(cmdList.end(), itor); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp index 2440393288215..6b99d19c23c12 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -107,18 +107,18 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture { validateSrmCommand(reinterpret_cast(*srmCommands[0]), globalAddress, RegisterOffsets::csGprR12); validateSrmCommand(reinterpret_cast(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); validateSrmCommand(reinterpret_cast(*srmCommands[2]), contextAddress, RegisterOffsets::csGprR12); - validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + 
validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), ContextTimestampRegister::getRegisterOffsetHigh()); validateLrrCommand(reinterpret_cast(*srmCommands[4]), RegisterOffsets::globalTimestampLdw); - validateLrrCommand(reinterpret_cast(*srmCommands[5]), RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + validateLrrCommand(reinterpret_cast(*srmCommands[5]), ContextTimestampRegister::getRegisterOffsetLow()); } else { ASSERT_EQ(4u, srmCommands.size()); validateSrmCommand(reinterpret_cast(*srmCommands[0]), globalAddress, RegisterOffsets::globalTimestampLdw); validateSrmCommand(reinterpret_cast(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); - validateSrmCommand(reinterpret_cast(*srmCommands[2]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow); - validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + validateSrmCommand(reinterpret_cast(*srmCommands[2]), contextAddress, ContextTimestampRegister::getRegisterOffsetLow()); + validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), ContextTimestampRegister::getRegisterOffsetHigh()); } } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index e5a259eb3c67e..a0e747a4122c1 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -751,14 +751,14 @@ void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(Comm auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode); bool isBcs = NEO::EngineHelpers::isBcs(getGpgpuCommandStreamReceiver().getOsContext().getEngineType()); - EncodeStoreMMIO::encode(*commandStream, 
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr, isBcs); + EncodeStoreMMIO::encode(*commandStream, ContextTimestampRegister::getRegisterOffsetLow(), timestampContextStartGpuAddress, false, nullptr, isBcs); EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr, isBcs); MemorySynchronizationCommands::encodeAdditionalTimestampOffsets(*commandStream, timestampContextStartGpuAddress, timestampGlobalStartAddress, isBcs); auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode); - EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr, isBcs); + EncodeStoreMMIO::encode(*commandStream, ContextTimestampRegister::getRegisterOffsetLow(), timestampContextEndGpuAddress, false, nullptr, isBcs); EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr, isBcs); MemorySynchronizationCommands::encodeAdditionalTimestampOffsets(*commandStream, timestampContextEndGpuAddress, timestampGlobalEndAddress, isBcs); } diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index f41534c631992..644b2771229a8 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -306,13 +306,13 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, ProfilingTests, GivenCommandQueueWithProflingWhen auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); - EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pBeforeMI->getRegisterAddress()); + EXPECT_EQ(ContextTimestampRegister::getRegisterOffsetLow(), 
pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); - EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pAfterMI->getRegisterAddress()); + EXPECT_EQ(ContextTimestampRegister::getRegisterOffsetLow(), pAfterMI->getRegisterAddress()); ++itorAfterMI; pAfterMI = genCmdCast(*itorAfterMI); EXPECT_EQ(nullptr, pAfterMI); @@ -425,13 +425,13 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfi auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); - EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pBeforeMI->getRegisterAddress()); + EXPECT_EQ(ContextTimestampRegister::getRegisterOffsetLow(), pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); - EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pAfterMI->getRegisterAddress()); + EXPECT_EQ(ContextTimestampRegister::getRegisterOffsetLow(), pAfterMI->getRegisterAddress()); ++itorAfterMI; EXPECT_EQ(itorAfterMI, cmdList.end()); clReleaseEvent(event); diff --git a/opencl/test/unit_test/xe3p_core/command_stream_receiver_hw_tests_xe3p_core.cpp b/opencl/test/unit_test/xe3p_core/command_stream_receiver_hw_tests_xe3p_core.cpp index 28056db90206d..a68206c5188cc 100644 --- a/opencl/test/unit_test/xe3p_core/command_stream_receiver_hw_tests_xe3p_core.cpp +++ b/opencl/test/unit_test/xe3p_core/command_stream_receiver_hw_tests_xe3p_core.cpp @@ -120,11 +120,11 @@ XE3P_CORETEST_F(CommandStreamReceiverXe3pCoreTests, givenProfilingEnabledWhenBli }; { - verifyLri(cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextStartGpuAddress + sizeof(uint32_t)); + verifyLri(cmdIterator, 
RegisterOffsets::queueTimestampRegAddressOffsetHigh, timestampContextStartGpuAddress + sizeof(uint32_t)); verifyLri(++cmdIterator, RegisterOffsets::globalTimestampUn, timestampGlobalStartAddress + sizeof(uint32_t)); - verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress); + verifyLri(++cmdIterator, RegisterOffsets::queueTimestampRegAddressOffsetLow, timestampContextStartGpuAddress); verifyLri(++cmdIterator, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress); } @@ -137,11 +137,11 @@ XE3P_CORETEST_F(CommandStreamReceiverXe3pCoreTests, givenProfilingEnabledWhenBli { - verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextEndGpuAddress + sizeof(uint32_t)); + verifyLri(++cmdIterator, RegisterOffsets::queueTimestampRegAddressOffsetHigh, timestampContextEndGpuAddress + sizeof(uint32_t)); verifyLri(++cmdIterator, RegisterOffsets::globalTimestampUn, timestampGlobalEndAddress + sizeof(uint32_t)); - verifyLri(++cmdIterator, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress); + verifyLri(++cmdIterator, RegisterOffsets::queueTimestampRegAddressOffsetLow, timestampContextEndGpuAddress); verifyLri(++cmdIterator, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress); } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 1dc857ed0f5ae..3ff8b0164110e 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -806,4 +806,10 @@ struct EncodeUserInterrupt { static void encode(LinearStream &commandStream); }; +template +struct ContextTimestampRegister { + static uint32_t getRegisterOffsetLow(); + static uint32_t getRegisterOffsetHigh(); +}; + } // namespace NEO diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 28b912e8355ca..3541d12d12ba2 
100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -1327,4 +1327,14 @@ void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container container.getDevice()->getRootDeviceEnvironment()); } +template +inline uint32_t ContextTimestampRegister::getRegisterOffsetLow() { + return RegisterOffsets::gpThreadTimeRegAddressOffsetLow; +} + +template +inline uint32_t ContextTimestampRegister::getRegisterOffsetHigh() { + return RegisterOffsets::gpThreadTimeRegAddressOffsetHigh; +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 08fcc38e01380..69bf2b87394b3 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -8,6 +8,7 @@ #include "shared/source/command_container/command_encoder.h" template struct NEO::EncodeDispatchKernel; + template void NEO::EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); template void NEO::EncodeDispatchKernel::setGrfInfo(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); template void NEO::EncodeDispatchKernel::setupPreferredSlmSize(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); @@ -55,3 +56,6 @@ template struct NEO::EncodeStoreMemory; template struct NEO::EncodeDataMemory; template struct NEO::EncodeMemoryFence; template struct NEO::EncodeUserInterrupt; + +template uint32_t 
NEO::ContextTimestampRegister::getRegisterOffsetLow(); +template uint32_t NEO::ContextTimestampRegister::getRegisterOffsetHigh(); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 144f2dfc862e6..f38f8ab7731a1 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -599,7 +599,7 @@ void BlitCommandsHelper::encodeProfilingStartMmios(LinearStream &cmdS auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, ContextTimestampRegister::getRegisterOffsetLow(), timestampContextStartGpuAddress, false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr, true); } @@ -608,7 +608,7 @@ void BlitCommandsHelper::encodeProfilingEndMmios(LinearStream &cmdStr auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, ContextTimestampRegister::getRegisterOffsetLow(), timestampContextEndGpuAddress, false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr, true); } diff --git a/shared/source/helpers/gfx_core_helper_xe2_and_later.inl b/shared/source/helpers/gfx_core_helper_xe2_and_later.inl index b41b5a6a4e650..fd7ef2aa0fa5e 100644 --- 
a/shared/source/helpers/gfx_core_helper_xe2_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_xe2_and_later.inl @@ -87,7 +87,7 @@ bool GfxCoreHelperHw::isTimestampShiftRequired() const { template <> void MemorySynchronizationCommands::encodeAdditionalTimestampOffsets(LinearStream &commandStream, uint64_t contextAddress, uint64_t globalAddress, bool isBcs) { - EncodeStoreMMIO::encode(commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextAddress + sizeof(uint32_t), false, nullptr, isBcs); + EncodeStoreMMIO::encode(commandStream, ContextTimestampRegister::getRegisterOffsetHigh(), contextAddress + sizeof(uint32_t), false, nullptr, isBcs); EncodeStoreMMIO::encode(commandStream, RegisterOffsets::globalTimestampUn, globalAddress + sizeof(uint32_t), false, nullptr, isBcs); } diff --git a/shared/source/helpers/register_offsets.h b/shared/source/helpers/register_offsets.h index 8126152522c9d..cf7aa08966643 100644 --- a/shared/source/helpers/register_offsets.h +++ b/shared/source/helpers/register_offsets.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2025 Intel Corporation + * Copyright (C) 2019-2026 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -50,6 +50,9 @@ inline constexpr uint32_t semaWaitPoll = 0x0224c; inline constexpr uint32_t gpThreadTimeRegAddressOffsetLow = 0x23A8; inline constexpr uint32_t gpThreadTimeRegAddressOffsetHigh = 0x23AC; +inline constexpr uint32_t queueTimestampRegAddressOffsetLow = 0x24c0; +inline constexpr uint32_t queueTimestampRegAddressOffsetHigh = 0x24c4; + inline constexpr uint32_t globalTimestampLdw = 0x2358; inline constexpr uint32_t globalTimestampUn = 0x235c; } // namespace RegisterOffsets diff --git a/shared/source/xe3p_core/command_encoder_xe3p_core.cpp b/shared/source/xe3p_core/command_encoder_xe3p_core.cpp index 569154fc0f752..846b618bdb4ba 100644 --- a/shared/source/xe3p_core/command_encoder_xe3p_core.cpp +++ b/shared/source/xe3p_core/command_encoder_xe3p_core.cpp @@ -355,6 +355,16 @@ template template 
void EncodePostSync::setCommandLevelInterrupt(CommandType &cmd, bool interrupt) {} +template <> +uint32_t ContextTimestampRegister::getRegisterOffsetLow() { + return RegisterOffsets::queueTimestampRegAddressOffsetLow; +} + +template <> +uint32_t ContextTimestampRegister::getRegisterOffsetHigh() { + return RegisterOffsets::queueTimestampRegAddressOffsetHigh; +} + } // namespace NEO #include "shared/source/command_container/command_encoder_enablers.inl" diff --git a/shared/source/xe3p_core/command_stream_receiver_hw_xe3p_core.cpp b/shared/source/xe3p_core/command_stream_receiver_hw_xe3p_core.cpp index bc3df0a6174e1..8165aae136215 100644 --- a/shared/source/xe3p_core/command_stream_receiver_hw_xe3p_core.cpp +++ b/shared/source/xe3p_core/command_stream_receiver_hw_xe3p_core.cpp @@ -266,10 +266,10 @@ void BlitCommandsHelper::encodeProfilingStartMmios(LinearStream &cmdStre auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextStartGpuAddress + sizeof(uint32_t), false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::queueTimestampRegAddressOffsetHigh, timestampContextStartGpuAddress + sizeof(uint32_t), false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampUn, timestampGlobalStartAddress + sizeof(uint32_t), false, nullptr, true); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::queueTimestampRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr, true); } @@ -278,10 +278,10 
@@ void BlitCommandsHelper::encodeProfilingEndMmios(LinearStream &cmdStream auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, timestampContextEndGpuAddress + sizeof(uint32_t), false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::queueTimestampRegAddressOffsetHigh, timestampContextEndGpuAddress + sizeof(uint32_t), false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampUn, timestampGlobalEndAddress + sizeof(uint32_t), false, nullptr, true); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr, true); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::queueTimestampRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr, true); EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr, true); } diff --git a/shared/test/unit_test/encoders/command_encoder_tests_xe3p_and_later.cpp b/shared/test/unit_test/encoders/command_encoder_tests_xe3p_and_later.cpp index a3824676ea4e0..8055132522bea 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_xe3p_and_later.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_xe3p_and_later.cpp @@ -927,3 +927,8 @@ HWTEST2_F(CommandEncoderTestXe3pAndLater, GivenMiSemaphoreWaitLegacyWhenProgramm useSemaphore64bCmd); EXPECT_EQ(MI_SEMAPHORE_WAIT_LEGACY::WAIT_MODE::WAIT_MODE_SIGNAL_MODE, miSemaphoreLegacy->getWaitMode()); } + +HWTEST2_F(CommandEncoderTestXe3pAndLater, WhenGettingContextTimestampRegisterOffsetsThenQueueTimestampRegisterIsReturned, IsAtLeastXe3pCore) { + EXPECT_EQ(RegisterOffsets::queueTimestampRegAddressOffsetHigh, 
ContextTimestampRegister::getRegisterOffsetHigh()); + EXPECT_EQ(RegisterOffsets::queueTimestampRegAddressOffsetLow, ContextTimestampRegister::getRegisterOffsetLow()); +} diff --git a/shared/test/unit_test/encoders/test_command_encoder.cpp b/shared/test/unit_test/encoders/test_command_encoder.cpp index 2f974bdf29eef..132393a5e926a 100644 --- a/shared/test/unit_test/encoders/test_command_encoder.cpp +++ b/shared/test/unit_test/encoders/test_command_encoder.cpp @@ -1171,3 +1171,8 @@ HWTEST_F(CommandEncoderTest, givenEncodeDataInMemoryWhenProgrammingBbStartThenEx EncodeDataMemory::programBbStart(memoryPtr, dstGpuAddress, bbStartAddress, false, false, false); EXPECT_EQ(ptrOffset(baseMemoryPtr, offset), memoryPtr); } + +HWTEST2_F(CommandEncoderTest, WhenGettingContextTimestampRegisterOffsetsThenGpThreadTimeRegisterIsReturned, IsAtMostXe3Core) { + EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, ContextTimestampRegister::getRegisterOffsetHigh()); + EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, ContextTimestampRegister::getRegisterOffsetLow()); +} diff --git a/shared/test/unit_test/helpers/gfx_core_helper_xe2_and_later.cpp b/shared/test/unit_test/helpers/gfx_core_helper_xe2_and_later.cpp index 3162fb8fedaa0..a1943cac449cc 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_xe2_and_later.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_xe2_and_later.cpp @@ -55,7 +55,7 @@ HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenDebugFlagWhenCheckingIsResolveDepe EXPECT_TRUE(productHelper->isResolveDependenciesByPipeControlsSupported()); } -HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenEncodeAdditionalTimestampOffsetsThenOffsetsEncoded, IsAtLeastXe2HpgCore) { +HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenEncodeAdditionalTimestampOffsetsThenOffsetsEncoded, IsWithinXe2HpgCoreAndXe3Core) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; constexpr static auto bufferSize = 
sizeof(MI_STORE_REGISTER_MEM) * 2; diff --git a/shared/test/unit_test/helpers/gfx_core_helper_xe3p_and_later.cpp b/shared/test/unit_test/helpers/gfx_core_helper_xe3p_and_later.cpp index 84ed56d053256..27574c46aa1b4 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_xe3p_and_later.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_xe3p_and_later.cpp @@ -7,6 +7,7 @@ #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/memory_manager/allocation_properties.h" +#include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/gfx_core_helper_tests.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/hw_test.h" @@ -45,3 +46,29 @@ HWTEST2_F(GfxCoreHelperXe3pAndLaterTests, givenAllocDataWhenSetExtraAllocationDa } } } + +HWTEST2_F(GfxCoreHelperXe3pAndLaterTests, givenAtLeastXe3pWhenEncodeAdditionalTimestampOffsetsThenOffsetsEncoded, IsAtLeastXe3pCore) { + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + constexpr static auto bufferSize = sizeof(MI_STORE_REGISTER_MEM) * 2; + + char streamBuffer[bufferSize]; + LinearStream stream(streamBuffer, bufferSize); + uint64_t fstAddress = 12; + uint64_t sndAddress = 100; + MemorySynchronizationCommands::encodeAdditionalTimestampOffsets(stream, fstAddress, sndAddress, false); + + HardwareParse hwParser; + hwParser.parseCommands(stream, 0); + GenCmdList storeRegMemList = hwParser.getCommandsList(); + EXPECT_EQ(2u, storeRegMemList.size()); + auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); + + auto storeRegMem = genCmdCast(*storeRegMemIt); + EXPECT_EQ(storeRegMem->getRegisterAddress(), RegisterOffsets::queueTimestampRegAddressOffsetHigh); + EXPECT_EQ(storeRegMem->getMemoryAddress(), fstAddress + sizeof(uint32_t)); + + storeRegMem = genCmdCast(*(++storeRegMemIt)); + EXPECT_EQ(storeRegMem->getRegisterAddress(), 
RegisterOffsets::globalTimestampUn); + EXPECT_EQ(storeRegMem->getMemoryAddress(), sndAddress + sizeof(uint32_t)); +}