|
18 | 18 | #include "llvm/ADT/SmallVector.h" |
19 | 19 | #include "llvm/ADT/Statistic.h" |
20 | 20 | #include "llvm/ADT/StringRef.h" |
21 | | -#include "llvm/Analysis/MemoryBuiltins.h" |
22 | | -#include "llvm/Analysis/MemoryProfileInfo.h" |
23 | 21 | #include "llvm/Analysis/ValueTracking.h" |
24 | 22 | #include "llvm/IR/Constant.h" |
25 | 23 | #include "llvm/IR/DataLayout.h" |
26 | | -#include "llvm/IR/DiagnosticInfo.h" |
27 | 24 | #include "llvm/IR/Function.h" |
28 | 25 | #include "llvm/IR/GlobalValue.h" |
29 | 26 | #include "llvm/IR/IRBuilder.h" |
|
33 | 30 | #include "llvm/IR/Type.h" |
34 | 31 | #include "llvm/IR/Value.h" |
35 | 32 | #include "llvm/ProfileData/InstrProf.h" |
36 | | -#include "llvm/ProfileData/InstrProfReader.h" |
37 | | -#include "llvm/Support/BLAKE3.h" |
38 | 33 | #include "llvm/Support/CommandLine.h" |
39 | 34 | #include "llvm/Support/Debug.h" |
40 | | -#include "llvm/Support/HashBuilder.h" |
41 | 35 | #include "llvm/TargetParser/Triple.h" |
42 | 36 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
43 | 37 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
44 | | -#include <map> |
45 | | -#include <set> |
46 | 38 |
|
47 | 39 | using namespace llvm; |
48 | | -using namespace llvm::memprof; |
49 | 40 |
|
50 | 41 | #define DEBUG_TYPE "memprof" |
51 | 42 |
|
52 | | -namespace llvm { |
53 | | -extern cl::opt<bool> PGOWarnMissing; |
54 | | -extern cl::opt<bool> NoPGOWarnMismatch; |
55 | | -extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; |
56 | | -} // namespace llvm |
57 | | - |
58 | 43 | constexpr int LLVM_MEM_PROFILER_VERSION = 1; |
59 | 44 |
|
60 | 45 | // Size of memory mapped to a single shadow location. |
@@ -143,7 +128,6 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); |
143 | 128 | STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); |
144 | 129 | STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); |
145 | 130 | STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); |
146 | | -STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); |
147 | 131 |
|
148 | 132 | namespace { |
149 | 133 |
|
@@ -617,251 +601,3 @@ bool MemProfiler::instrumentFunction(Function &F) { |
617 | 601 |
|
618 | 602 | return FunctionModified; |
619 | 603 | } |
620 | | - |
621 | | -static void addCallsiteMetadata(Instruction &I, |
622 | | - std::vector<uint64_t> &InlinedCallStack, |
623 | | - LLVMContext &Ctx) { |
624 | | - I.setMetadata(LLVMContext::MD_callsite, |
625 | | - buildCallstackMetadata(InlinedCallStack, Ctx)); |
626 | | -} |
627 | | - |
628 | | -static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, |
629 | | - uint32_t Column) { |
630 | | - llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little> |
631 | | - HashBuilder; |
632 | | - HashBuilder.add(Function, LineOffset, Column); |
633 | | - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); |
634 | | - uint64_t Id; |
635 | | - std::memcpy(&Id, Hash.data(), sizeof(Hash)); |
636 | | - return Id; |
637 | | -} |
638 | | - |
639 | | -static uint64_t computeStackId(const memprof::Frame &Frame) { |
640 | | - return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); |
641 | | -} |
642 | | - |
643 | | -static void addCallStack(CallStackTrie &AllocTrie, |
644 | | - const AllocationInfo *AllocInfo) { |
645 | | - SmallVector<uint64_t> StackIds; |
646 | | - for (const auto &StackFrame : AllocInfo->CallStack) |
647 | | - StackIds.push_back(computeStackId(StackFrame)); |
648 | | - auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), |
649 | | - AllocInfo->Info.getAllocCount(), |
650 | | - AllocInfo->Info.getTotalLifetime()); |
651 | | - AllocTrie.addCallStack(AllocType, StackIds); |
652 | | -} |
653 | | - |
654 | | -// Helper to compare the InlinedCallStack computed from an instruction's debug |
655 | | -// info to a list of Frames from profile data (either the allocation data or a |
656 | | -// callsite). For callsites, the StartIndex to use in the Frame array may be |
657 | | -// non-zero. |
658 | | -static bool |
659 | | -stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, |
660 | | - ArrayRef<uint64_t> InlinedCallStack, |
661 | | - unsigned StartIndex = 0) { |
662 | | - auto StackFrame = ProfileCallStack.begin() + StartIndex; |
663 | | - auto InlCallStackIter = InlinedCallStack.begin(); |
664 | | - for (; StackFrame != ProfileCallStack.end() && |
665 | | - InlCallStackIter != InlinedCallStack.end(); |
666 | | - ++StackFrame, ++InlCallStackIter) { |
667 | | - uint64_t StackId = computeStackId(*StackFrame); |
668 | | - if (StackId != *InlCallStackIter) |
669 | | - return false; |
670 | | - } |
671 | | - // Return true if we found and matched all stack ids from the call |
672 | | - // instruction. |
673 | | - return InlCallStackIter == InlinedCallStack.end(); |
674 | | -} |
675 | | - |
676 | | -void llvm::readMemprof(Module &M, Function &F, |
677 | | - IndexedInstrProfReader *MemProfReader, |
678 | | - const TargetLibraryInfo &TLI) { |
679 | | - auto &Ctx = M.getContext(); |
680 | | - |
681 | | - auto FuncName = getPGOFuncName(F); |
682 | | - auto FuncGUID = Function::getGUID(FuncName); |
683 | | - Expected<memprof::MemProfRecord> MemProfResult = |
684 | | - MemProfReader->getMemProfRecord(FuncGUID); |
685 | | - if (Error E = MemProfResult.takeError()) { |
686 | | - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { |
687 | | - auto Err = IPE.get(); |
688 | | - bool SkipWarning = false; |
689 | | - LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName |
690 | | - << ": "); |
691 | | - if (Err == instrprof_error::unknown_function) { |
692 | | - NumOfMemProfMissing++; |
693 | | - SkipWarning = !PGOWarnMissing; |
694 | | - LLVM_DEBUG(dbgs() << "unknown function"); |
695 | | - } else if (Err == instrprof_error::hash_mismatch) { |
696 | | - SkipWarning = |
697 | | - NoPGOWarnMismatch || |
698 | | - (NoPGOWarnMismatchComdatWeak && |
699 | | - (F.hasComdat() || |
700 | | - F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
701 | | - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); |
702 | | - } |
703 | | - |
704 | | - if (SkipWarning) |
705 | | - return; |
706 | | - |
707 | | - std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + |
708 | | - Twine(" Hash = ") + std::to_string(FuncGUID)) |
709 | | - .str(); |
710 | | - |
711 | | - Ctx.diagnose( |
712 | | - DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
713 | | - }); |
714 | | - return; |
715 | | - } |
716 | | - |
717 | | - // Build maps of the location hash to all profile data with that leaf location |
718 | | - // (allocation info and the callsites). |
719 | | - std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; |
720 | | - // For the callsites we need to record the index of the associated frame in |
721 | | - // the frame array (see comments below where the map entries are added). |
722 | | - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> |
723 | | - LocHashToCallSites; |
724 | | - const auto MemProfRec = std::move(MemProfResult.get()); |
725 | | - for (auto &AI : MemProfRec.AllocSites) { |
726 | | - // Associate the allocation info with the leaf frame. The later matching |
727 | | - // code will match any inlined call sequences in the IR with a longer prefix |
728 | | - // of call stack frames. |
729 | | - uint64_t StackId = computeStackId(AI.CallStack[0]); |
730 | | - LocHashToAllocInfo[StackId].insert(&AI); |
731 | | - } |
732 | | - for (auto &CS : MemProfRec.CallSites) { |
733 | | - // Need to record all frames from leaf up to and including this function, |
734 | | - // as any of these may or may not have been inlined at this point. |
735 | | - unsigned Idx = 0; |
736 | | - for (auto &StackFrame : CS) { |
737 | | - uint64_t StackId = computeStackId(StackFrame); |
738 | | - LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); |
739 | | - // Once we find this function, we can stop recording. |
740 | | - if (StackFrame.Function == FuncGUID) |
741 | | - break; |
742 | | - } |
743 | | - assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); |
744 | | - } |
745 | | - |
746 | | - auto GetOffset = [](const DILocation *DIL) { |
747 | | - return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & |
748 | | - 0xffff; |
749 | | - }; |
750 | | - |
751 | | - // Now walk the instructions, looking up the associated profile data using |
752 | | - // debug locations. |
753 | | - for (auto &BB : F) { |
754 | | - for (auto &I : BB) { |
755 | | - if (I.isDebugOrPseudoInst()) |
756 | | - continue; |
757 | | - // We are only interested in calls (allocation or interior call stack |
758 | | - // context calls). |
759 | | - auto *CI = dyn_cast<CallBase>(&I); |
760 | | - if (!CI) |
761 | | - continue; |
762 | | - auto *CalledFunction = CI->getCalledFunction(); |
763 | | - if (CalledFunction && CalledFunction->isIntrinsic()) |
764 | | - continue; |
765 | | - // List of call stack ids computed from the location hashes on debug |
766 | | - // locations (leaf to inlined at root). |
767 | | - std::vector<uint64_t> InlinedCallStack; |
768 | | - // Was the leaf location found in one of the profile maps? |
769 | | - bool LeafFound = false; |
770 | | - // If leaf was found in a map, iterators pointing to its location in both |
771 | | - // of the maps. It might exist in neither, one, or both (the latter case |
772 | | - // can happen because we don't currently have discriminators to |
773 | | - // distinguish the case when a single line/col maps to both an allocation |
774 | | - // and another callsite). |
775 | | - std::map<uint64_t, std::set<const AllocationInfo *>>::iterator |
776 | | - AllocInfoIter; |
777 | | - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, |
778 | | - unsigned>>>::iterator CallSitesIter; |
779 | | - for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; |
780 | | - DIL = DIL->getInlinedAt()) { |
781 | | - // Use C++ linkage name if possible. Need to compile with |
782 | | - // -fdebug-info-for-profiling to get linkage name. |
783 | | - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); |
784 | | - if (Name.empty()) |
785 | | - Name = DIL->getScope()->getSubprogram()->getName(); |
786 | | - auto CalleeGUID = Function::getGUID(Name); |
787 | | - auto StackId = |
788 | | - computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); |
789 | | - // LeafFound will only be false on the first iteration, since we either |
790 | | - // set it true or break out of the loop below. |
791 | | - if (!LeafFound) { |
792 | | - AllocInfoIter = LocHashToAllocInfo.find(StackId); |
793 | | - CallSitesIter = LocHashToCallSites.find(StackId); |
794 | | - // Check if the leaf is in one of the maps. If not, no need to look |
795 | | - // further at this call. |
796 | | - if (AllocInfoIter == LocHashToAllocInfo.end() && |
797 | | - CallSitesIter == LocHashToCallSites.end()) |
798 | | - break; |
799 | | - LeafFound = true; |
800 | | - } |
801 | | - InlinedCallStack.push_back(StackId); |
802 | | - } |
803 | | - // If leaf not in either of the maps, skip inst. |
804 | | - if (!LeafFound) |
805 | | - continue; |
806 | | - |
807 | | - // First add !memprof metadata from allocation info, if we found the |
808 | | - // instruction's leaf location in that map, and if the rest of the |
809 | | - // instruction's locations match the prefix Frame locations on an |
810 | | - // allocation context with the same leaf. |
811 | | - if (AllocInfoIter != LocHashToAllocInfo.end()) { |
812 | | - // Only consider allocations via new, to reduce unnecessary metadata, |
813 | | - // since those are the only allocations that will be targeted initially. |
814 | | - if (!isNewLikeFn(CI, &TLI)) |
815 | | - continue; |
816 | | - // We may match this instruction's location list to multiple MIB |
817 | | - // contexts. Add them to a Trie specialized for trimming the contexts to |
818 | | - // the minimal needed to disambiguate contexts with unique behavior. |
819 | | - CallStackTrie AllocTrie; |
820 | | - for (auto *AllocInfo : AllocInfoIter->second) { |
821 | | - // Check the full inlined call stack against this one. |
822 | | - // If we found and thus matched all frames on the call, include |
823 | | - // this MIB. |
824 | | - if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, |
825 | | - InlinedCallStack)) |
826 | | - addCallStack(AllocTrie, AllocInfo); |
827 | | - } |
828 | | - // We might not have matched any to the full inlined call stack. |
829 | | - // But if we did, create and attach metadata, or a function attribute if |
830 | | - // all contexts have identical profiled behavior. |
831 | | - if (!AllocTrie.empty()) { |
832 | | - // MemprofMDAttached will be false if a function attribute was |
833 | | - // attached. |
834 | | - bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); |
835 | | - assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); |
836 | | - if (MemprofMDAttached) { |
837 | | - // Add callsite metadata for the instruction's location list so that |
838 | | - // it is simpler later on to identify which part of the MIB contexts |
839 | | - // are from this particular instruction (including during inlining, |
840 | | - // when the callsite metadata will be updated appropriately). |
841 | | - // FIXME: can this be changed to strip out the matching stack |
842 | | - // context ids from the MIB contexts and not add any callsite |
843 | | - // metadata here to save space? |
844 | | - addCallsiteMetadata(I, InlinedCallStack, Ctx); |
845 | | - } |
846 | | - } |
847 | | - continue; |
848 | | - } |
849 | | - |
850 | | - // Otherwise, add callsite metadata. If we reach here then we found the |
851 | | - // instruction's leaf location in the callsites map and not the allocation |
852 | | - // map. |
853 | | - assert(CallSitesIter != LocHashToCallSites.end()); |
854 | | - for (auto CallStackIdx : CallSitesIter->second) { |
855 | | - // If we found and thus matched all frames on the call, create and |
856 | | - // attach call stack metadata. |
857 | | - if (stackFrameIncludesInlinedCallStack( |
858 | | - *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { |
859 | | - addCallsiteMetadata(I, InlinedCallStack, Ctx); |
860 | | - // Only need to find one with a matching call stack and add a single |
861 | | - // callsite metadata. |
862 | | - break; |
863 | | - } |
864 | | - } |
865 | | - } |
866 | | - } |
867 | | -} |
0 commit comments