Bug 1450185 - Implement DWARF stack walker for aarch64. r?mstange draft
author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
date: Thu, 12 Apr 2018 18:22:03 +0900
changeset: 781557 e970bda0b34dba840ce4c3d1f6d28945c671352e
parent: 781489 da809ecceaf3a8ada0aa2d7115822d39d0439654
push id: 106339
push user: bmo:m_kato@ga2.so-net.ne.jp
push date: Fri, 13 Apr 2018 05:23:08 +0000
reviewers: mstange
bugs: 1450185
milestone: 61.0a1
Bug 1450185 - Implement DWARF stack walker for aarch64. r?mstange Since aarch64's DWARF doesn't have a pc register, I use x30 (the link register) as the instruction address for every frame except the first. I tested this with gtest on Linux/aarch64, and the profiler works on Android/aarch64. EM_AARCH64 might not be defined on our builders since their headers are old, so this define is needed. Also, mAdminThreadId is unused in release builds and the aarch64 compiler reports that as an error, so I use Unused.h. MozReview-Commit-ID: 8VDb5i0vwBT
tools/profiler/core/PlatformMacros.h
tools/profiler/core/platform.cpp
tools/profiler/lul/LulDwarf.cpp
tools/profiler/lul/LulDwarfExt.h
tools/profiler/lul/LulDwarfSummariser.cpp
tools/profiler/lul/LulElf.cpp
tools/profiler/lul/LulMain.cpp
tools/profiler/lul/LulMain.h
tools/profiler/lul/LulMainInt.h
tools/profiler/moz.build
tools/profiler/tests/gtest/LulTestDwarf.cpp
tools/profiler/tests/gtest/moz.build
--- a/tools/profiler/core/PlatformMacros.h
+++ b/tools/profiler/core/PlatformMacros.h
@@ -63,16 +63,21 @@
 # define GP_ARCH_amd64 1
 # define GP_OS_linux 1
 
 #elif defined(__linux__) && defined(__arm__)
 # define GP_PLAT_arm_linux 1
 # define GP_ARCH_arm 1
 # define GP_OS_linux 1
 
+#elif defined(__linux__) && defined(__aarch64__)
+# define GP_PLAT_aarch64_linux 1
+# define GP_ARCH_aarch64 1
+# define GP_OS_linux 1
+
 #elif defined(__linux__) && defined(__mips64)
 # define GP_PLAT_mips64_linux 1
 # define GP_ARCH_mips64 1
 # define GP_OS_linux 1
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 # define GP_PLAT_amd64_darwin 1
 # define GP_ARCH_amd64 1
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -107,17 +107,18 @@
 #if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
 # define HAVE_NATIVE_UNWIND
 # define USE_EHABI_STACKWALK
 # include "EHABIStackWalk.h"
 #endif
 
 // Linux builds use LUL, which uses DWARF info to unwind stacks.
 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
-    defined(GP_PLAT_mips64_linux)
+    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_aarch64_linux) || \
+    defined(GP_PLAT_aarch64_android)
 # define HAVE_NATIVE_UNWIND
 # define USE_LUL_STACKWALK
 # include "lul/LulMain.h"
 # include "lul/platform-linux-lul.h"
 
 // On linux we use LUL for periodic samples and synchronous samples, but we use
 // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
 // (See the comment at the top of the file for a definition of
@@ -1207,16 +1208,21 @@ DoLULBacktrace(PSLockRef aLock, const Re
   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
   startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
   startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
   startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
   startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
   startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
   startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
+#elif defined(GP_PLAT_aarch64_linux) || defined(GP_PLAT_aarch64_android)
+  startRegs.pc  = lul::TaggedUWord(mc->pc);
+  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+  startRegs.sp  = lul::TaggedUWord(mc->sp);
 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
 #elif defined(GP_PLAT_mips64_linux)
   startRegs.pc = lul::TaggedUWord(mc->pc);
   startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
   startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
@@ -1259,16 +1265,19 @@ DoLULBacktrace(PSLockRef aLock, const Re
 
   {
 #if defined(GP_PLAT_amd64_linux)
     uintptr_t rEDZONE_SIZE = 128;
     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+#elif defined(GP_PLAT_aarch64_linux) || defined(GP_PLAT_aarch64_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_mips64_linux)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
 #else
 #   error "Unknown plat"
--- a/tools/profiler/lul/LulDwarf.cpp
+++ b/tools/profiler/lul/LulDwarf.cpp
@@ -1897,16 +1897,35 @@ unsigned int DwarfCFIToModule::RegisterN
    8 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
    8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
    8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
   */
   return 13 * 8;
 }
 
+// AArch64 DWARF register numbering, per ARM IHI 0057A, section 3.1.
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  /*
+   8 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+   8 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+   8 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+  */
+  return 12 * 8;  // 12 rows of 8 register numbers, as tabulated above
+}
+
 unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
   /*
    8 "$zero", "$at",  "$v0",  "$v1",  "$a0",   "$a1",  "$a2",  "$a3",
    8 "$t0",   "$t1",  "$t2",  "$t3",  "$t4",   "$t5",  "$t6",  "$t7",
    8 "$s0",   "$s1",  "$s2",  "$s3",  "$s4",   "$s5",  "$s6",  "$s7",
    8 "$t8",   "$t9",  "$k0",  "$k1",  "$gp",   "$sp",  "$fp",  "$ra",
    9 "$lo",   "$hi",  "$pc",  "$f0",  "$f1",   "$f2",  "$f3",  "$f4",  "$f5",
    8 "$f6",   "$f7",  "$f8",  "$f9",  "$f10",  "$f11", "$f12", "$f13",
--- a/tools/profiler/lul/LulDwarfExt.h
+++ b/tools/profiler/lul/LulDwarfExt.h
@@ -1212,16 +1212,19 @@ class DwarfCFIToModule: public CallFrame
     static unsigned int I386();
 
     // AMD x86_64, AMD64, Intel EM64T, or Intel 64
     static unsigned int X86_64();
 
     // ARM.
     static unsigned int ARM();
 
+    // AARCH64.
+    static unsigned int ARM64();
+
     // MIPS.
     static unsigned int MIPS();
   };
 
   // Create a handler for the dwarf2reader::CallFrameInfo parser that
   // records the stack unwinding information it receives in SUMM.
   //
   // Use REGISTER_NAMES[I] as the name of register number I; *this
--- a/tools/profiler/lul/LulDwarfSummariser.cpp
+++ b/tools/profiler/lul/LulDwarfSummariser.cpp
@@ -232,16 +232,87 @@ Summariser::Rule(uintptr_t aAddress, int
   mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
 
   // If there's no information about R15 (the return address), say
   // it's a copy of R14 (the link register).
   if (mCurrRules.mR15expr.mHow == UNKNOWN) {
     mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
   }
 
+#elif defined(GP_ARCH_aarch64)
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_AARCH64_X29:
+        case DW_REG_AARCH64_SP:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_AARCH64_X29:
+    case DW_REG_AARCH64_X30:
+    case DW_REG_AARCH64_SP: {
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for X29/X30/SP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for X29/X30/SP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_AARCH64_X29: mCurrRules.mX29expr = expr; break;
+        case DW_REG_AARCH64_X30: mCurrRules.mX30expr = expr; break;
+        case DW_REG_AARCH64_SP:  mCurrRules.mSPexpr  = expr; break;
+        default: MOZ_ASSERT(0);
+      }
+      break;
+    }
+    default:
+     // Leave |reason1| and |reason2| unset here, for the reasons explained
+     // at the analogous point in the ARM case above.
+     goto cant_summarise;
+  }
+
+  if (mCurrRules.mX29expr.mHow == UNKNOWN) {
+    mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0);
+  }
+  if (mCurrRules.mX30expr.mHow == UNKNOWN) {
+    mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0);
+  }
+  // On aarch64, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
 
   // ---------------- x64/x86 ---------------- //
 
   // Now, can we add the rule to our summary?  This depends on whether
   // the registers and the overall expression are representable.  This
   // is the heart of the summarisation process.
   switch (aNewReg) {
--- a/tools/profiler/lul/LulElf.cpp
+++ b/tools/profiler/lul/LulElf.cpp
@@ -68,16 +68,20 @@
 #include "LulMainInt.h"
 
 
 #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
 // bionic and older glibsc don't define it
 # define SHT_ARM_EXIDX (SHT_LOPROC + 1)
 #endif
 
+// Older Linux headers don't define EM_AARCH64.
+#ifndef EM_AARCH64
+#define EM_AARCH64 183
+#endif
 
 // This namespace contains helper functions.
 namespace {
 
 using lul::DwarfCFIToModule;
 using lul::FindElfSectionByName;
 using lul::GetOffset;
 using lul::IsValidElf;
@@ -162,16 +166,19 @@ bool DwarfCFIRegisterNames(const typenam
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
       return true;
     case EM_X86_64:
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
       return true;
     case EM_MIPS:
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS();
       return true;
+    case EM_AARCH64:
+      *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64();
+      return true;
     default:
       MOZ_ASSERT(0);
       return false;
   }
 }
 
 template<typename ElfClass>
 bool LoadDwarfCFI(const string& dwarf_filename,
@@ -449,16 +456,17 @@ bool LoadSymbols(const string& obj_file,
 // ELF_HEADER.
 template<typename ElfClass>
 const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
   typedef typename ElfClass::Half Half;
   Half arch = elf_header->e_machine;
   switch (arch) {
     case EM_386:        return "x86";
     case EM_ARM:        return "arm";
+    case EM_AARCH64:    return "arm64";
     case EM_MIPS:       return "mips";
     case EM_PPC64:      return "ppc64";
     case EM_PPC:        return "ppc";
     case EM_S390:       return "s390";
     case EM_SPARC:      return "sparc";
     case EM_SPARCV9:    return "sparcv9";
     case EM_X86_64:     return "x86_64";
     default: return NULL;
--- a/tools/profiler/lul/LulMain.cpp
+++ b/tools/profiler/lul/LulMain.cpp
@@ -71,16 +71,20 @@ NameOf_DW_REG(int16_t aReg)
     case DW_REG_INTEL_XIP: return "xip";
 #elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:    return "r7";
     case DW_REG_ARM_R11:   return "r11";
     case DW_REG_ARM_R12:   return "r12";
     case DW_REG_ARM_R13:   return "r13";
     case DW_REG_ARM_R14:   return "r14";
     case DW_REG_ARM_R15:   return "r15";
+#elif defined(GP_ARCH_aarch64)
+    case DW_REG_AARCH64_X29: return "x29";
+    case DW_REG_AARCH64_X30: return "x30";
+    case DW_REG_AARCH64_SP:  return "sp";
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:   return "sp";
     case DW_REG_MIPS_FP:   return "fp";
     case DW_REG_MIPS_PC:   return "pc";
 #else
 # error "Unsupported arch"
 #endif
     default: return "???";
@@ -134,16 +138,20 @@ RuleSet::Print(void(*aLog)(const char*))
   res += mXbpExpr.ShowRule(" BP");
 #elif defined(GP_ARCH_arm)
   res += mR15expr.ShowRule(" R15");
   res += mR7expr .ShowRule(" R7" );
   res += mR11expr.ShowRule(" R11");
   res += mR12expr.ShowRule(" R12");
   res += mR13expr.ShowRule(" R13");
   res += mR14expr.ShowRule(" R14");
+#elif defined(GP_ARCH_aarch64)
+  res += mX29expr.ShowRule(" X29");
+  res += mX30expr.ShowRule(" X30");
+  res += mSPexpr .ShowRule(" SP");
 #elif defined(GP_ARCH_mips64)
   res += mPCexpr.ShowRule(" PC");
   res += mSPexpr.ShowRule(" SP");
   res += mFPexpr.ShowRule(" FP");
 #else
 # error "Unsupported arch"
 #endif
   aLog(res.c_str());
@@ -159,16 +167,20 @@ RuleSet::ExprForRegno(DW_REG_NUMBER aReg
     case DW_REG_INTEL_XBP: return &mXbpExpr;
 #   elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R15:   return &mR15expr;
     case DW_REG_ARM_R14:   return &mR14expr;
     case DW_REG_ARM_R13:   return &mR13expr;
     case DW_REG_ARM_R12:   return &mR12expr;
     case DW_REG_ARM_R11:   return &mR11expr;
     case DW_REG_ARM_R7:    return &mR7expr;
+#   elif defined(GP_ARCH_aarch64)
+    case DW_REG_AARCH64_X29: return &mX29expr;
+    case DW_REG_AARCH64_X30: return &mX30expr;
+    case DW_REG_AARCH64_SP:  return &mSPexpr;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:    return &mSPexpr;
     case DW_REG_MIPS_FP:    return &mFPexpr;
     case DW_REG_MIPS_PC:    return &mPCexpr;
 #   else
 #     error "Unknown arch"
 #   endif
     default: return nullptr;
@@ -679,16 +691,17 @@ LUL::LUL(void (*aLog)(const char*))
   : mLog(aLog)
   , mAdminMode(true)
   , mAdminThreadId(gettid())
   , mPriMap(new PriMap(aLog))
   , mSegArray(new SegArray())
   , mUSU(new UniqueStringUniverse())
 {
   LUL_LOG("LUL::LUL: Created object");
+  Unused << mAdminThreadId;
 }
 
 
 LUL::~LUL()
 {
   LUL_LOG("LUL::~LUL: Destroyed object");
   delete mPriMap;
   delete mSegArray;
@@ -912,16 +925,20 @@ TaggedUWord EvaluateReg(int16_t aReg, co
     case DW_REG_INTEL_XIP: return aOldRegs->xip;
 #elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:    return aOldRegs->r7;
     case DW_REG_ARM_R11:   return aOldRegs->r11;
     case DW_REG_ARM_R12:   return aOldRegs->r12;
     case DW_REG_ARM_R13:   return aOldRegs->r13;
     case DW_REG_ARM_R14:   return aOldRegs->r14;
     case DW_REG_ARM_R15:   return aOldRegs->r15;
+#elif defined(GP_ARCH_aarch64)
+    case DW_REG_AARCH64_X29: return aOldRegs->x29;
+    case DW_REG_AARCH64_X30: return aOldRegs->x30;
+    case DW_REG_AARCH64_SP:  return aOldRegs->sp;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:   return aOldRegs->sp;
     case DW_REG_MIPS_FP:   return aOldRegs->fp;
     case DW_REG_MIPS_PC:   return aOldRegs->pc;
 #else
 # error "Unsupported arch"
 #endif
     default: MOZ_ASSERT(0); return TaggedUWord();
@@ -1108,16 +1125,21 @@ void UseRuleSet(/*MOD*/UnwindRegs* aRegs
   aRegs->xip = TaggedUWord();
 #elif defined(GP_ARCH_arm)
   aRegs->r7  = TaggedUWord();
   aRegs->r11 = TaggedUWord();
   aRegs->r12 = TaggedUWord();
   aRegs->r13 = TaggedUWord();
   aRegs->r14 = TaggedUWord();
   aRegs->r15 = TaggedUWord();
+#elif defined(GP_ARCH_aarch64)
+  aRegs->x29 = TaggedUWord();
+  aRegs->x30 = TaggedUWord();
+  aRegs->sp  = TaggedUWord();
+  aRegs->pc  = TaggedUWord();
 #elif defined(GP_ARCH_mips64)
   aRegs->sp  = TaggedUWord();
   aRegs->fp  = TaggedUWord();
   aRegs->pc  = TaggedUWord();
 #else
 #  error "Unsupported arch"
 #endif
 
@@ -1149,16 +1171,23 @@ void UseRuleSet(/*MOD*/UnwindRegs* aRegs
   aRegs->r12
     = aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r13
     = aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r14
     = aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r15
     = aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_aarch64)
+  aRegs->x29
+    = aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+  aRegs->x30
+    = aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+  aRegs->sp
+    = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
 #elif defined(GP_ARCH_mips64)
   aRegs->sp
     = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->fp
     = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->pc
     = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
 #else
@@ -1208,16 +1237,26 @@ LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
                      (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
                      (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
                      (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
                      (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
                      (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
                      (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
       buf[sizeof(buf)-1] = 0;
       mLog(buf);
+#elif defined(GP_ARCH_aarch64)
+      SprintfLiteral(buf,
+                     "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
+                     "  sp %d/%llx\n",
+                     (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+                     (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
+                     (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
+                     (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
 #elif defined(GP_ARCH_mips64)
       SprintfLiteral(buf,
                      "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
                      (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
                      (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
                      (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
       buf[sizeof(buf)-1] = 0;
       mLog(buf);
@@ -1227,16 +1266,19 @@ LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
     }
 
 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
     TaggedUWord ia = regs.xip;
     TaggedUWord sp = regs.xsp;
 #elif defined(GP_ARCH_arm)
     TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
     TaggedUWord sp = regs.r13;
+#elif defined(GP_ARCH_aarch64)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
+    TaggedUWord sp = regs.sp;
 #elif defined(GP_ARCH_mips64)
     TaggedUWord ia = regs.pc;
     TaggedUWord sp = regs.sp;
 #else
 # error "Unsupported arch"
 #endif
 
     if (*aFramesUsed >= aFramesAvail) {
@@ -1525,16 +1567,36 @@ bool GetAndCheckStackTrace(LUL* aLUL, co
   startRegs.r15 = TaggedUWord(block[0]);
   startRegs.r14 = TaggedUWord(block[1]);
   startRegs.r13 = TaggedUWord(block[2]);
   startRegs.r12 = TaggedUWord(block[3]);
   startRegs.r11 = TaggedUWord(block[4]);
   startRegs.r7  = TaggedUWord(block[5]);
   const uintptr_t REDZONE_SIZE = 0;
   uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_ARCH_aarch64)
+  volatile uintptr_t block[4];
+  MOZ_ASSERT(sizeof(block) == 32);
+  __asm__ __volatile__(
+    "adr x0, . \n\t"
+    "str x0, [%0, #0] \n\t"
+    "str x29, [%0, #8] \n\t"
+    "str x30, [%0, #16] \n\t"
+    "mov x0, sp \n\t"
+    "str x0, [%0, #24] \n\t"
+    :
+    : "r"(&block[0])
+    : "memory", "x0"
+  );
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.x29 = TaggedUWord(block[1]);
+  startRegs.x30 = TaggedUWord(block[2]);
+  startRegs.sp = TaggedUWord(block[3]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
 #elif defined(GP_ARCH_mips64)
   volatile uintptr_t block[3];
   MOZ_ASSERT(sizeof(block) == 24);
   __asm__ __volatile__(
     "sd $29, 8(%0)     \n"
     "sd $30, 16(%0)    \n"
     :
     :"r"(block)
--- a/tools/profiler/lul/LulMain.h
+++ b/tools/profiler/lul/LulMain.h
@@ -143,16 +143,21 @@ private:
 struct UnwindRegs {
 #if defined(GP_ARCH_arm)
   TaggedUWord r7;
   TaggedUWord r11;
   TaggedUWord r12;
   TaggedUWord r13;
   TaggedUWord r14;
   TaggedUWord r15;
+#elif defined(GP_ARCH_aarch64)
+  TaggedUWord x29;
+  TaggedUWord x30;
+  TaggedUWord sp;
+  TaggedUWord pc;
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
   TaggedUWord xbp;
   TaggedUWord xsp;
   TaggedUWord xip;
 #elif defined(GP_ARCH_mips64)
   TaggedUWord sp;
   TaggedUWord fp;
   TaggedUWord pc;
--- a/tools/profiler/lul/LulMainInt.h
+++ b/tools/profiler/lul/LulMainInt.h
@@ -38,16 +38,21 @@ enum DW_REG_NUMBER {
 #if defined(GP_ARCH_arm)
   // ARM registers
   DW_REG_ARM_R7  = 7,
   DW_REG_ARM_R11 = 11,
   DW_REG_ARM_R12 = 12,
   DW_REG_ARM_R13 = 13,
   DW_REG_ARM_R14 = 14,
   DW_REG_ARM_R15 = 15,
+#elif defined(GP_ARCH_aarch64)
+  // aarch64 registers
+  DW_REG_AARCH64_X29 = 29,
+  DW_REG_AARCH64_X30 = 30,
+  DW_REG_AARCH64_SP  = 31,
 #elif defined(GP_ARCH_amd64)
   // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are
   // combined, a merged set of register constants is needed.
   DW_REG_INTEL_XBP = 6,
   DW_REG_INTEL_XSP = 7,
   DW_REG_INTEL_XIP = 16,
 #elif defined(GP_ARCH_x86)
   DW_REG_INTEL_XBP = 5,
@@ -274,16 +279,20 @@ public:
   LExpr  mXbpExpr;
 #elif defined(GP_ARCH_arm)
   LExpr  mR15expr; // return address
   LExpr  mR14expr;
   LExpr  mR13expr;
   LExpr  mR12expr;
   LExpr  mR11expr;
   LExpr  mR7expr;
+#elif defined(GP_ARCH_aarch64)
+  LExpr  mX29expr; // frame pointer register
+  LExpr  mX30expr; // link register
+  LExpr  mSPexpr;
 #elif defined(GP_ARCH_mips64)
   LExpr  mPCexpr;
   LExpr  mFPexpr;
   LExpr  mSPexpr;
 #else
 #   error "Unknown arch"
 #endif
 };
@@ -294,16 +303,19 @@ static inline bool registerIsTracked(DW_
   switch (reg) {
 #   if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
     case DW_REG_INTEL_XBP: case DW_REG_INTEL_XSP: case DW_REG_INTEL_XIP:
       return true;
 #   elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:  case DW_REG_ARM_R11: case DW_REG_ARM_R12:
     case DW_REG_ARM_R13: case DW_REG_ARM_R14: case DW_REG_ARM_R15:
       return true;
+#   elif defined(GP_ARCH_aarch64)
+    case DW_REG_AARCH64_X29:  case DW_REG_AARCH64_X30: case DW_REG_AARCH64_SP:
+      return true;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_FP:  case DW_REG_MIPS_SP: case DW_REG_MIPS_PC:
       return true;
 #   else
 #     error "Unknown arch"
 #   endif
     default:
       return false;
--- a/tools/profiler/moz.build
+++ b/tools/profiler/moz.build
@@ -43,17 +43,17 @@ if CONFIG['MOZ_GECKO_PROFILER']:
             'gecko/nsProfiler.cpp',
         ]
     else:
         UNIFIED_SOURCES += [
             'gecko/nsProfiler.cpp',
         ]
 
     if CONFIG['OS_TARGET'] in ('Android', 'Linux'):
-        if CONFIG['CPU_ARCH'] in ('arm', 'x86', 'x86_64', 'mips64'):
+        if CONFIG['CPU_ARCH'] in ('arm', 'aarch64', 'x86', 'x86_64', 'mips64'):
             UNIFIED_SOURCES += [
                 'lul/AutoObjectMapper.cpp',
                 'lul/LulCommon.cpp',
                 'lul/LulDwarf.cpp',
                 'lul/LulDwarfSummariser.cpp',
                 'lul/LulElf.cpp',
                 'lul/LulMain.cpp',
                 'lul/platform-linux-lul.cpp',
--- a/tools/profiler/tests/gtest/LulTestDwarf.cpp
+++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp
@@ -2401,16 +2401,19 @@ TEST_F(LulDwarfExpr, ExpressionOverrun) 
   EXPECT_TRUE(ix == -1);
 }
 
 // We'll need to mention specific Dwarf registers in the EvaluatePfxExpr tests,
 // and those names are arch-specific, so a bit of macro magic is helpful.
 #if defined(GP_ARCH_arm)
 # define TESTED_REG_STRUCT_NAME  r11
 # define TESTED_REG_DWARF_NAME   DW_REG_ARM_R11
+#elif defined(GP_ARCH_aarch64)
+# define TESTED_REG_STRUCT_NAME  x29
+# define TESTED_REG_DWARF_NAME   DW_REG_AARCH64_X29
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
 # define TESTED_REG_STRUCT_NAME  xbp
 # define TESTED_REG_DWARF_NAME   DW_REG_INTEL_XBP
 #else
 # error "Unknown plat"
 #endif
 
 struct EvaluatePfxExprFixture {
--- a/tools/profiler/tests/gtest/moz.build
+++ b/tools/profiler/tests/gtest/moz.build
@@ -1,16 +1,16 @@
 # -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, you can obtain one at http://mozilla.org/MPL/2.0/.
 
 if (CONFIG['OS_TARGET'] in ('Android', 'Linux') and
-    CONFIG['CPU_ARCH'] in ('arm', 'x86', 'x86_64')):
+    CONFIG['CPU_ARCH'] in ('arm', 'aarch64', 'x86', 'x86_64')):
     UNIFIED_SOURCES += [
         'LulTest.cpp',
         'LulTestDwarf.cpp',
         'LulTestInfrastructure.cpp',
     ]
 
 LOCAL_INCLUDES += [
     '/tools/profiler/core',