//===- Relocations.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_RELOCATIONS_H
#define LLD_ELF_RELOCATIONS_H

#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELFTypes.h"
#include <vector>

namespace lld::elf {
struct Ctx;
class Defined;
class Symbol;
class InputSection;
class InputSectionBase;
class OutputSection;
class RelocationBaseSection;
class SectionBase;

// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
//
// Thin wrapper around the raw 32-bit relocation number. Both the constructor
// and the conversion operator are intentionally implicit, so a RelType can be
// created from, and used as, a uint32_t without explicit casts. Both are also
// constexpr so that RelType values can be built and read back in constant
// expressions.
struct RelType {
  // Raw relocation type number; defaults to 0.
  uint32_t v = 0;
  /*implicit*/ constexpr RelType(uint32_t v = 0) : v(v) {}
  /*implicit*/ constexpr operator uint32_t() const { return v; }
};

// Integer type of the JumpInstrMod::original field.
using JumpModType = uint32_t;

// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
//
// This is an unscoped enum without explicit initializers, so the enumerators
// take consecutive values starting at 0 for R_ABS; inserting or reordering
// entries changes the numeric value of every enumerator that follows.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_RELAX_TLS_LD_TO_LE_ABS,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types that are each used for only
  // one target.
  //
  // Even though RelExpr is intended to be a target-neutral representation
  // of a relocation type, there are some relocations whose semantics are
  // unique to a target. Such relocations are marked with RE_<TARGET_NAME>.
  RE_AARCH64_GOT_PAGE_PC,
  RE_AARCH64_AUTH_GOT_PAGE_PC,
  RE_AARCH64_GOT_PAGE,
  RE_AARCH64_AUTH_GOT,
  RE_AARCH64_AUTH_GOT_PC,
  RE_AARCH64_PAGE_PC,
  RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
  RE_AARCH64_TLSDESC_PAGE,
  RE_AARCH64_AUTH_TLSDESC_PAGE,
  RE_AARCH64_AUTH_TLSDESC,
  RE_AARCH64_AUTH,
  RE_ARM_PCA,
  RE_ARM_SBREL,
  RE_MIPS_GOTREL,
  RE_MIPS_GOT_GP,
  RE_MIPS_GOT_GP_PC,
  RE_MIPS_GOT_LOCAL_PAGE,
  RE_MIPS_GOT_OFF,
  RE_MIPS_GOT_OFF32,
  RE_MIPS_TLSGD,
  RE_MIPS_TLSLD,
  RE_PPC32_PLTREL,
  RE_PPC64_CALL,
  RE_PPC64_CALL_PLT,
  RE_PPC64_RELAX_TOC,
  RE_PPC64_TOCBASE,
  RE_PPC64_RELAX_GOT_PC,
  RE_RISCV_ADD,
  RE_RISCV_LEB128,
  RE_RISCV_PC_INDIRECT,
  // Same as R_PC but with page-aligned semantics.
  RE_LOONGARCH_PAGE_PC,
  // Same as R_PLT_PC but with page-aligned semantics.
  RE_LOONGARCH_PLT_PAGE_PC,
  // In addition to having page-aligned semantics, LoongArch GOT relocs are
  // also reused for TLS, making the semantics differ from other architectures.
  RE_LOONGARCH_GOT,
  RE_LOONGARCH_GOT_PAGE_PC,
  RE_LOONGARCH_TLSGD_PAGE_PC,
  RE_LOONGARCH_TLSDESC_PAGE_PC,
  RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
};

// Architecture-neutral representation of a relocation.
//
// Plain aggregate with no default member initializers: every field is
// indeterminate unless the creator initializes it explicitly.
struct Relocation {
  // Target-independent kind of the relocation (see RelExpr).
  RelExpr expr;
  // Relocation type number (see RelType), e.g. R_X86_64_PC32.
  RelType type;
  // NOTE(review): presumably the offset of the relocated location within its
  // containing section -- confirm against the code that fills this in.
  uint64_t offset;
  // Signed addend of the relocation.
  int64_t addend;
  // NOTE(review): presumably the symbol the relocation refers to; held as a
  // raw pointer -- confirm ownership and nullability against the users.
  Symbol *sym;
};

// Manipulate jump instructions with these modifiers.  These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
//
// Plain aggregate with no default member initializers.
struct JumpInstrMod {
  // NOTE(review): presumably the offset of the jump instruction within its
  // section -- confirm against the code that creates these records.
  uint64_t offset;
  // Modifier value of type JumpModType; its encoding is not defined in this
  // header.
  JumpModType original;
  // NOTE(review): presumably the size in bytes of the affected instruction --
  // confirm against the users.
  unsigned size;
};

// Relocation-processing entry points. They are only declared here; their
// definitions are not part of this header.
//
// scanRelocations() writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
template <class ELFT> void scanRelocations(Ctx &ctx);
template <class ELFT> void checkNoCrossRefs(Ctx &ctx);
void reportUndefinedSymbols(Ctx &);
void postScanRelocations(Ctx &ctx);
void addGotEntry(Ctx &ctx, Symbol &sym);

// Hexagon-specific helpers operating on TLS symbols.
void hexagonTLSSymbolUpdate(Ctx &ctx);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);

class ThunkSection;
class Thunk;
class InputSectionDescription;

// Creates Thunks and keeps the bookkeeping needed across repeated calls to
// createThunks(): the Thunks created so far (owned through std::unique_ptr in
// the maps below), the lookup tables used to find and reuse them, and the
// number of completed passes.
class ThunkCreator {
public:
  // Thunk may be incomplete. Avoid inline ctor/dtor.
  // (Thunk is only forward-declared in this header, while several members
  // hold std::unique_ptr<Thunk>, so the constructor and destructor are
  // defined out of line.)
  ThunkCreator(Ctx &ctx);
  ~ThunkCreator();
  // Return true if Thunks have been added to OutputSections.
  bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);

private:
  void mergeThunks(ArrayRef<OutputSection *> outputSections);

  ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
                               InputSectionDescription *isd,
                               const Relocation &rel, uint64_t src);

  ThunkSection *getISThunkSec(InputSection *isec);

  void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);

  // NOTE(review): the bool in the returned pair presumably reports whether
  // the Thunk was newly created -- confirm against the definition.
  std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
                                    uint64_t src);

  std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);

  ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
                                uint64_t off);

  bool normalizeExistingThunk(Relocation &rel, uint64_t src);

  bool addSyntheticLandingPads();

  Ctx &ctx;

  // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
  // is represented as a (section, offset) pair. There may be multiple
  // relocations sharing the same (section, offset + addend) pair. We may revert
  // a relocation back to its original non-Thunk target, and restore the
  // original addend, so we cannot fold offset + addend. A nested pair is used
  // because DenseMapInfo is not specialized for std::tuple.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbolsBySectionAndAddend;
  // Same record, but keyed directly by a (Symbol *, addend) pair.
  llvm::DenseMap<std::pair<Symbol *, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbols;

  // Find a Thunk from the Thunk's symbol definition. We can use this to find
  // the Thunk from a relocation to the Thunk's symbol definition.
  llvm::DenseMap<Symbol *, Thunk *> thunks;

  // Track InputSections that have an inline ThunkSection placed in front.
  // An inline ThunkSection may have control fall through to the section below,
  // so we need to make sure that there is only one of them.
  // The Mips LA25 Thunk is an example of an inline ThunkSection, as is
  // the AArch64BTLandingPadThunk.
  llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

  // Record landing pads, generated for a section + offset destination.
  // Landing pads are alternative entry points for destinations that need
  // to be reached via thunks that use indirect branches. A destination
  // needs at most one landing pad as that can be reused by all callers.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 std::unique_ptr<Thunk>>
      landingPadsBySectionAndAddend;

  // All the nonLandingPad thunks that have been created, in order of creation.
  // The pointers stored here are non-owning raw pointers.
  std::vector<Thunk *> allThunks;

  // The number of completed passes of createThunks. This permits us
  // to do one-time initialization on pass 0 and to put a limit on the
  // number of times it can be called, to prevent infinite loops.
  uint32_t pass = 0;
};

// Fast LEB128 decoder with no error checking (no bounds or overlong-encoding
// validation). Only meant for performance critical code like RelocsCrel.
//
// Consumes bytes starting at `p` up to and including the first byte below
// 0x80 and leaves `p` pointing just past that byte. `leb` is the threshold at
// or above which 128 is subtracted from a byte: 128 decodes ULEB128, while 64
// decodes SLEB128 (a final byte with bit 6 set then becomes negative and
// sign-extends the result through unsigned wraparound).
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t value = 0;
  for (uint64_t bitPos = 0;; bitPos += 7) {
    const uint64_t cur = *p++;
    // Strip the continuation bit and, for SLEB128, apply the sign of the
    // final byte.
    const uint64_t payload = cur >= leb ? cur - 128 : cur;
    value |= payload << bitPos;
    // A byte without the continuation bit terminates the encoding.
    if (cur < 128)
      return value;
  }
}

// Decodes an unsigned LEB128 value at `p` and advances `p` past it.
inline uint64_t readULEB128(const uint8_t *&p) {
  constexpr uint64_t threshold = 128;
  return readLEB128(p, threshold);
}

// Decodes a signed LEB128 value at `p` and advances `p` past it.
inline int64_t readSLEB128(const uint8_t *&p) {
  constexpr uint64_t threshold = 64;
  return readLEB128(p, threshold);
}

// This class implements a CREL iterator that does not allocate extra memory.
//
// A RelocsCrel is a (header, pointer) view over an encoded CREL stream. The
// header is a ULEB128 value: hdr / 8 is the number of relocations, bit 2
// (hdr & 4) selects whether the per-record flag byte carries 3 or 2 flag
// bits, and hdr % 4 is a left shift applied to every offset delta. Records are
// decoded lazily, one at a time, as the iterator advances.
template <bool is64> struct RelocsCrel {
  using uint = std::conditional_t<is64, uint64_t, uint32_t>;
  struct const_iterator {
    using iterator_category = std::forward_iterator_tag;
    using value_type = llvm::object::Elf_Crel_Impl<is64>;
    using difference_type = ptrdiff_t;
    using pointer = value_type *;
    using reference = const value_type &;
    // Number of records not yet consumed, including the one currently held in
    // `crel`. Zero means "end".
    uint32_t count;
    // Number of flag bits in the leading byte of each record (2 or 3), and the
    // left shift applied to offset deltas.
    uint8_t flagBits, shift;
    // Read cursor into the encoded stream.
    const uint8_t *p;
    // The most recently decoded record. Fields are accumulated with +=, so
    // each encoded record is a delta against the previous one.
    llvm::object::Elf_Crel_Impl<is64> crel{};
    // Splits the header into count/flagBits/shift and, if there is at least
    // one record, decodes the first one immediately.
    const_iterator(size_t hdr, const uint8_t *p)
        : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
      if (count)
        step();
    }
    // Decodes one record at `p` into `crel` and advances `p` past it.
    void step() {
      // See object::decodeCrel.
      const uint8_t b = *p++;
      // The bits of the first byte above the flag bits are the low bits of
      // the offset delta.
      crel.r_offset += b >> flagBits << shift;
      // Bit 7 set: the remaining offset delta bits follow as a ULEB128. The
      // subtraction removes the contribution of bit 7 itself, which was
      // already added by the line above.
      if (b >= 0x80)
        crel.r_offset +=
            ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
      // Flag bit 0: a signed symbol index delta follows.
      if (b & 1)
        crel.r_symidx += readSLEB128(p);
      // Flag bit 1: a signed type delta follows.
      if (b & 2)
        crel.r_type += readSLEB128(p);
      // Flag bit 2 is only a flag when three flag bits are in use: a signed
      // addend delta follows.
      if (b & 4 && flagBits == 3)
        crel.r_addend += static_cast<uint>(readSLEB128(p));
    }
    // Returns a copy of the current record.
    llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
    const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
      return &crel;
    }
    // For llvm::enumerate.
    // Iterators compare equal when the same number of records remain; the
    // read cursor is not compared, so any iterator with count == 0 equals
    // end().
    bool operator==(const const_iterator &r) const { return count == r.count; }
    bool operator!=(const const_iterator &r) const { return count != r.count; }
    // Consumes the current record and decodes the next one only if records
    // remain. Must not be called on an iterator whose count is already 0,
    // since count is unsigned and would wrap around.
    const_iterator &operator++() {
      if (--count)
        step();
      return *this;
    }
    // For RelocationScanner::scanOne.
    // Advances by n records by applying operator++ n times.
    void operator+=(size_t n) {
      for (; n; --n)
        operator++();
    }
  };

  // Decoded CREL header and pointer to the first encoded record.
  size_t hdr = 0;
  const uint8_t *p = nullptr;

  constexpr RelocsCrel() = default;
  // Reads the ULEB128 header at `p`. readULEB128 takes the pointer by
  // reference and advances it, so the pointer stored in the body already
  // points past the header at the first record.
  RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
  // Number of relocations encoded in the stream.
  size_t size() const { return hdr / 8; }
  const_iterator begin() const { return {hdr, p}; }
  // The end iterator has count 0 and never dereferences its pointer.
  const_iterator end() const { return {0, nullptr}; }
};

// View over a sequence of relocations of type RelTy. The primary template is
// simply an ArrayRef<RelTy> that is default-constructible and implicitly
// constructible from an ArrayRef<RelTy>.
template <class RelTy> struct Relocs : ArrayRef<RelTy> {
  Relocs() = default;
  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
};

// Specialization for CREL relocations: instead of an ArrayRef, the view is a
// RelocsCrel<is64>, whose constructors are inherited unchanged.
template <bool is64>
struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
  using RelocsCrel<is64>::RelocsCrel;
};

// Return an int64_t to make sure we get the sign extension out of the way as
// early as possible.
//
// ELFT cannot be deduced from the argument (it only appears in a nested
// typename), so callers have to name it explicitly.

// Rel overload: always returns 0; the argument is not inspected.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &rel) {
  return 0;
}
// Rela overload: returns the r_addend field of the relocation.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  return rel.r_addend;
}
// Crel overload: returns the r_addend field of the relocation.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  return rel.r_addend;
}

// Returns `rels` ordered by ascending r_offset.
//
// If the input is already ordered it is returned unchanged and `storage` is
// left untouched. Otherwise the relocations are copied into `storage`,
// stable-sorted there (so entries with equal r_offset keep their relative
// order), and the returned view refers to `storage`; `storage` must therefore
// outlive the returned Relocs.
template <typename RelTy>
inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
                              SmallVector<RelTy, 0> &storage) {
  auto byOffset = [](const RelTy &lhs, const RelTy &rhs) {
    return lhs.r_offset < rhs.r_offset;
  };
  // Fast path: nothing to do for input that is already in order.
  if (llvm::is_sorted(rels, byOffset))
    return rels;
  storage.assign(rels.begin(), rels.end());
  llvm::stable_sort(storage, byOffset);
  return Relocs<RelTy>(storage);
}

// Overload of sortRels for CREL relocations. It performs no sorting: both
// arguments are ignored and a default-constructed (empty) Relocs is returned.
// NOTE(review): presumably this overload only exists so that generic code
// compiles for CREL and its result is never used -- confirm against callers.
template <bool is64>
inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
         SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
  return {};
}

RelocationBaseSection &getIRelativeSection(Ctx &ctx);

// Returns true if expr refers to a GOT entry. Note that this function returns
// false for TLS variables even though they need GOT, because TLS variables use
// GOT differently than regular variables.
bool needsGot(RelExpr expr);
} // namespace lld::elf

#endif
