[CIR] Implement support for emitting label address constants #203644
[CIR] Implement support for emitting label address constants #203644 — andykaylor wants to merge 1 commit into
Conversation
The evalloop.c test in the llvm-test-suite single source tests contains a static array that is initialized with the address of labels within the enclosing function. This wasn't implemented in CIR. This change adds an implementation. The constant emitter change was trivial. We just needed to create a #cir.block_addr_info attribute. However, using that attribute as an initializer for a global requires some additional handling and special lowering for the initializer. The goto solver also needed to be updated to consider uses of labels in global initializers. The test case here was copied over directly from classic codegen. The original test has an additional test case for the difference between two label addresses. Support for that case will be added in a future change. Assisted-by: Cursor / claude-opus-4.8
|
@llvm/pr-subscribers-clangir @llvm/pr-subscribers-clang Author: Andy Kaylor (andykaylor) Changes: The evalloop.c test in the llvm-test-suite single source tests contains a static array that is initialized with the address of labels within the enclosing function. This wasn't implemented in CIR. This change adds an implementation. The constant emitter change was trivial. We just needed to create a #cir.block_addr_info attribute. However, using that attribute as an initializer for a global requires some additional handling and special lowering for the initializer. The goto solver also needed to be updated to consider uses of labels in global initializers. The test case here was copied over directly from classic codegen. The original test has an additional test case for the difference between two label addresses. Support for that case will be added in a future change. Assisted-by: Cursor / claude-opus-4.8 Full diff: https://github.com/llvm/llvm-project/pull/203644.diff 8 Files Affected:
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index 4032d8219fff3..abfa73ca2a611 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -1454,25 +1454,37 @@ def CIR_UnwindAttr : CIR_UnitAttr<"Unwind", "unwind"> {
// CIR_BlockAddrInfoAttr
//===----------------------------------------------------------------------===//
-def CIR_BlockAddrInfoAttr : CIR_Attr<"BlockAddrInfo", "block_addr_info"> {
- let summary = "Block Addres attribute";
+def CIR_BlockAddrInfoAttr
+ : CIR_ValueLikeAttr<"BlockAddrInfo", "block_addr_info"> {
+ let summary = "Block address attribute";
let description = [{
This attribute is used to represent the address of a basic block
within a function. It combines the symbol reference to a function
with the name of a label inside that function.
}];
- let parameters = (ins "mlir::FlatSymbolRefAttr":$func,
- "mlir::StringAttr":$label);
+ let parameters = (ins
+ AttributeSelfTypeParameter<
+ "", "cir::PointerType",
+ "cir::PointerType::get(cir::VoidType::get($_ctxt))">:$type,
+ "mlir::FlatSymbolRefAttr":$func,
+ "mlir::StringAttr":$label);
let assemblyFormat = "`<` $func `,` $label `>`";
let builders = [
AttrBuilder<(ins "llvm::StringRef":$func_name,
- "llvm::StringRef":$label_name
- ), [{
- return $_get($_ctxt, mlir::FlatSymbolRefAttr::get($_ctxt, func_name),
+ "llvm::StringRef":$label_name), [{
+ return $_get($_ctxt,
+ cir::PointerType::get(cir::VoidType::get($_ctxt)),
+ mlir::FlatSymbolRefAttr::get($_ctxt, func_name),
mlir::StringAttr::get($_ctxt, label_name));
}]>
];
+
+ // Block addresses require deferred basic-block resolution during the
+ // LowerToLLVM pass, so they are not handled by the generic attribute-to-value
+ // lowering.
+ let hasAttrToValueLowering = 0;
+
let canHaveIllegalCXXABIType = 0;
}
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 557e279d9bc71..2f07f0c783328 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3134,6 +3134,9 @@ def CIR_GlobalOp : CIR_Op<"global", [
mlir::SymbolRefAttr getComdatAttr(cir::GlobalOp &op,
mlir::OpBuilder &builder) const;
}];
+
+ let customLLVMLoweringConstructorDecl =
+ LoweringBuilders<(ins "LLVMBlockAddressInfo &":$blockInfoAddr)>;
}
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 5208af44412a3..610a0e780cda5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1234,6 +1234,8 @@ struct ConstantLValue {
: value(nullptr), hasOffsetApplied(false) {}
/*implicit*/ ConstantLValue(cir::GlobalViewAttr address)
: value(address), hasOffsetApplied(false) {}
+ /*implicit*/ ConstantLValue(cir::BlockAddrInfoAttr address)
+ : value(address), hasOffsetApplied(true) {}
ConstantLValue() : value(nullptr), hasOffsetApplied(false) {}
};
@@ -1514,8 +1516,10 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *e) {
ConstantLValue
ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *e) {
- cgm.errorNYI(e->getSourceRange(), "ConstantLValueEmitter: addr label expr");
- return {};
+ auto func = cast<cir::FuncOp>(emitter.cgf->curFn);
+ return cir::BlockAddrInfoAttr::get(cgm.getBuilder().getContext(),
+ func.getSymName(),
+ e->getLabel()->getName());
}
ConstantLValue ConstantLValueEmitter::VisitCallExpr(const CallExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index 922140a93aa5a..101ecfab21b2d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -745,8 +745,8 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) {
builder.setInsertionPointToEnd(labelBlock);
auto func = cast<cir::FuncOp>(curFn);
cgm.mapBlockAddress(cir::BlockAddrInfoAttr::get(builder.getContext(),
- func.getSymNameAttr(),
- label.getLabelAttr()),
+ func.getSymName(),
+ label.getLabel()),
label);
// FIXME: emit debug info for labels, incrementProfileCounter
assert(!cir::MissingFeatures::incrementProfileCounter());
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 660bed1544aac..8fb737b133efb 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -582,10 +582,10 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType,
return success();
}
- if (mlir::isa<cir::ConstArrayAttr, cir::ConstVectorAttr,
- cir::ConstComplexAttr, cir::ConstRecordAttr,
- cir::GlobalViewAttr, cir::PoisonAttr, cir::TypeInfoAttr,
- cir::VTableAttr>(attrType))
+ if (mlir::isa<cir::BlockAddrInfoAttr, cir::ConstArrayAttr,
+ cir::ConstVectorAttr, cir::ConstComplexAttr,
+ cir::ConstRecordAttr, cir::GlobalViewAttr, cir::PoisonAttr,
+ cir::TypeInfoAttr, cir::VTableAttr>(attrType))
return success();
assert(isa<TypedAttr>(attrType) && "What else could we be looking at here?");
@@ -2144,8 +2144,7 @@ static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser,
assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
"Non-typed attrs shouldn't appear here.");
- auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
- opTy = typedAttr.getType();
+ opTy = mlir::cast<mlir::TypedAttr>(initialValueAttr).getType();
}
// Parse destructor, example:
diff --git a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
index d590ccce1f540..e2a561cb3a003 100644
--- a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
@@ -9,6 +9,8 @@
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/Passes.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/TimeProfiler.h"
#include <memory>
@@ -27,7 +29,8 @@ struct GotoSolverPass : public impl::GotoSolverBase<GotoSolverPass> {
void runOnOperation() override;
};
-static void process(cir::FuncOp func) {
+static void process(cir::FuncOp func,
+ const llvm::StringSet<> &globalBlockAddrLabel) {
mlir::OpBuilder rewriter(func.getContext());
llvm::StringMap<Block *> labels;
llvm::SmallVector<cir::GotoOp, 4> gotos;
@@ -46,7 +49,11 @@ static void process(cir::FuncOp func) {
for (auto &lab : labels) {
StringRef labelName = lab.getKey();
Block *block = lab.getValue();
- if (!blockAddrLabel.contains(labelName)) {
+ // Keep labels whose address is taken either by a cir.block_address op in
+ // this function or by a block-address attribute used elsewhere (e.g. in a
+ // global initializer).
+ if (!blockAddrLabel.contains(labelName) &&
+ !globalBlockAddrLabel.contains(labelName)) {
// erase the LabelOp inside the block if safe
if (auto lab = dyn_cast<cir::LabelOp>(&block->front())) {
lab.erase();
@@ -65,7 +72,25 @@ static void process(cir::FuncOp func) {
void GotoSolverPass::runOnOperation() {
llvm::TimeTraceScope scope("Goto Solver");
- getOperation()->walk(&process);
+
+ // Block addresses can also appear in attributes outside of any function body,
+ // such as global variable initializers. Collect, per target function, the
+ // labels referenced this way so their LabelOps are not erased below.
+ llvm::StringMap<llvm::StringSet<>> globalBlockAddrLabels;
+ getOperation()->walk([&](mlir::Operation *op) {
+ for (const mlir::NamedAttribute &namedAttr : op->getAttrs()) {
+ namedAttr.getValue().walk([&](cir::BlockAddrInfoAttr info) {
+ globalBlockAddrLabels[info.getFunc().getValue()].insert(
+ info.getLabel());
+ });
+ }
+ });
+
+ static const llvm::StringSet<> emptySet;
+ getOperation()->walk([&](cir::FuncOp func) {
+ auto it = globalBlockAddrLabels.find(func.getSymName());
+ process(func, it == globalBlockAddrLabels.end() ? emptySet : it->second);
+ });
}
} // namespace
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 1579e967885d8..119f6db905c04 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2396,16 +2396,37 @@ CIRToLLVMGlobalOpLowering::matchAndRewriteRegionInitializedGlobal(
cir::GlobalOp op, mlir::Attribute init,
mlir::ConversionPatternRewriter &rewriter) const {
// TODO: Generalize this handling when more types are needed here.
- assert((isa<cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr,
- cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr,
- cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr,
- cir::VTableAttr, cir::ZeroAttr>(init)));
+ assert((isa<cir::BlockAddrInfoAttr, cir::ConstArrayAttr, cir::ConstRecordAttr,
+ cir::ConstVectorAttr, cir::ConstPtrAttr, cir::ConstComplexAttr,
+ cir::GlobalViewAttr, cir::TypeInfoAttr, cir::UndefAttr,
+ cir::PoisonAttr, cir::VTableAttr, cir::ZeroAttr>(init)));
// TODO(cir): once LLVM's dialect has proper equivalent attributes this
// should be updated. For now, we use a custom op to initialize globals
// to the appropriate value.
const mlir::Location loc = op.getLoc();
setupRegionInitializedLLVMGlobalOp(op, rewriter);
+
+ // A block address initializer is lowered to an llvm.blockaddress op that
+ // references a block tag inside the target function. The matching block tag
+ // may not have been emitted yet, in which case the address is recorded as
+ // unresolved and patched up later in resolveBlockAddressOp.
+ if (auto blockAddrInfo = mlir::dyn_cast<cir::BlockAddrInfoAttr>(init)) {
+ mlir::LLVM::BlockTagOp matchLabel =
+ blockInfoAddr.lookupBlockTag(blockAddrInfo);
+ mlir::LLVM::BlockTagAttr tagAttr =
+ matchLabel ? matchLabel.getTag() : mlir::LLVM::BlockTagAttr{};
+ auto blkAddr = mlir::LLVM::BlockAddressAttr::get(
+ rewriter.getContext(), blockAddrInfo.getFunc(), tagAttr);
+ auto blockAddressOp = mlir::LLVM::BlockAddressOp::create(
+ rewriter, loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()),
+ blkAddr);
+ if (!matchLabel)
+ blockInfoAddr.addUnresolvedBlockAddress(blockAddressOp, blockAddrInfo);
+ mlir::LLVM::ReturnOp::create(rewriter, loc, blockAddressOp);
+ return mlir::success();
+ }
+
CIRAttrToValue valueConverter(op, rewriter, typeConverter);
mlir::Value value = valueConverter.visit(init);
mlir::LLVM::ReturnOp::create(rewriter, loc, value);
@@ -2495,11 +2516,11 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
}
}
return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter);
- } else if (mlir::isa<cir::ConstVectorAttr, cir::ConstRecordAttr,
- cir::ConstPtrAttr, cir::ConstComplexAttr,
- cir::GlobalViewAttr, cir::TypeInfoAttr, cir::UndefAttr,
- cir::PoisonAttr, cir::VTableAttr, cir::ZeroAttr>(
- init.value())) {
+ } else if (mlir::isa<cir::BlockAddrInfoAttr, cir::ConstVectorAttr,
+ cir::ConstRecordAttr, cir::ConstPtrAttr,
+ cir::ConstComplexAttr, cir::GlobalViewAttr,
+ cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr,
+ cir::VTableAttr, cir::ZeroAttr>(init.value())) {
// TODO(cir): once LLVM's dialect has proper equivalent attributes this
// should be updated. For now, we use a custom op to initialize globals
// to the appropriate value.
@@ -3656,8 +3677,9 @@ void ConvertCIRToLLVMPass::runOnOperation() {
/// repeated O(M) module-wide symbol scans for every call site.
mlir::SymbolTableCollection symbolTables;
mlir::RewritePatternSet patterns(&getContext());
- patterns.add<CIRToLLVMBlockAddressOpLowering, CIRToLLVMLabelOpLowering>(
- converter, patterns.getContext(), dl, blockInfoAddr);
+ patterns.add<CIRToLLVMBlockAddressOpLowering, CIRToLLVMGlobalOpLowering,
+ CIRToLLVMLabelOpLowering>(converter, patterns.getContext(), dl,
+ blockInfoAddr);
patterns.add<CIRToLLVMCallOpLowering, CIRToLLVMTryCallOpLowering>(
converter, patterns.getContext(), dl, symbolTables);
diff --git a/clang/test/CIR/CodeGen/const-label-addr.c b/clang/test/CIR/CodeGen/const-label-addr.c
new file mode 100644
index 0000000000000..8541b23d3d4f6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/const-label-addr.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+void a(void) {
+A:;
+ static void *a = &&A;
+}
+
+// CIR: cir.global "private" internal dso_local @a.a = #cir.block_addr_info<@a, "A"> : !cir.ptr<!void>
+// CIR: cir.func{{.*}} @a()
+// CIR: cir.br ^[[A_BLOCK:bb[0-9]+]]
+// CIR: ^[[A_BLOCK]]:
+// CIR: cir.label "A"
+// CIR: %[[STATIC_A:.*]] = cir.get_global @a.a : !cir.ptr<!cir.ptr<!void>>
+// CIR: cir.return
+
+// LLVM: @a.a = internal global ptr blockaddress(@a, %[[A_BLOCK:.*]]), align 8
+// LLVM: define dso_local void @a()
+// LLVM: br label %[[A_BLOCK]]
+// LLVM: [[A_BLOCK]]:
+// LLVM: ret void
|
The evalloop.c test in the llvm-test-suite single source tests contains a static array that is initialized with the address of labels within the enclosing function. This wasn't implemented in CIR.
This change adds an implementation. The constant emitter change was trivial. We just needed to create a #cir.block_addr_info attribute. However, using that attribute as an initializer for a global requires some additional handling and special lowering for the initializer.
The goto solver also needed to be updated to consider uses of labels in global initializers.
The test case here was copied over directly from classic codegen. The original test has an additional test case for the difference between two label addresses. Support for that case will be added in a future change.
Assisted-by: Cursor / claude-opus-4.8