From 7a7d6bd7864f04bcdc9200aab78080a7a84ac5bd Mon Sep 17 00:00:00 2001 From: Niclas Schad Date: Thu, 7 May 2026 14:04:59 +0200 Subject: [PATCH 1/2] Add support for MutableCSINodeAllocatableCount The CSI list's all PCIe devices that are not of type VIRTIO_BLOCK_DEVICE and subtracts them from the theoretically maximum, so kubernetes can report a correct dynamic max volume count that can be attached for each node. Signed-off-by: Niclas Schad --- pkg/csi/blockstorage/controllerserver.go | 5 ++ pkg/csi/blockstorage/nodeserver.go | 14 +++- pkg/csi/blockstorage/utils.go | 2 +- pkg/csi/util/mount/mount.go | 83 ++++++++++++++++++++++++ pkg/stackit/stackiterrors/errors.go | 14 +++- 5 files changed, 113 insertions(+), 5 deletions(-) diff --git a/pkg/csi/blockstorage/controllerserver.go b/pkg/csi/blockstorage/controllerserver.go index 8de6237e..23b3fca5 100644 --- a/pkg/csi/blockstorage/controllerserver.go +++ b/pkg/csi/blockstorage/controllerserver.go @@ -370,6 +370,11 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs _, err = cloud.AttachVolume(ctx, instanceID, volumeID) if err != nil { + // Trigger's an immediate `NodeGetInfo` RPC call when MutableCSINodeAllocatableCount is enabled + // TODO: Finish Implementation of IsTooManyDevicesError + //if stackiterrors.IsTooManyDevicesError(err) { + // return nil, status.Errorf(codes.ResourceExhausted, "[ControllerPublishVolume] Node can't accept any more volumes %v. All PCIe lanes are exhausted!", err) + //} klog.Errorf("Failed to AttachVolume: %v", err) return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] Attach Volume failed with error %v", err) } diff --git a/pkg/csi/blockstorage/nodeserver.go b/pkg/csi/blockstorage/nodeserver.go index 648e5df3..f6d0a25f 100644 --- a/pkg/csi/blockstorage/nodeserver.go +++ b/pkg/csi/blockstorage/nodeserver.go @@ -308,9 +308,17 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest } maxVolumesPerNode := DetermineMaxVolumesByFlavor(flavor) - // Subtract 1 for root disk and another for configDrive/spare - maxVolumesPerNode -= 2 - klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode) + + // Subtract already mounted Volumes + emptyPCIeRootPorts, err := mount.CountNonVirtioBlockDevices() + if err != nil { + klog.Errorf("[NodeGetInfo] unable to retrieve PCIe root ports %v", err) + emptyPCIeRootPorts = 0 + } + + maxVolumesPerNode -= emptyPCIeRootPorts + klog.Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts) + klog.Infof("Determined node to support %d volumes", maxVolumesPerNode) nodeInfo := &csi.NodeGetInfoResponse{ NodeId: nodeID, diff --git a/pkg/csi/blockstorage/utils.go b/pkg/csi/blockstorage/utils.go index aaafc864..eacb77f7 100644 --- a/pkg/csi/blockstorage/utils.go +++ b/pkg/csi/blockstorage/utils.go @@ -97,7 +97,7 @@ func DetermineMaxVolumesByFlavor(flavor string) int64 { return 159 default: // All other flavors can mount 28 volumes - return 25 + return 28 } } diff --git a/pkg/csi/util/mount/mount.go b/pkg/csi/util/mount/mount.go index 0e458cc5..ab050220 100644 --- a/pkg/csi/util/mount/mount.go +++ b/pkg/csi/util/mount/mount.go @@ -20,6 +20,8 @@ import ( "fmt" "os" "path" + "path/filepath" + "regexp" "slices" "strings" "time" @@ -41,6 +43,15 @@ const ( operationFinishSteps = 15 ) +var ( + pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`) +) + +const ( + REDHAT_VENDOR = "0x1af4" + VIRTIO_BLOCK_DEVICE = "0x1042" +) + type IMount interface { Mounter() *mount.SafeFormatAndMount ScanForAttach(devicePath string) error @@ -119,6 +130,78 @@ func probeVolume() error { return nil } +// CountNonVirtioBlockDevices returns the number of PCIe Root ports who +// are currently occupied by anything else than an VIRTIO 1.0 Block Device +// returns zero when something went wrong +func CountNonVirtioBlockDevices() (int64, error) { + const pciPath = "/sys/bus/pci/devices" + + // Get all PCI devices + devices, err := os.ReadDir(pciPath) + if err != nil { + return 0, fmt.Errorf("failed to read PCI bus: %w", err) + } + + pcieSlotsOccupiedByNonBlockDevice := 0 + + for _, dev := range devices { + devPath := filepath.Join(pciPath, dev.Name()) + + // 1. Identify if it's a Root Port / Bridge + // We check the 'class' file. PCI Bridge class code starts with 0x0604 + classBuf, err := os.ReadFile(filepath.Join(devPath, "class")) + if err != nil { + klog.Errorf("failed to read PCI device class %s : %v", devPath, err) + continue + } + class := strings.TrimSpace(string(classBuf)) + + // Class 0x060400 is a PCI-to-PCI bridge (standard for Root Ports) + if strings.HasPrefix(class, "0x0604") { + + // 2. Check if the port has downstream devices + // If the bridge has children, they appear as subdirectories + // matching the PCI address format (e.g., 0000:01:00.0) + files, err2 := os.ReadDir(devPath) + if err2 != nil { + klog.Errorf("failed to read dir %s : %v", devPath, err2) + } + for _, file := range files { + // Ignore PCI bus directories such as pci001 pci002 and pci010 + // Devices must follow format + if pciAddressRegex.MatchString(file.Name()) { + isNonBlockDevice := IsNonBlockDevice(devPath, file) + if isNonBlockDevice { + pcieSlotsOccupiedByNonBlockDevice++ + } + break + } + } + } else { + klog.Infof("skipping class %s: path: %s", class, devPath) + } + } + + return int64(pcieSlotsOccupiedByNonBlockDevice), nil +} + +func IsNonBlockDevice(devPath string, file os.DirEntry) bool { + var isNonBlockDevice bool + pciDevicePath := filepath.Join(devPath, file.Name()) + vendorBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "vendor")) + if err != nil { + klog.Errorf("failed to read PCI device vendor %s : %v", pciDevicePath, err) + } + deviceBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "device")) + if err != nil { + klog.Errorf("failed to read PCI device file %s : %v", pciDevicePath, err) + } + if strings.TrimSpace(string(vendorBuf)) == REDHAT_VENDOR && strings.TrimSpace(string(deviceBuf)) != VIRTIO_BLOCK_DEVICE { + isNonBlockDevice = true + } + return isNonBlockDevice +} + // GetDevicePath returns the path of an attached block storage volume, specified by its id. func (m *Mount) GetDevicePath(volumeID string) (string, error) { backoff := wait.Backoff{ diff --git a/pkg/stackit/stackiterrors/errors.go b/pkg/stackit/stackiterrors/errors.go index ae19b7d7..b09570d8 100644 --- a/pkg/stackit/stackiterrors/errors.go +++ b/pkg/stackit/stackiterrors/errors.go @@ -4,9 +4,10 @@ import ( "errors" "fmt" "net/http" + "strings" oapiError "github.com/stackitcloud/stackit-sdk-go/core/oapierror" - wait "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait" + "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait" ) var ErrNotFound = errors.New("failed to find object") @@ -20,6 +21,17 @@ func IsNotFound(err error) bool { return oAPIError.StatusCode == http.StatusNotFound } +func IsTooManyDevicesError(err error) bool { + var oAPIError *oapiError.GenericOpenAPIError + if ok := errors.As(err, &oAPIError); !ok { + return false + } + + // TODO: This is just a placeholder. Implement this correctly + return oAPIError.StatusCode == http.StatusInternalServerError && + strings.Contains(oAPIError.ErrorMessage, "devices") +} + func IgnoreNotFound(err error) error { if IsNotFound(err) { return nil From 127f8d7da0f4942d9cd15f8a72663daf9d9fa4aa Mon Sep 17 00:00:00 2001 From: Niclas Schad Date: Fri, 8 May 2026 12:52:24 +0200 Subject: [PATCH 2/2] hide useful debug-level information behind log-level Signed-off-by: Niclas Schad --- pkg/csi/blockstorage/nodeserver.go | 4 ++-- pkg/csi/util/mount/mount.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/csi/blockstorage/nodeserver.go b/pkg/csi/blockstorage/nodeserver.go index f6d0a25f..0b390d2b 100644 --- a/pkg/csi/blockstorage/nodeserver.go +++ b/pkg/csi/blockstorage/nodeserver.go @@ -317,8 +317,8 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest } maxVolumesPerNode -= emptyPCIeRootPorts - klog.Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts) - klog.Infof("Determined node to support %d volumes", maxVolumesPerNode) + klog.V(4).Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts) + klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode) nodeInfo := &csi.NodeGetInfoResponse{ NodeId: nodeID, diff --git a/pkg/csi/util/mount/mount.go b/pkg/csi/util/mount/mount.go index ab050220..ce74d420 100644 --- a/pkg/csi/util/mount/mount.go +++ b/pkg/csi/util/mount/mount.go @@ -178,7 +178,7 @@ func CountNonVirtioBlockDevices() (int64, error) { } } } else { - klog.Infof("skipping class %s: path: %s", class, devPath) + klog.V(4).Infof("skipping class %s: path: %s", class, devPath) } }