Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/csi/blockstorage/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,11 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs

_, err = cloud.AttachVolume(ctx, instanceID, volumeID)
if err != nil {
// Trigger's an immediate `NodeGetInfo` RPC call when MutableCSINodeAllocatableCount is enabled
// TODO: Finish Implementation of IsTooManyDevicesError
//if stackiterrors.IsTooManyDevicesError(err) {
// return nil, status.Errorf(codes.ResourceExhausted, "[ControllerPublishVolume] Node can't accept any more volumes %v. All PCIe lanes are exhausted!", err)
//}
klog.Errorf("Failed to AttachVolume: %v", err)
return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] Attach Volume failed with error %v", err)
}
Expand Down
12 changes: 10 additions & 2 deletions pkg/csi/blockstorage/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,16 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest
}

maxVolumesPerNode := DetermineMaxVolumesByFlavor(flavor)
// Subtract 1 for root disk and another for configDrive/spare
maxVolumesPerNode -= 2

// Subtract already mounted Volumes
emptyPCIeRootPorts, err := mount.CountNonVirtioBlockDevices()
if err != nil {
klog.Errorf("[NodeGetInfo] unable to retrieve PCIe root ports %v", err)
emptyPCIeRootPorts = 0
}

maxVolumesPerNode -= emptyPCIeRootPorts
klog.V(4).Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts)
klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode)

nodeInfo := &csi.NodeGetInfoResponse{
Expand Down
2 changes: 1 addition & 1 deletion pkg/csi/blockstorage/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func DetermineMaxVolumesByFlavor(flavor string) int64 {
return 159
default:
// All other flavors can mount 28 volumes
return 25
return 28
}
}

Expand Down
83 changes: 83 additions & 0 deletions pkg/csi/util/mount/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"slices"
"strings"
"time"
Expand All @@ -41,6 +43,15 @@ const (
operationFinishSteps = 15
)

var (
pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`)
)

const (
REDHAT_VENDOR = "0x1af4"
VIRTIO_BLOCK_DEVICE = "0x1042"
)

type IMount interface {
Mounter() *mount.SafeFormatAndMount
ScanForAttach(devicePath string) error
Expand Down Expand Up @@ -119,6 +130,78 @@ func probeVolume() error {
return nil
}

// CountNonVirtioBlockDevices returns the number of PCIe Root ports who
// are currently occupied by anything else than an VIRTIO 1.0 Block Device
// returns zero when something went wrong
func CountNonVirtioBlockDevices() (int64, error) {
const pciPath = "/sys/bus/pci/devices"

// Get all PCI devices
devices, err := os.ReadDir(pciPath)
if err != nil {
return 0, fmt.Errorf("failed to read PCI bus: %w", err)
}

pcieSlotsOccupiedByNonBlockDevice := 0

for _, dev := range devices {
devPath := filepath.Join(pciPath, dev.Name())

// 1. Identify if it's a Root Port / Bridge
// We check the 'class' file. PCI Bridge class code starts with 0x0604
classBuf, err := os.ReadFile(filepath.Join(devPath, "class"))
if err != nil {
klog.Errorf("failed to read PCI device class %s : %v", devPath, err)
continue
}
class := strings.TrimSpace(string(classBuf))

// Class 0x060400 is a PCI-to-PCI bridge (standard for Root Ports)
if strings.HasPrefix(class, "0x0604") {

// 2. Check if the port has downstream devices
// If the bridge has children, they appear as subdirectories
// matching the PCI address format (e.g., 0000:01:00.0)
files, err2 := os.ReadDir(devPath)
if err2 != nil {
klog.Errorf("failed to read dir %s : %v", devPath, err2)
}
for _, file := range files {
// Ignore PCI bus directories such as pci001 pci002 and pci010
// Devices must follow <domain:bus:device.function> format
if pciAddressRegex.MatchString(file.Name()) {
isNonBlockDevice := IsNonBlockDevice(devPath, file)
if isNonBlockDevice {
pcieSlotsOccupiedByNonBlockDevice++
}
break
}
}
} else {
klog.V(4).Infof("skipping class %s: path: %s", class, devPath)
}
}

return int64(pcieSlotsOccupiedByNonBlockDevice), nil
}

func IsNonBlockDevice(devPath string, file os.DirEntry) bool {
var isNonBlockDevice bool
pciDevicePath := filepath.Join(devPath, file.Name())
vendorBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "vendor"))
if err != nil {
klog.Errorf("failed to read PCI device vendor %s : %v", pciDevicePath, err)
}
deviceBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "device"))
if err != nil {
klog.Errorf("failed to read PCI device file %s : %v", pciDevicePath, err)
}
if strings.TrimSpace(string(vendorBuf)) == REDHAT_VENDOR && strings.TrimSpace(string(deviceBuf)) != VIRTIO_BLOCK_DEVICE {
isNonBlockDevice = true
}
return isNonBlockDevice
}

// GetDevicePath returns the path of an attached block storage volume, specified by its id.
func (m *Mount) GetDevicePath(volumeID string) (string, error) {
backoff := wait.Backoff{
Expand Down
14 changes: 13 additions & 1 deletion pkg/stackit/stackiterrors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import (
"errors"
"fmt"
"net/http"
"strings"

oapiError "github.com/stackitcloud/stackit-sdk-go/core/oapierror"
wait "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
"github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
)

var ErrNotFound = errors.New("failed to find object")
Expand All @@ -20,6 +21,17 @@ func IsNotFound(err error) bool {
return oAPIError.StatusCode == http.StatusNotFound
}

func IsTooManyDevicesError(err error) bool {
var oAPIError *oapiError.GenericOpenAPIError
if ok := errors.As(err, &oAPIError); !ok {
return false
}

// TODO: This is just a placeholder. Implement this correctly
return oAPIError.StatusCode == http.StatusInternalServerError &&
strings.Contains(oAPIError.ErrorMessage, "devices")
}

func IgnoreNotFound(err error) error {
if IsNotFound(err) {
return nil
Expand Down