Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,9 @@ dockerfiles/Courses/DL/data/FashionMNIST/raw/
# Local config overrides: ignore any path whose name contains 'local'.
*local*
*.local.*
# Exceptions: these PXE agent disk-persistence templates have 'local' in their
# file names and would otherwise be caught by *local* above. The negations must
# stay below the patterns they override, because the last matching pattern wins.
!deploy/ansible/roles/pxe_controller/templates/mount-local-disk.service.j2
!deploy/ansible/roles/pxe_controller/templates/mount-local-disk.sh.j2

# Offline bundle artifacts (the trailing slash restricts the match to directories)
auplc-bundle-*/
Expand Down
93 changes: 93 additions & 0 deletions deploy/ansible/playbooks/pb-pxe-controller.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

---
- name: Setup PXE Controller for diskless k3s agent boot
  hosts: pxe_controller
  become: true
  gather_facts: true
  vars:
    # ---- Site configuration (the config.env equivalent) ----
    # Do not force a delete + rebuild of the rootfs on every run.
    pxe_rootfs_force_rebuild: false

    # ---- Network ----
    pxe_network_interface: "ens18"
    pxe_subnet: "192.168.1.0/24"
    pxe_gateway: "192.168.1.254"
    # Comma-separated inside one string, not a YAML list.
    pxe_dns_servers: "8.8.8.8,8.8.4.4"

    # ---- PXE controller ----
    # REQUIRED: replace the placeholder with the IP of your service host.
    # NOTE(review): the placeholder is a non-empty string, so a plain
    # "is set" check would not catch a forgotten edit -- confirm the role
    # validates the value as an IP address.
    pxe_controller_ip: "<PXE_CONTROLLER_IP>"

    # ---- k3s server nodes ----
    # REQUIRED: replace the placeholder with your k3s server node IP(s),
    # one list entry per server.
    pxe_k3s_server_ips:
      - "<K3S_SERVER_IP>"

    # ---- Credentials ----
    # Root password for PXE-booted machines; an empty string disables
    # password login. Keep a real value encrypted, e.g.:
    #   ansible-vault encrypt_string 'MyPass' --name 'pxe_rootfs_password'
    pxe_rootfs_password: ""

    # Public SSH keys allowed to log in to the rootfs. At least one entry
    # is required: the playbook fails fast while this list is empty, so
    # fill it in before running.
    #   - "ssh-ed25519 AAAA... you@workstation"
    #   - "ssh-rsa AAAA... ci@runner"
    pxe_rootfs_authorized_keys: []

    # ---- APT mirror ----
    pxe_apt_mirror: "http://tw.archive.ubuntu.com/ubuntu"

    # ---- Rootfs contents ----
    # The kernel image and headers are pinned to the same OEM build.
    pxe_rootfs_packages:
      - linux-image-6.14.0-1018-oem
      - linux-headers-6.14.0-1018-oem
      - initramfs-tools
      - linux-firmware
      - nfs-common
      - overlayroot
      - openssh-server
      - curl
      - net-tools
      - iproute2
      - systemd-sysv
      - sudo
      - kmod
      - pciutils
      - e2fsprogs
      - dkms
      - build-essential
      - libatomic1
      - libquadmath0

    pxe_initramfs_modules:
      - nfs
      - nfsv3
      - overlay
      - r8125
      - amdgpu
      # The nft_reject modules have to be loaded from the initramfs (see the
      # pxe_initramfs_modules notes in the role defaults). On this
      # NFS+overlayroot rootfs a runtime modprobe fails with ESTALE on
      # modules.dep.bin once the PXE controller has rebuilt the rootfs
      # underneath a running node; pre-loading avoids that path entirely.
      - nft_reject_inet
      - nft_reject_ipv4
      - nft_reject_ipv6

    pxe_k3s_data_dir: "/var/lib/rancher/k3s"

  roles:
    - role: pxe_controller
138 changes: 138 additions & 0 deletions deploy/ansible/roles/pxe_controller/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Role defaults -- override these in the playbook vars: section.
# Maps 1:1 to config.env from the shell scripts.

---
# ============================================================
# Network
# ============================================================
# Site network settings; the defaults describe a 192.168.1.0/24 LAN.
# Override them in the playbook for any other layout.
# NOTE(review): presumably the interface the PXE services listen on --
# confirm against the role tasks/templates.
pxe_network_interface: "ens18"
# Subnet in CIDR notation.
pxe_subnet: "192.168.1.0/24"
pxe_gateway: "192.168.1.254"
# Comma-separated inside a single string (not a YAML list).
pxe_dns_servers: "8.8.8.8,8.8.4.4"

# ============================================================
# PXE Controller
# ============================================================
# REQUIRED -- set to your PXE controller / service host IP in the playbook.
# The default is deliberately an empty string so that no site address is
# baked into the repo; the role asserts this is set before running.
pxe_controller_ip: ""

# ============================================================
# k3s Server Nodes
# ============================================================
# REQUIRED -- set to your k3s server node IP(s) in the playbook, one list
# entry per server. The default is deliberately an empty list; the role
# asserts the list is non-empty.
pxe_k3s_server_ips: []

# ============================================================
# Credentials
# ============================================================

# Root password for the rootfs. Optional: leave it empty to disable
# password login. Never commit a plaintext value; encrypt it with
# ansible-vault instead:
#   ansible-vault encrypt_string 'MyPass' --name 'pxe_rootfs_password'
pxe_rootfs_password: ""

# SSH public keys injected into rootfs /root/.ssh/authorized_keys.
# At least one key is REQUIRED (the playbook will fail without it).
# Example override:
# pxe_rootfs_authorized_keys:
# - "ssh-ed25519 AAAA... you@workstation"
# - "ssh-rsa AAAA... ci@runner"
pxe_rootfs_authorized_keys: []

# ============================================================
# APT Mirror
# ============================================================
# Base URL of the Ubuntu archive mirror. The default points at a regional
# (tw) mirror; override it in the playbook with one close to your site.
pxe_apt_mirror: "http://tw.archive.ubuntu.com/ubuntu"

# ============================================================
# Rootfs lifecycle
# ============================================================
# Force delete + rebuild of the rootfs from scratch (debootstrap).
# Off by default: a forced rebuild is destructive, and rebuilding the NFS
# rootfs underneath running nodes is what triggers the ESTALE modprobe
# failure described at pxe_initramfs_modules. Opt in explicitly for a
# single run, e.g.:
#   ansible-playbook ... -e '{"pxe_rootfs_force_rebuild": true}'
# This default now matches the value the PXE controller playbook sets.
pxe_rootfs_force_rebuild: false
# Run apt-get upgrade inside the rootfs during chroot setup.
pxe_rootfs_upgrade: false

# ============================================================
# Paths
# ============================================================
# Absolute directories used by the role.
# NOTE(review): judging by the names these are the NFS-exported rootfs, the
# TFTP root and the HTTP document root on the PXE controller -- confirm
# against the role tasks before relying on that.
pxe_nfs_root: "/srv/nfs/rootfs"
pxe_tftp_root: "/srv/tftp"
pxe_web_root: "/var/www/html"

# ============================================================
# Rootfs build settings
# ============================================================
# Ubuntu release codename for the rootfs.
# NOTE(review): presumably the suite handed to debootstrap -- confirm.
pxe_ubuntu_codename: "noble"

# The kernel image and headers are pinned to the same OEM build; bump both
# entries together.
pxe_rootfs_packages:
  - linux-image-6.14.0-1018-oem
  - linux-headers-6.14.0-1018-oem
  - initramfs-tools
  - linux-firmware
  - nfs-common
  - overlayroot
  - openssh-server
  - curl
  - net-tools
  - iproute2
  - systemd-sysv
  - sudo
  - kmod
  - pciutils
  - e2fsprogs
  - dkms
  - build-essential
  - libatomic1
  - libquadmath0

pxe_initramfs_modules:
  - nfs
  - nfsv3
  - overlay
  - r8125
  - amdgpu
  # nftables `reject` modules, required by Cloudflare WARP firewall rules.
  # They are loaded from the initramfs (BEFORE pivot_root) so that they are
  # already resident in kernel memory when the overlayroot+NFS rootfs gets
  # mounted. That sidesteps an overlayfs failure mode: after the PXE
  # controller rebuilds the NFS rootfs, the overlay's cached inode of
  # /lib/modules/<KVER>/modules.dep.bin becomes ESTALE and modprobe stops
  # working entirely on running nodes.
  - nft_reject_inet
  - nft_reject_ipv4
  - nft_reject_ipv6

pxe_k3s_data_dir: "/var/lib/rancher/k3s"

# ============================================================
# Host packages
# ============================================================
# Installed on the PXE controller itself, not inside the rootfs.
pxe_host_packages:
  - dnsmasq
  - pxelinux
  - syslinux-common
  - apache2
  - nfs-kernel-server
  - debootstrap
  - qemu-guest-agent
  - grub-efi-amd64-signed
  - shim-signed
Binary file not shown.
39 changes: 39 additions & 0 deletions deploy/ansible/roles/pxe_controller/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

---
# Handlers for the pxe_controller role. A handler runs only when a task
# notifies it by name.

- name: Restart dnsmasq
  ansible.builtin.systemd:
    name: dnsmasq
    state: restarted
    # Reload systemd's unit definitions first so the restart picks up any
    # unit-file changes.
    daemon_reload: true

- name: Restart nfs-kernel-server
  ansible.builtin.systemd:
    name: nfs-kernel-server
    state: restarted

- name: Restart apache2
  ansible.builtin.systemd:
    name: apache2
    state: restarted

# Re-sync the active export table with /etc/exports without restarting the
# NFS server. `-r` is required for a true reload: plain `exportfs -a` only
# adds exports, so entries removed from /etc/exports would stay exported.
- name: Reload NFS exports
  ansible.builtin.command:
    cmd: exportfs -ra
  # The command gives no signal about whether anything changed, so always
  # report "changed".
  changed_when: true
Loading
Loading