diff --git a/.gitignore b/.gitignore index 0bb458a3..3b03455b 100644 --- a/.gitignore +++ b/.gitignore @@ -392,6 +392,9 @@ dockerfiles/Courses/DL/data/FashionMNIST/raw/ # Local config overrides (any file containing 'local') *local* *.local.* +# Exceptions: PXE agent disk-persistence templates would otherwise match *local* +!deploy/ansible/roles/pxe_controller/templates/mount-local-disk.service.j2 +!deploy/ansible/roles/pxe_controller/templates/mount-local-disk.sh.j2 # Offline bundle artifacts auplc-bundle-*/ diff --git a/deploy/ansible/playbooks/pb-pxe-controller.yml b/deploy/ansible/playbooks/pb-pxe-controller.yml new file mode 100644 index 00000000..78313de3 --- /dev/null +++ b/deploy/ansible/playbooks/pb-pxe-controller.yml @@ -0,0 +1,93 @@ +# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+---
+- name: Setup PXE Controller for diskless k3s agent boot
+  hosts: pxe_controller
+  become: true
+  gather_facts: true
+  vars:
+    # ── config.env equivalent ──────────────────────────────
+    pxe_rootfs_force_rebuild: false
+    # Network
+    pxe_network_interface: "ens18"
+    pxe_subnet: "192.168.1.0/24"
+    pxe_gateway: "192.168.1.254"
+    pxe_dns_servers: "8.8.8.8,8.8.4.4"
+
+    # PXE Controller -- REQUIRED: replace with your service host IP
+    pxe_controller_ip: ""
+
+    # k3s Server Nodes -- REQUIRED: replace with your k3s server node IP(s).
+    # Kept as an empty list (not a list holding an empty string) so the role's
+    # pre-flight "length > 0" assertion rejects an unconfigured playbook.
+    pxe_k3s_server_ips: []
+    # - "192.0.2.10"
+
+    # Root password for PXE-booted machines (leave empty to disable password login)
+    # Use ansible-vault: ansible-vault encrypt_string 'MyPass' --name 'pxe_rootfs_password'
+    pxe_rootfs_password: ""
+
+    # SSH keys for rootfs login (at least one required -- fill in before running).
+    # The playbook fails fast if this list is empty.
+    pxe_rootfs_authorized_keys: []
+    # - "ssh-ed25519 AAAA... you@workstation"
+    # - "ssh-rsa AAAA... ci@runner"
+
+    # APT Mirror
+    pxe_apt_mirror: "http://tw.archive.ubuntu.com/ubuntu"
+
+    pxe_rootfs_packages:
+      - linux-image-6.14.0-1018-oem
+      - linux-headers-6.14.0-1018-oem
+      - initramfs-tools
+      - linux-firmware
+      - nfs-common
+      - overlayroot
+      - openssh-server
+      - curl
+      - net-tools
+      - iproute2
+      - systemd-sysv
+      - sudo
+      - kmod
+      - pciutils
+      - e2fsprogs
+      - dkms
+      - build-essential
+      - libatomic1
+      - libquadmath0
+
+    pxe_initramfs_modules:
+      - nfs
+      - nfsv3
+      - overlay
+      - r8125
+      - amdgpu
+      # See pxe_initramfs_modules notes in role defaults: the nft_reject
+      # modules must come from the initramfs because, on this NFS+overlayroot
+      # rootfs, modprobe at runtime fails with ESTALE on modules.dep.bin
+      # whenever the PXE controller has rebuilt the rootfs underneath a
+      # running node. Pre-loading from initramfs avoids that path entirely.
+ - nft_reject_inet + - nft_reject_ipv4 + - nft_reject_ipv6 + + pxe_k3s_data_dir: "/var/lib/rancher/k3s" + + roles: + - role: pxe_controller diff --git a/deploy/ansible/roles/pxe_controller/defaults/main.yml b/deploy/ansible/roles/pxe_controller/defaults/main.yml new file mode 100644 index 00000000..4fd657e0 --- /dev/null +++ b/deploy/ansible/roles/pxe_controller/defaults/main.yml @@ -0,0 +1,138 @@ +# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Role defaults -- override these in the playbook vars: section. +# Maps 1:1 to config.env from the shell scripts. 
+ +--- +# ============================================================ +# Network +# ============================================================ +pxe_network_interface: "ens18" +pxe_subnet: "192.168.1.0/24" +pxe_gateway: "192.168.1.254" +pxe_dns_servers: "8.8.8.8,8.8.4.4" + +# ============================================================ +# PXE Controller +# ============================================================ +# REQUIRED -- set to your PXE controller / service host IP in the playbook. +# Left empty on purpose so site addresses are not baked into the repo; +# the role asserts this is set before running. +pxe_controller_ip: "" + +# ============================================================ +# k3s Server Nodes +# ============================================================ +# REQUIRED -- set to your k3s server node IP(s) in the playbook. +# Left empty on purpose; the role asserts the list is non-empty. +pxe_k3s_server_ips: [] + +# ============================================================ +# Credentials +# ============================================================ + +# Root password for rootfs (optional -- leave empty to disable password login) +# Use ansible-vault to encrypt: ansible-vault encrypt_string 'MyPass' --name 'pxe_rootfs_password' +pxe_rootfs_password: "" + +# SSH public keys injected into rootfs /root/.ssh/authorized_keys. +# At least one key is REQUIRED (playbook will fail without it). +# pxe_rootfs_authorized_keys: +# - "ssh-ed25519 AAAA... you@workstation" +# - "ssh-rsa AAAA... 
ci@runner" +pxe_rootfs_authorized_keys: [] + +# ============================================================ +# APT Mirror +# ============================================================ +pxe_apt_mirror: "http://tw.archive.ubuntu.com/ubuntu" + +# ============================================================ +# Rootfs lifecycle +# ============================================================ +# Force delete + rebuild rootfs from scratch (debootstrap) +pxe_rootfs_force_rebuild: true +# Run apt-get upgrade inside rootfs during chroot setup +pxe_rootfs_upgrade: false + +# ============================================================ +# Paths +# ============================================================ +pxe_nfs_root: "/srv/nfs/rootfs" +pxe_tftp_root: "/srv/tftp" +pxe_web_root: "/var/www/html" + +# ============================================================ +# Rootfs build settings +# ============================================================ +pxe_ubuntu_codename: "noble" + +pxe_rootfs_packages: + - linux-image-6.14.0-1018-oem + - linux-headers-6.14.0-1018-oem + - initramfs-tools + - linux-firmware + - nfs-common + - overlayroot + - openssh-server + - curl + - net-tools + - iproute2 + - systemd-sysv + - sudo + - kmod + - pciutils + - e2fsprogs + - dkms + - build-essential + - libatomic1 + - libquadmath0 + +pxe_initramfs_modules: + - nfs + - nfsv3 + - overlay + - r8125 + - amdgpu + # nftables `reject` modules. Required by Cloudflare WARP firewall rules. + # Loaded from the initramfs (BEFORE pivot_root) so they end up resident in + # kernel memory before the overlayroot+NFS rootfs is mounted. This sidesteps + # an overlayfs failure mode where, after the PXE controller rebuilds the + # NFS rootfs, the overlay's cached inode of /lib/modules//modules.dep.bin + # becomes ESTALE and modprobe stops working entirely on running nodes. 
+ - nft_reject_inet + - nft_reject_ipv4 + - nft_reject_ipv6 + +pxe_k3s_data_dir: "/var/lib/rancher/k3s" + +# ============================================================ +# Host packages (installed on the PXE controller itself) +# ============================================================ +pxe_host_packages: + - dnsmasq + - pxelinux + - syslinux-common + - apache2 + - nfs-kernel-server + - debootstrap + - qemu-guest-agent + - grub-efi-amd64-signed + - shim-signed diff --git a/deploy/ansible/roles/pxe_controller/files/r8125-9.016.01.tar.bz2 b/deploy/ansible/roles/pxe_controller/files/r8125-9.016.01.tar.bz2 new file mode 100644 index 00000000..a5227097 Binary files /dev/null and b/deploy/ansible/roles/pxe_controller/files/r8125-9.016.01.tar.bz2 differ diff --git a/deploy/ansible/roles/pxe_controller/handlers/main.yml b/deploy/ansible/roles/pxe_controller/handlers/main.yml new file mode 100644 index 00000000..0e196e59 --- /dev/null +++ b/deploy/ansible/roles/pxe_controller/handlers/main.yml @@ -0,0 +1,39 @@ +# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +--- +- name: Restart dnsmasq + ansible.builtin.systemd: + name: dnsmasq + state: restarted + daemon_reload: true + +- name: Restart nfs-kernel-server + ansible.builtin.systemd: + name: nfs-kernel-server + state: restarted + +- name: Restart apache2 + ansible.builtin.systemd: + name: apache2 + state: restarted + +- name: Reload NFS exports + ansible.builtin.command: exportfs -a + changed_when: true diff --git a/deploy/ansible/roles/pxe_controller/tasks/main.yml b/deploy/ansible/roles/pxe_controller/tasks/main.yml new file mode 100644 index 00000000..a9e5a235 --- /dev/null +++ b/deploy/ansible/roles/pxe_controller/tasks/main.yml @@ -0,0 +1,525 @@ +# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+---
+# ==========================================================
+# 0. Pre-flight validation of required site-specific values
+# ==========================================================
+
+- name: Assert PXE controller IP is set
+  ansible.builtin.assert:
+    that:
+      - pxe_controller_ip | default('') | length > 0
+      - "'<' not in (pxe_controller_ip | string)"
+    fail_msg: >-
+      pxe_controller_ip is not set. Set it to your PXE controller / service
+      host IP in the playbook vars (no value ships in the repo).
+
+# The second condition rejects entries that are blank (empty or spaces only)
+# as well as "<...>" placeholders; a list such as [""] has length 1 and would
+# otherwise pass the first condition.
+- name: Assert k3s server node IPs are set
+  ansible.builtin.assert:
+    that:
+      - pxe_k3s_server_ips | default([]) | length > 0
+      - pxe_k3s_server_ips | map('string') | select('match', '^ *(<|$)') | list | length == 0
+    fail_msg: >-
+      pxe_k3s_server_ips is empty or still a placeholder. Set at least one
+      k3s server node IP in the playbook vars (no value ships in the repo).
+
+# Checked here, before the rootfs is built and exported, so a missing key
+# aborts the run immediately. Section 10 asserts the same condition again
+# right before writing authorized_keys.
+- name: Assert at least one rootfs SSH public key is set
+  ansible.builtin.assert:
+    that:
+      - pxe_rootfs_authorized_keys | default([]) | length > 0
+    fail_msg: >-
+      pxe_rootfs_authorized_keys is empty -- at least one SSH public key is
+      required. Set it in the playbook vars before running.
+
+# ==========================================================
+# 1. Install PXE controller host packages
+# ==========================================================
+
+# NOTE(review): this task is commented out, so the tools used by later tasks
+# (debootstrap, a2enconf, /usr/lib/PXELINUX/pxelinux.0, the signed GRUB EFI
+# binary, dnsmasq, nfs-kernel-server) must already be installed on the host.
+# Confirm this is intentional.
+# - name: Install PXE controller packages
+#   ansible.builtin.apt:
+#     name: "{{ pxe_host_packages }}"
+#     state: present
+#     update_cache: true
+#     cache_valid_time: 3600
+
+# ==========================================================
+# 2.
Build NFS rootfs with debootstrap
+# ==========================================================
+
+- name: Stop NFS before rootfs rebuild
+  when: pxe_rootfs_force_rebuild | bool
+  ansible.builtin.systemd:
+    name: nfs-kernel-server
+    state: stopped
+  ignore_errors: true
+
+# Unmounts any leftover chroot bind mounts, then deletes the rootfs tree.
+# The umount failures are deliberately tolerated ("|| true"), so rm is run
+# with --one-file-system: if /dev, /sys or /proc is still bind-mounted, rm
+# refuses to descend into it and the task fails, instead of deleting the
+# host's own device nodes through the bind mount.
+- name: Remove existing rootfs (force rebuild)
+  when: pxe_rootfs_force_rebuild | bool
+  ansible.builtin.shell: |
+    mountpoint -q {{ pxe_nfs_root }}/dev && umount {{ pxe_nfs_root }}/dev || true
+    mountpoint -q {{ pxe_nfs_root }}/sys && umount {{ pxe_nfs_root }}/sys || true
+    mountpoint -q {{ pxe_nfs_root }}/proc && umount {{ pxe_nfs_root }}/proc || true
+    rm -rf --one-file-system {{ pxe_nfs_root }}
+  changed_when: true
+
+# /bin/bash inside the tree is used as the marker for "rootfs already built".
+- name: Check if NFS rootfs already exists
+  ansible.builtin.stat:
+    path: "{{ pxe_nfs_root }}/bin/bash"
+  register: _rootfs_exists
+
+- name: Build NFS rootfs
+  when: not _rootfs_exists.stat.exists
+  block:
+    - name: Create NFS root directory
+      ansible.builtin.file:
+        path: "{{ pxe_nfs_root }}"
+        state: directory
+        mode: "0755"
+
+    - name: Run debootstrap to create base rootfs
+      ansible.builtin.command:
+        cmd: >-
+          debootstrap --arch=amd64
+          {{ pxe_ubuntu_codename }}
+          {{ pxe_nfs_root }}
+          {{ pxe_apt_mirror }}
+      changed_when: true
+
+# ==========================================================
+# 3.
Deploy agent service files into rootfs
+# ==========================================================
+
+# Target directories for the unit files and helper scripts below.
+- name: Create systemd and bin directories in rootfs
+  ansible.builtin.file:
+    path: "{{ pxe_nfs_root }}/{{ item }}"
+    state: directory
+    mode: "0755"
+  loop:
+    - etc/systemd/system
+    - usr/local/bin
+
+# Each entry renders templates/<name>.j2 to <rootfs>/<dir>/<name>.
+# Units are installed 0644, scripts 0755.
+- name: Deploy agent service units and scripts
+  ansible.builtin.template:
+    src: "{{ item.name }}.j2"
+    dest: "{{ pxe_nfs_root }}/{{ item.dir }}/{{ item.name }}"
+    mode: "{{ item.mode }}"
+  loop:
+    - { name: set-hostname.service, dir: etc/systemd/system, mode: "0644" }
+    - { name: set-hostname.sh, dir: usr/local/bin, mode: "0755" }
+    - { name: k3s-auto-join.service, dir: etc/systemd/system, mode: "0644" }
+    - { name: k3s-auto-join.sh, dir: usr/local/bin, mode: "0755" }
+    - { name: mount-local-disk.service, dir: etc/systemd/system, mode: "0644" }
+    - { name: mount-local-disk.sh, dir: usr/local/bin, mode: "0755" }
+  loop_control:
+    label: "{{ item.name }}"
+
+# The tarball is unpacked and built by the chroot setup script.
+- name: Copy r8125 driver source tarball into rootfs
+  ansible.builtin.copy:
+    src: r8125-9.016.01.tar.bz2
+    dest: "{{ pxe_nfs_root }}/tmp/r8125-9.016.01.tar.bz2"
+    mode: "0644"
+
+# ==========================================================
+# 4.
Configure initramfs modules in rootfs +# ========================================================== + +- name: Ensure initramfs modules directory exists + ansible.builtin.file: + path: "{{ pxe_nfs_root }}/etc/initramfs-tools" + state: directory + mode: "0755" + +- name: Add required modules to initramfs + ansible.builtin.lineinfile: + path: "{{ pxe_nfs_root }}/etc/initramfs-tools/modules" + line: "{{ item }}" + create: true + mode: "0644" + loop: "{{ pxe_initramfs_modules }}" + +# Eagerly load the nftables `reject` modules at boot. +# +# These modules are only needed if you later add an optional Cloudflare WARP +# egress proxy, which is OUT OF SCOPE for this base deployment (see the +# "Out Of Scope" section of the Multi-AIPC PXE deployment guide). WARP installs +# nftables rules using `ip protocol tcp reject with tcp reset`, provided by +# nft_reject_inet plus nf_reject_ipv4/ipv6. The .ko files already ship with +# linux-modules-, but on this overlayroot+tmpfs PXE rootfs udev +# demand-loading does not reliably fire, so we list them in modules-load.d for +# systemd-modules-load.service to load eagerly. Pre-loading is harmless when +# WARP is not used. modprobe pulls in nf_tables/nft_reject transitively, so +# listing the leaves is sufficient. +# +# This is deliberately written as a host-side task (not inside the chroot +# script) so it cannot perturb the r8125 DKMS build, which has shown to be +# sensitive to seemingly unrelated changes in the chroot environment. +- name: Ensure modules-load.d directory exists in rootfs + ansible.builtin.file: + path: "{{ pxe_nfs_root }}/etc/modules-load.d" + state: directory + mode: "0755" + +- name: Eagerly load nftables reject modules at boot (optional Cloudflare WARP egress) + ansible.builtin.copy: + dest: "{{ pxe_nfs_root }}/etc/modules-load.d/nf-reject.conf" + content: | + # Only needed by the optional Cloudflare WARP egress proxy, which is out + # of scope for the base deployment. Harmless to pre-load otherwise. 
+ nft_reject_inet + nft_reject_ipv4 + nft_reject_ipv6 + mode: "0644" + +# ========================================================== +# 5. Run chroot setup (apt install, systemctl enable, initramfs) +# ========================================================== + +- name: Deploy chroot setup script + ansible.builtin.template: + src: chroot-setup.sh.j2 + dest: "{{ pxe_nfs_root }}/tmp/chroot-setup.sh" + mode: "0755" + +- name: Execute chroot setup + block: + - name: Mount virtual filesystems for chroot + ansible.builtin.shell: | + mountpoint -q {{ pxe_nfs_root }}/proc || mount --bind /proc {{ pxe_nfs_root }}/proc + mountpoint -q {{ pxe_nfs_root }}/sys || mount --bind /sys {{ pxe_nfs_root }}/sys + mountpoint -q {{ pxe_nfs_root }}/dev || mount --bind /dev {{ pxe_nfs_root }}/dev + changed_when: true + + - name: Run chroot setup script + ansible.builtin.command: + cmd: "chroot {{ pxe_nfs_root }} /tmp/chroot-setup.sh" + changed_when: true + register: _chroot_result + + always: + - name: Unmount virtual filesystems from chroot + ansible.builtin.shell: | + mountpoint -q {{ pxe_nfs_root }}/dev && umount {{ pxe_nfs_root }}/dev || true + mountpoint -q {{ pxe_nfs_root }}/sys && umount {{ pxe_nfs_root }}/sys || true + mountpoint -q {{ pxe_nfs_root }}/proc && umount {{ pxe_nfs_root }}/proc || true + changed_when: true + +- name: Remove chroot setup script + ansible.builtin.file: + path: "{{ pxe_nfs_root }}/tmp/chroot-setup.sh" + state: absent + +# ========================================================== +# 6. 
Copy kernel and initrd to TFTP
+# ==========================================================
+
+# Picks the highest-versioned /boot/vmlinuz-* and strips the prefix.
+# The pipeline's exit status is that of the final sed, so a rootfs with no
+# kernel would yield rc=0 and an empty string; failed_when makes that an
+# explicit failure here rather than a confusing missing-file error later.
+- name: Find latest kernel in rootfs
+  ansible.builtin.shell: |
+    ls {{ pxe_nfs_root }}/boot/vmlinuz-* | sort -V | tail -1 | sed 's|.*/vmlinuz-||'
+  register: _kver
+  changed_when: false
+  failed_when: _kver.rc != 0 or (_kver.stdout | trim | length == 0)
+
+- name: Set kernel version fact
+  ansible.builtin.set_fact:
+    pxe_kernel_version: "{{ _kver.stdout }}"
+
+- name: Create TFTP directory structure
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: "0755"
+  loop:
+    - "{{ pxe_tftp_root }}"
+    - "{{ pxe_tftp_root }}/pxelinux.cfg"
+    - "{{ pxe_tftp_root }}/grub"
+
+- name: Copy kernel to TFTP root
+  ansible.builtin.copy:
+    src: "{{ pxe_nfs_root }}/boot/vmlinuz-{{ pxe_kernel_version }}"
+    dest: "{{ pxe_tftp_root }}/vmlinuz"
+    remote_src: true
+    mode: "0644"
+
+- name: Copy initrd to TFTP root
+  ansible.builtin.copy:
+    src: "{{ pxe_nfs_root }}/boot/initrd.img-{{ pxe_kernel_version }}"
+    dest: "{{ pxe_tftp_root }}/initrd.img"
+    remote_src: true
+    mode: "0644"
+
+- name: Copy pxelinux.0 bootloader
+  ansible.builtin.copy:
+    src: /usr/lib/PXELINUX/pxelinux.0
+    dest: "{{ pxe_tftp_root }}/pxelinux.0"
+    remote_src: true
+    mode: "0644"
+
+# Uses the copy module (like the other boot files) instead of raw cp, so the
+# task only reports "changed" when a file actually differs and the mode is
+# set explicitly.
+- name: Copy syslinux menu modules
+  ansible.builtin.copy:
+    src: "/usr/lib/syslinux/modules/bios/{{ item }}"
+    dest: "{{ pxe_tftp_root }}/{{ item }}"
+    remote_src: true
+    mode: "0644"
+  loop:
+    - ldlinux.c32
+    - menu.c32
+    - libutil.c32
+    - libcom32.c32
+
+- name: Copy GRUB EFI signed binary to TFTP root
+  ansible.builtin.copy:
+    src: /usr/lib/grub/x86_64-efi-signed/grubnetx64.efi.signed
+    dest: "{{ pxe_tftp_root }}/grubnetx64.efi"
+    remote_src: true
+    mode: "0644"
+
+# ==========================================================
+# 7.
Create PXE boot menus +# ========================================================== + +- name: Deploy BIOS PXE boot menu + ansible.builtin.template: + src: pxelinux-default.cfg.j2 + dest: "{{ pxe_tftp_root }}/pxelinux.cfg/default" + mode: "0644" + +- name: Deploy UEFI GRUB boot menu + ansible.builtin.template: + src: grub.cfg.j2 + dest: "{{ pxe_tftp_root }}/grub/grub.cfg" + mode: "0644" + +# ========================================================== +# 8. Configure NFS server +# ========================================================== + +- name: Add NFS export for rootfs + ansible.builtin.lineinfile: + path: /etc/exports + line: "{{ pxe_nfs_root }} {{ pxe_subnet }}(ro,sync,no_subtree_check,no_root_squash)" + regexp: "^{{ pxe_nfs_root | regex_escape }}\\s" + create: true + mode: "0644" + +- name: Re-export and restart NFS server + ansible.builtin.shell: | + exportfs -ra + systemctl restart nfs-kernel-server + changed_when: true + +- name: Enable NFS server + ansible.builtin.systemd: + name: nfs-kernel-server + enabled: true + +- name: Wait for NFS server to be ready + ansible.builtin.command: showmount -e localhost + register: _nfs_ready + retries: 5 + delay: 2 + until: _nfs_ready.rc == 0 + changed_when: false + +- name: Flush handlers for NFS + ansible.builtin.meta: flush_handlers + +- name: Verify NFS exports + ansible.builtin.command: showmount -e localhost + register: _nfs_exports + changed_when: false + +- name: Display NFS exports + ansible.builtin.debug: + var: _nfs_exports.stdout_lines + +# ========================================================== +# 9. 
Configure dnsmasq (Proxy DHCP + TFTP) +# ========================================================== + +- name: Deploy dnsmasq PXE configuration + ansible.builtin.template: + src: dnsmasq-pxe.conf.j2 + dest: /etc/dnsmasq.d/pxe.conf + mode: "0644" + notify: Restart dnsmasq + +- name: Enable and start dnsmasq + ansible.builtin.systemd: + name: dnsmasq + state: started + enabled: true + +- name: Flush handlers for dnsmasq + ansible.builtin.meta: flush_handlers + +- name: Verify TFTP boot files + ansible.builtin.stat: + path: "{{ pxe_tftp_root }}/{{ item }}" + loop: + - pxelinux.0 + - grubnetx64.efi + - vmlinuz + - initrd.img + - grub/grub.cfg + register: _tftp_files + +- name: Report TFTP boot file status + ansible.builtin.debug: + msg: "{{ item.item }}: {{ 'OK' if item.stat.exists else 'MISSING' }}" + loop: "{{ _tftp_files.results }}" + loop_control: + label: "{{ item.item }}" + +# ========================================================== +# 10. Inject SSH authorized keys into rootfs +# ========================================================== + +- name: Fail if no SSH keys provided + ansible.builtin.assert: + that: pxe_rootfs_authorized_keys | default([]) | length > 0 + fail_msg: "pxe_rootfs_authorized_keys is empty -- at least one SSH public key is required" + +- name: Create .ssh directory in rootfs + ansible.builtin.file: + path: "{{ pxe_nfs_root }}/root/.ssh" + state: directory + mode: "0700" + +- name: Inject authorized keys into rootfs + ansible.builtin.copy: + content: "{{ pxe_rootfs_authorized_keys | join('\n') }}\n" + dest: "{{ pxe_nfs_root }}/root/.ssh/authorized_keys" + mode: "0600" + +# ========================================================== +# 11. 
Prepare HTTP directory for k3s tokens +# ========================================================== + +- name: Create k3s token directory + ansible.builtin.file: + path: "{{ pxe_web_root }}/k3s" + state: directory + mode: "0755" + +- name: Deploy Apache ACL for k3s tokens + ansible.builtin.template: + src: k3s-acl.conf.j2 + dest: /etc/apache2/conf-available/k3s-acl.conf + mode: "0644" + notify: Restart apache2 + +- name: Enable k3s-acl Apache configuration + ansible.builtin.command: + cmd: a2enconf k3s-acl + register: _a2enconf + changed_when: "'Enabling' in _a2enconf.stdout" + notify: Restart apache2 + +- name: Enable and start Apache + ansible.builtin.systemd: + name: apache2 + state: started + enabled: true + +# ========================================================== +# 12. Verify PXE server status +# ========================================================== + +- name: Collect service status + ansible.builtin.service_facts: + +- name: Check critical services are active + ansible.builtin.assert: + that: + - ansible_facts.services[item + '.service'] is defined + - ansible_facts.services[item + '.service'].state in ['running', 'exited'] + - ansible_facts.services[item + '.service'].status == 'enabled' + fail_msg: "{{ item }} is NOT active (state: {{ ansible_facts.services[item + '.service'].state | default('unknown') }})" + success_msg: "{{ item }} is active ({{ ansible_facts.services[item + '.service'].state }})" + quiet: true + loop: + - dnsmasq + - apache2 + +- name: Check NFS export is active + ansible.builtin.command: showmount -e localhost + register: _verify_nfs + changed_when: false + +- name: Verify rootfs is exported + ansible.builtin.assert: + that: "pxe_nfs_root in _verify_nfs.stdout" + fail_msg: "NFS export for {{ pxe_nfs_root }} not found" + success_msg: "NFS export OK: {{ pxe_nfs_root }}" + +- name: Check TFTP boot files exist + ansible.builtin.stat: + path: "{{ pxe_tftp_root }}/{{ item }}" + loop: + - pxelinux.0 + - grubnetx64.efi + - vmlinuz + 
- initrd.img + - grub/grub.cfg + - pxelinux.cfg/default + register: _verify_tftp + +- name: Verify no TFTP files are missing + ansible.builtin.assert: + that: item.stat.exists + fail_msg: "MISSING: {{ item.item }}" + success_msg: "OK: {{ item.item }} ({{ item.stat.size | default(0) }} bytes)" + loop: "{{ _verify_tftp.results }}" + loop_control: + label: "{{ item.item }}" + +- name: Check HTTP token endpoint is reachable + ansible.builtin.uri: + url: "http://127.0.0.1/k3s/" + status_code: [200, 403] + register: _verify_http + +- name: Print PXE server summary + ansible.builtin.debug: + msg: | + ══════════════════════════════════════════ + PXE Controller ready on {{ pxe_controller_ip }} + ══════════════════════════════════════════ + dnsmasq : Proxy DHCP + TFTP on {{ pxe_network_interface }} + NFS : {{ pxe_nfs_root }} → {{ pxe_subnet }} + Apache : http://{{ pxe_controller_ip }}/k3s/ + Kernel : {{ pxe_kernel_version | default('unknown') }} + ────────────────────────────────────────── + Next steps (see the Multi-AIPC PXE deployment guide): + 1. Install the K3s server and configure kubectl + 2. Publish the K3s token + kubeconfig under {{ pxe_web_root }}/k3s/ + 3. 
Netboot the agents; they auto-join via k3s-auto-join.service
+      ══════════════════════════════════════════
diff --git a/deploy/ansible/roles/pxe_controller/templates/chroot-setup.sh.j2 b/deploy/ansible/roles/pxe_controller/templates/chroot-setup.sh.j2
new file mode 100644
index 00000000..b5900615
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/chroot-setup.sh.j2
@@ -0,0 +1,125 @@
+#!/bin/bash
+set -euo pipefail
+
+# -- Remove debootstrap legacy sources.list to avoid duplicates --
+rm -f /etc/apt/sources.list
+
+# -- APT sources (DEB822 format) --
+cat > /etc/apt/sources.list.d/ubuntu.sources << 'APT_EOF'
+Types: deb
+URIs: {{ pxe_apt_mirror }}
+Suites: {{ pxe_ubuntu_codename }} {{ pxe_ubuntu_codename }}-updates {{ pxe_ubuntu_codename }}-security
+Components: main restricted universe multiverse
+Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
+APT_EOF
+
+# -- DNS (needed for apt and runtime) --
+cat > /etc/resolv.conf << 'DNS_EOF'
+{% for ns in pxe_dns_servers.split(',') %}
+nameserver {{ ns | trim }}
+{% endfor %}
+DNS_EOF
+
+# -- Install packages --
+export DEBIAN_FRONTEND=noninteractive
+apt-get update
+apt-get install -y --no-install-recommends \
+{% for pkg in pxe_rootfs_packages %}
+  {{ pkg }}{{ '' if loop.last else ' \\' }}
+{% endfor %}
+
+{% if pxe_rootfs_upgrade | default(false) | bool %}
+apt-get dist-upgrade -y
+{% endif %}
+
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+# -- Root credentials --
+# The "root:<password>" argument is shell-quoted at render time so that
+# characters such as $, backticks or double quotes in the password reach
+# chpasswd literally instead of being expanded by bash.
+{% if pxe_rootfs_password | default('') | length > 0 %}
+printf '%s\n' {{ ('root:' ~ pxe_rootfs_password) | quote }} | chpasswd
+{% else %}
+passwd -l root
+{% endif %}
+
+# -- Allow root SSH login --
+# Password login for root is only permitted when a root password was set.
+mkdir -p /etc/ssh/sshd_config.d
+{% if pxe_rootfs_password | default('') | length > 0 %}
+echo "PermitRootLogin yes" > /etc/ssh/sshd_config.d/allow-root.conf
+{% else %}
+echo "PermitRootLogin prohibit-password" > /etc/ssh/sshd_config.d/allow-root.conf
+{% endif %}
+
+# -- RTL8125 2.5GbE driver (build from vendor source) --
+# Take the highest version if /lib/modules holds more than one kernel
+# directory; a bare "ls" would then produce a multi-word KVER.
+KVER=$(ls /lib/modules/ | sort -V | tail -n 1)
+apt-get
remove -y r8125-dkms 2>/dev/null || true +dkms remove r8125/9.016.01 --all 2>/dev/null || true +rm -rf /usr/src/r8125-9.016.01 +tar xjf /tmp/r8125-9.016.01.tar.bz2 -C /usr/src/ +cat > /usr/src/r8125-9.016.01/dkms.conf << 'DKMSEOF' +PACKAGE_NAME="r8125" +PACKAGE_VERSION="9.016.01" +BUILT_MODULE_NAME[0]="r8125" +BUILT_MODULE_LOCATION[0]="src/" +DEST_MODULE_LOCATION[0]="/kernel/drivers/net/ethernet/realtek/" +AUTOINSTALL="yes" +MAKE="'make' -j$(nproc) -C src/ KERNELDIR=/lib/modules/${kernelver}/build" +CLEAN="'make' -C src/ clean" +DKMSEOF +dkms add -m r8125 -v 9.016.01 +dkms build -m r8125 -v 9.016.01 -k "$KVER" +dkms install -m r8125 -v 9.016.01 -k "$KVER" +dkms status +rm -f /tmp/r8125-9.016.01.tar.bz2 + +# -- Blacklist r8169 so the DKMS r8125 driver claims RTL8125 devices -- +echo "blacklist r8169" > /etc/modprobe.d/blacklist-r8169.conf + +# -- dhcpcd: longer timeout for 2.5GbE link negotiation + switch STP -- +cat > /etc/dhcpcd.conf << 'DHCPCD_EOF' +timeout 60 +reboot 5 +noarp +DHCPCD_EOF + +cat > /etc/initramfs-tools/hooks/pxe-net-tweaks << 'HOOK' +#!/bin/sh +PREREQ="" +prereqs() { echo "$PREREQ"; } +case "$1" in prereqs) prereqs; exit 0;; esac +. 
/usr/share/initramfs-tools/hook-functions +mkdir -p "${DESTDIR}/etc/modprobe.d" +cp /etc/modprobe.d/blacklist-r8169.conf "${DESTDIR}/etc/modprobe.d/" +mkdir -p "${DESTDIR}/etc" +cp /etc/dhcpcd.conf "${DESTDIR}/etc/dhcpcd.conf" +HOOK +chmod +x /etc/initramfs-tools/hooks/pxe-net-tweaks + +# -- GPU udev rules (let containers access AMD GPUs) -- +tee /etc/udev/rules.d/70-amdgpu.rules << RULES +KERNEL=="kfd", MODE="0666" +KERNEL=="renderD[0-9]*", MODE="0666" +RULES + +# -- Disable systemd-networkd (kernel ip=dhcp handles NFS root networking) -- +rm -f /etc/netplan/*.yaml +systemctl disable systemd-networkd 2>/dev/null || true +systemctl disable systemd-networkd-wait-online 2>/dev/null || true +echo "nameserver 8.8.8.8" > /etc/resolv.conf + +# -- overlayroot: make rootfs read-only with tmpfs writable layer -- +echo 'overlayroot="tmpfs:swap=1"' > /etc/overlayroot.conf + +# -- Clear hostname inherited from PXE controller build host -- +echo "localhost" > /etc/hostname + +# -- Install k3s agent binary (skip start, skip enable) -- +curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_ENABLE=true sh -s - agent + +# -- Enable boot-time services -- +systemctl enable set-hostname.service +systemctl enable k3s-auto-join.service +systemctl enable mount-local-disk.service + +# -- Rebuild initramfs with NFS/overlay/driver modules -- +update-initramfs -u diff --git a/deploy/ansible/roles/pxe_controller/templates/dnsmasq-pxe.conf.j2 b/deploy/ansible/roles/pxe_controller/templates/dnsmasq-pxe.conf.j2 new file mode 100644 index 00000000..e266ebb1 --- /dev/null +++ b/deploy/ansible/roles/pxe_controller/templates/dnsmasq-pxe.conf.j2 @@ -0,0 +1,25 @@ +port=0 + +interface={{ pxe_network_interface }} +bind-interfaces + +dhcp-range={{ pxe_subnet | regex_replace('/.*', '') }},proxy + +# UEFI client detection (client-arch: 6=IA32, 7=BC/EFI, 9=x86-64) +dhcp-match=set:efi-x86_64,option:client-arch,6 +dhcp-match=set:efi-x86_64,option:client-arch,7 
+dhcp-match=set:efi-x86_64,option:client-arch,9
+
+dhcp-boot=tag:efi-x86_64,grubnetx64.efi,,{{ pxe_controller_ip }}
+dhcp-boot=tag:!efi-x86_64,pxelinux.0,,{{ pxe_controller_ip }}
+
+pxe-service=x86PC,"Diskless Boot",pxelinux
+pxe-service=IA32_EFI,"Diskless Boot",grubnetx64.efi
+pxe-service=BC_EFI,"Diskless Boot",grubnetx64.efi
+pxe-service=x86-64_EFI,"Diskless Boot",grubnetx64.efi
+
+enable-tftp
+tftp-root={{ pxe_tftp_root }}
+
+log-dhcp
+log-queries
diff --git a/deploy/ansible/roles/pxe_controller/templates/grub.cfg.j2 b/deploy/ansible/roles/pxe_controller/templates/grub.cfg.j2
new file mode 100644
index 00000000..2f9d618d
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/grub.cfg.j2
@@ -0,0 +1,14 @@
+# Entry 0 is booted automatically after a 5 second menu timeout.
+set default=0
+set timeout=5
+
+# Root filesystem is mounted over NFS (vers=3) from pxe_controller_ip:pxe_nfs_root.
+menuentry "Diskless Boot (NFS root + overlayfs)" {
+    linux vmlinuz root=/dev/nfs nfsroot={{ pxe_controller_ip }}:{{ pxe_nfs_root }},ro,vers=3 ip=dhcp rootdelay=10 rw udev.event_timeout=10
+    initrd initrd.img
+}
+
+# Runs GRUB's `exit` command instead of loading the network kernel.
+menuentry "Boot from local disk" {
+    exit
+}
diff --git a/deploy/ansible/roles/pxe_controller/templates/k3s-acl.conf.j2 b/deploy/ansible/roles/pxe_controller/templates/k3s-acl.conf.j2
new file mode 100644
index 00000000..d46d88a2
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/k3s-acl.conf.j2
@@ -0,0 +1,7 @@
+# Access control: admit only clients in pxe_subnet and the loopback address.
+# NOTE(review): no enclosing section directive is visible around these lines;
+# confirm how this file is scoped where it is included.
+
+    Require ip {{ pxe_subnet }}
+    Require ip 127.0.0.1
+
diff --git a/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.service.j2 b/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.service.j2
new file mode 100644
index 00000000..60475c3f
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.service.j2
@@ -0,0 +1,15 @@
+# One-shot unit that runs k3s-auto-join.sh. It is ordered after the network,
+# set-hostname and mount-local-disk units; only set-hostname is a Requires=.
+[Unit]
+Description=Auto-join k3s cluster
+After=network-online.target set-hostname.service mount-local-disk.service
+Wants=network-online.target
+Requires=set-hostname.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/k3s-auto-join.sh
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git
a/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.sh.j2 b/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.sh.j2
new file mode 100644
index 00000000..64f4c8a2
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/k3s-auto-join.sh.j2
@@ -0,0 +1,116 @@
+#!/bin/bash
+# Boot-time k3s agent bootstrap for PXE-booted nodes: restores the persisted
+# node password, checks whether this hostname is already registered, fetches
+# the join token from the PXE controller, writes /etc/rancher/k3s/config.yaml
+# and starts k3s-agent.
+set -euo pipefail
+
+PXE_SERVER="{{ pxe_controller_ip }}"
+K3S_SERVERS="{{ pxe_k3s_server_ips | join(' ') }}"
+TOKEN_URL="http://${PXE_SERVER}/k3s/token"
+KUBECONFIG_URL="http://${PXE_SERVER}/k3s/kubeconfig"
+K3S_DATA="{{ pxe_k3s_data_dir }}"
+
+HOSTNAME=$(hostname)
+NEED_JOIN=true
+
+# Print the first server in K3S_SERVERS that answers on :6443/ping; if none
+# responds, fall back to the first configured server.
+find_active_server() {
+    for srv in $K3S_SERVERS; do
+        if curl -sf --connect-timeout 3 -k "https://${srv}:6443/ping" &>/dev/null; then
+            echo "$srv"
+            return
+        fi
+    done
+    echo "$K3S_SERVERS" | awk '{print $1}'
+}
+
+# Return 0 if a node named $HOSTNAME is already registered in the cluster.
+# The admin kubeconfig is downloaded to a private mktemp file (mode 0600)
+# rather than a fixed, world-readable /tmp path, and is removed on every path.
+node_exists_in_cluster() {
+    local kc rc=0
+    kc=$(mktemp /tmp/admin-kubeconfig.XXXXXX) || return 1
+    if curl -sf "$KUBECONFIG_URL" -o "$kc" 2>/dev/null; then
+        k3s kubectl --kubeconfig="$kc" get node "$HOSTNAME" &>/dev/null || rc=$?
+    else
+        rc=1
+    fi
+    rm -f "$kc"
+    return "$rc"
+}
+
+# ── Persist node password on SSD (volatile rootfs loses /etc on reboot) ──
+NODE_PWD_PERSIST="${K3S_DATA}/node-password"
+mkdir -p "$(dirname "$NODE_PWD_PERSIST")"
+mkdir -p /etc/rancher/node
+if [ -f "$NODE_PWD_PERSIST" ]; then
+    cp "$NODE_PWD_PERSIST" /etc/rancher/node/password
+    chmod 600 /etc/rancher/node/password
+    echo "Restored node password from SSD"
+fi
+
+# ── Check if this node already exists in the cluster ──
+if node_exists_in_cluster; then
+    echo "Node ${HOSTNAME} already exists in cluster, skipping join."
+    NEED_JOIN=false
+else
+    echo "Node ${HOSTNAME} not found in cluster, will join."
+fi
+
+# ── Always fetch token and write config (volatile rootfs loses /etc on reboot) ──
+
+TOKEN=""
+for i in $(seq 1 30); do
+    TOKEN=$(curl -sf "$TOKEN_URL" 2>/dev/null) || true
+    [ -n "$TOKEN" ] && break
+    echo "Waiting for k3s token... ($i/30)"
+    sleep 5
+done
+[ -z "$TOKEN" ] && { echo "ERROR: Failed to get k3s token after 30 attempts"; exit 1; }
+
+K3S_SERVER=$(find_active_server)
+if [ -z "$K3S_SERVER" ]; then
+    echo "ERROR: no k3s server configured (pxe_k3s_server_ips is empty)" >&2
+    exit 1
+fi
+
+mkdir -p /etc/rancher/k3s
+# The config holds the join token: pre-create it with mode 0600 so it is never
+# world-readable (the redirect below keeps the existing mode).
+install -m 600 /dev/null /etc/rancher/k3s/config.yaml
+cat > /etc/rancher/k3s/config.yaml << CONF
+server: https://${K3S_SERVER}:6443
+token: ${TOKEN}
+CONF
+
+# ── Only clean agent state when actually joining as a new node ──
+if [ "$NEED_JOIN" = true ]; then
+    if [ -d "$K3S_DATA/agent/etc" ]; then
+        echo "Cleaning old agent state (keeping containerd cache)"
+        rm -rf "$K3S_DATA/agent/etc" "$K3S_DATA/agent/client-"* \
+            "$K3S_DATA/agent/server-"* "$K3S_DATA/agent/"*.kubeconfig 2>/dev/null || true
+    fi
+    rm -f "$NODE_PWD_PERSIST"
+    echo "Joining k3s server: ${K3S_SERVER}"
+fi
+
+systemctl enable --now k3s-agent
+
+# ── After agent starts, persist the node password for next boot ──
+# `systemctl enable --now` may return before the agent has written its password
+# file, so poll for up to 60 seconds instead of checking exactly once.
+if [ ! -f "$NODE_PWD_PERSIST" ]; then
+    for i in $(seq 1 30); do
+        [ -f /etc/rancher/node/password ] && break
+        sleep 2
+    done
+    if [ -f /etc/rancher/node/password ]; then
+        cp /etc/rancher/node/password "$NODE_PWD_PERSIST"
+        chmod 600 "$NODE_PWD_PERSIST"
+        echo "Saved node password to SSD"
+    else
+        echo "WARNING: node password not found after 60s; it will not persist across reboots" >&2
+    fi
+fi
diff --git a/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.service.j2 b/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.service.j2
new file mode 100644
index 00000000..dd5bb976
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.service.j2
@@ -0,0 +1,12 @@
+[Unit]
+Description=Mount local disk for k3s container image cache
+Before=k3s-auto-join.service
+After=local-fs.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/mount-local-disk.sh
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.sh.j2 b/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.sh.j2
new file mode 100644
index 00000000..15f104ba
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/mount-local-disk.sh.j2
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -euo pipefail
+
+K3S_DATA="{{ pxe_k3s_data_dir }}"
+
+DISK=""
+for dev in /dev/sda
/dev/vda /dev/nvme0n1; do
+    [ -b "$dev" ] && { DISK="$dev"; break; }
+done
+
+if [ -z "$DISK" ]; then
+    echo "No local disk found, using tmpfs for k3s data"
+    mkdir -p "$K3S_DATA"
+    mount -t tmpfs -o size=4G tmpfs "$K3S_DATA"
+    exit 0
+fi
+
+if ! blkid "$DISK" | grep -q 'TYPE="ext4"'; then
+    echo "Formatting $DISK as ext4"
+    mkfs.ext4 -F -L k3s-cache "$DISK"
+fi
+
+mkdir -p "$K3S_DATA"
+mount "$DISK" "$K3S_DATA"
+echo "Mounted $DISK to $K3S_DATA"
diff --git a/deploy/ansible/roles/pxe_controller/templates/pxelinux-default.cfg.j2 b/deploy/ansible/roles/pxe_controller/templates/pxelinux-default.cfg.j2
new file mode 100644
index 00000000..ad8f250d
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/pxelinux-default.cfg.j2
@@ -0,0 +1,15 @@
+DEFAULT menu.c32
+PROMPT 0
+TIMEOUT 50
+MENU TITLE K3s Cluster - Diskless PXE Boot
+
+LABEL diskless
+    MENU LABEL Diskless Boot (NFS root + overlayfs)
+    MENU DEFAULT
+    KERNEL vmlinuz
+    INITRD initrd.img
+    APPEND root=/dev/nfs nfsroot={{ pxe_controller_ip }}:{{ pxe_nfs_root }},ro,vers=3 ip=dhcp rootdelay=10 rw udev.event_timeout=10
+
+LABEL local
+    MENU LABEL Boot from local disk
+    LOCALBOOT 0
diff --git a/deploy/ansible/roles/pxe_controller/templates/set-hostname.service.j2 b/deploy/ansible/roles/pxe_controller/templates/set-hostname.service.j2
new file mode 100644
index 00000000..09be5c14
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/set-hostname.service.j2
@@ -0,0 +1,12 @@
+[Unit]
+Description=Set hostname based on MAC address
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/set-hostname.sh
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/deploy/ansible/roles/pxe_controller/templates/set-hostname.sh.j2 b/deploy/ansible/roles/pxe_controller/templates/set-hostname.sh.j2
new file mode 100644
index 00000000..0a86f348
--- /dev/null
+++ b/deploy/ansible/roles/pxe_controller/templates/set-hostname.sh.j2
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Set the hostname to "agent-<MAC>" (colons replaced by dashes), using the MAC
+# of the interface that carries the default route, or else the first
+# non-loopback interface under /sys/class/net.
+set -euo pipefail
+
+# Take the token after "dev" rather than a fixed column: routes without a
+# "via" gateway put the interface name in a different field.
+IFACE=$(ip route show default 2>/dev/null \
+    | awk '!found && /^default/ { for (i = 1; i < NF; i++) if ($i == "dev") { print $(i + 1); found = 1; break } }') || true
+
+if [ -z "$IFACE" ]; then
+    # Skip only the loopback device itself; a substring match on "lo" would also
+    # discard real interfaces such as wlo1.
+    for path in /sys/class/net/*; do
+        [ "${path##*/}" = "lo" ] && continue
+        IFACE="${path##*/}"
+        break
+    done
+fi
+
+if [ -z "$IFACE" ] || [ ! -r "/sys/class/net/${IFACE}/address" ]; then
+    echo "ERROR: no usable network interface found; hostname left unchanged" >&2
+    exit 1
+fi
+
+MAC=$(tr ':' '-' < "/sys/class/net/${IFACE}/address")
+hostnamectl set-hostname "agent-${MAC}"