Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ model within the kernel context:
* Full task management including both UP and SMP scheduling via EEVDF and task
migration via IPIs.
* Capable of running dynamically linked ELF binaries from Arch Linux.
* Currently implements [105 Linux syscalls](./etc/syscalls_linux_aarch64.md)
* Currently implements [109 Linux syscalls](./etc/syscalls_linux_aarch64.md)
* `fork()`, `execve()`, `clone()`, and full process lifecycle management.
* Job control support (process groups, waitpid, background tasks).
* Signal delivery, masking, and propagation (SIGTERM, SIGSTOP, SIGCONT, SIGCHLD,
Expand Down Expand Up @@ -172,7 +172,7 @@ moss is under active development. Current focus areas include:

* Networking Stack: TCP/IP implementation.
* A fully read/write capable filesystem driver.
* Expanding coverage beyond the current 105 calls.
* Expanding coverage beyond the current 109 calls.
* systemd bringup.

## Non-Goals (for now)
Expand Down
8 changes: 4 additions & 4 deletions etc/syscalls_linux_aarch64.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,14 +304,14 @@
| 0x1be (446) | landlock_restrict_self | (const int ruleset_fd, const __u32 flags) | __arm64_sys_landlock_restrict_self | false |
| 0x1bf (447) | memfd_secret | (unsigned int flags) | __arm64_sys_memfd_secret | false |
| 0x1c0 (448) | process_mrelease | (int pidfd, unsigned int flags) | __arm64_sys_process_mrelease | false |
| 0x1c1 (449) | futex_waitv | (struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec *timeout, clockid_t clockid) | __arm64_sys_futex_waitv | false |
| 0x1c1 (449) | futex_waitv | (struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec *timeout, clockid_t clockid) | __arm64_sys_futex_waitv | true |
| 0x1c2 (450) | set_mempolicy_home_node | (unsigned long start, unsigned long len, unsigned long home_node, unsigned long flags) | __arm64_sys_set_mempolicy_home_node | false |
| 0x1c3 (451) | cachestat | (unsigned int fd, struct cachestat_range *cstat_range, struct cachestat *cstat, unsigned int flags) | __arm64_sys_cachestat | false |
| 0x1c4 (452) | fchmodat2 | (int dfd, const char *filename, umode_t mode, unsigned int flags) | __arm64_sys_fchmodat2 | false |
| 0x1c5 (453) | map_shadow_stack | (unsigned long addr, unsigned long size, unsigned int flags) | __arm64_sys_map_shadow_stack | false |
| 0x1c6 (454) | futex_wake | (void *uaddr, unsigned long mask, int nr, unsigned int flags) | __arm64_sys_futex_wake | false |
| 0x1c7 (455) | futex_wait | (void *uaddr, unsigned long val, unsigned long mask, unsigned int flags, struct __kernel_timespec *timeout, clockid_t clockid) | __arm64_sys_futex_wait | false |
| 0x1c8 (456) | futex_requeue | (struct futex_waitv *waiters, unsigned int flags, int nr_wake, int nr_requeue) | __arm64_sys_futex_requeue | false |
| 0x1c6 (454) | futex_wake | (void *uaddr, unsigned long mask, int nr, unsigned int flags) | __arm64_sys_futex_wake | true |
| 0x1c7 (455) | futex_wait | (void *uaddr, unsigned long val, unsigned long mask, unsigned int flags, struct __kernel_timespec *timeout, clockid_t clockid) | __arm64_sys_futex_wait | true |
| 0x1c8 (456) | futex_requeue | (struct futex_waitv *waiters, unsigned int flags, int nr_wake, int nr_requeue) | __arm64_sys_futex_requeue | true |
| 0x1c9 (457) | statmount | (const struct mnt_id_req *req, struct statmount *buf, size_t bufsize, unsigned int flags) | __arm64_sys_statmount | false |
| 0x1ca (458) | listmount | (const struct mnt_id_req *req, u64 *mnt_ids, size_t nr_mnt_ids, unsigned int flags) | __arm64_sys_listmount | false |
| 0x1cb (459) | lsm_get_self_attr | (unsigned int attr, struct lsm_ctx *ctx, u32 *size, u32 flags) | __arm64_sys_lsm_get_self_attr | false |
Expand Down
79 changes: 78 additions & 1 deletion libkernel/src/sync/waker_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,40 @@ impl<T> WakerSet<T> {
}
}

/// Detaches the earliest-registered entry (the one with the smallest token)
/// whose data satisfies `predicate` and hands it back to the caller. The
/// waker is returned as-is and is *not* woken.
///
/// Returns `None` when no entry matches.
pub fn take_if(&mut self, predicate: impl Fn(&T) -> bool) -> Option<(Waker, T)> {
    // Locate the token first; the shared borrow taken by the iterator has to
    // end before the entry can be removed.
    let token = self
        .waiters
        .iter()
        .find_map(|(token, (_, data))| predicate(data).then_some(*token))?;

    self.waiters.remove(&token)
}

/// Detaches the earliest-registered entry (the one with the smallest token)
/// and hands it back to the caller without waking it.
///
/// Returns `None` when the set is empty.
pub fn take_first(&mut self) -> Option<(Waker, T)> {
    let (_token, entry) = self.waiters.pop_first()?;
    Some(entry)
}

/// Returns `true` if no wakers are registered.
///
/// This only reports whether the underlying waiter collection is empty; it
/// does not wake or remove anything.
pub fn is_empty(&self) -> bool {
    self.waiters.is_empty()
}

/// Stores a clone of `waker` alongside `data` and returns the token that
/// identifies the new entry.
///
/// Use [`insert`](Self::insert) instead when an owned `Waker` is already at
/// hand, to avoid the extra clone.
pub fn register_with_data(&mut self, waker: &Waker, data: T) -> u64 {
    let owned = waker.clone();
    self.insert(owned, data)
}

/// Inserts an already-owned waker with associated data, returning its
/// token.
pub fn insert(&mut self, waker: Waker, data: T) -> u64 {
let id = self.allocate_id();

self.waiters.insert(id, (waker.clone(), data));
self.waiters.insert(id, (waker, data));

id
}
Expand Down Expand Up @@ -193,6 +222,54 @@ where
}
}

#[cfg(test)]
mod waker_set_tests {
    use super::*;

    /// Builds a set holding one no-op waker per element of `data`, inserted
    /// in slice order.
    fn set_with_data(data: &[u32]) -> WakerSet<u32> {
        let mut set = WakerSet::new();
        data.iter().for_each(|&value| {
            set.insert(Waker::noop().clone(), value);
        });
        set
    }

    #[test]
    fn take_if_removes_first_match_in_fifo_order() {
        let mut set = set_with_data(&[0b01, 0b10, 0b11]);
        // Matches every entry that has bit 1 set.
        let has_bit_one = |d: &u32| d & 0b10 != 0;

        // Matching entries come out in insertion order.
        let (_, first) = set.take_if(has_bit_one).unwrap();
        assert_eq!(first, 0b10);

        let (_, second) = set.take_if(has_bit_one).unwrap();
        assert_eq!(second, 0b11);

        // Only the non-matching `0b01` entry is left behind.
        assert!(set.take_if(has_bit_one).is_none());
        assert!(!set.is_empty());
    }

    #[test]
    fn take_first_is_fifo() {
        let mut set = set_with_data(&[1, 2, 3]);

        for expected in [1, 2, 3] {
            let (_, data) = set.take_first().unwrap();
            assert_eq!(data, expected);
        }

        assert!(set.take_first().is_none());
        assert!(set.is_empty());
    }

    #[test]
    fn insert_then_remove_by_token() {
        let mut set: WakerSet<u32> = WakerSet::new();
        let token = set.insert(Waker::noop().clone(), 7);

        assert!(set.contains_token(token));
        set.remove(token);
        assert!(set.is_empty());
    }
}

#[cfg(test)]
mod wait_until_tests {
use super::*;
Expand Down
40 changes: 39 additions & 1 deletion src/arch/arm64/exceptions/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ use crate::{
umask::sys_umask,
wait::{sys_wait4, sys_waitid},
},
threading::{futex::sys_futex, sys_set_robust_list, sys_set_tid_address},
threading::{
futex::futex2::{sys_futex_requeue, sys_futex_wait, sys_futex_waitv, sys_futex_wake},
futex::sys_futex,
sys_set_robust_list, sys_set_tid_address,
},
},
sched::{
self,
Expand Down Expand Up @@ -824,6 +828,40 @@ pub async fn handle_syscall(mut ctx: ProcessCtx) {
.await
}
0x1b8 => Ok(0), // process_madvise is a no-op
0x1c1 => {
sys_futex_waitv(
&ctx,
TUA::from_value(arg1 as _),
arg2 as _,
arg3 as _,
TUA::from_value(arg4 as _),
arg5 as _,
)
.await
}
0x1c6 => sys_futex_wake(&ctx, arg1, arg2, arg3 as _, arg4 as _),
0x1c7 => {
sys_futex_wait(
&ctx,
arg1,
arg2,
arg3,
arg4 as _,
TUA::from_value(arg5 as _),
arg6 as _,
)
.await
}
0x1c8 => {
sys_futex_requeue(
&ctx,
TUA::from_value(arg1 as _),
arg2 as _,
arg3 as _,
arg4 as _,
)
.await
}
_ => panic!(
"Unhandled syscall 0x{nr:x}, PC: 0x{:x}",
ctx.task().ctx.user().elr_el1
Expand Down
80 changes: 80 additions & 0 deletions src/clock/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,86 @@ pub mod syscalls;
pub mod timer;
pub mod timespec;

use core::time::Duration;

use futures::FutureExt;

use crate::drivers::timer::{sleep, uptime};
use realtime::{clock_set_generation, clock_was_set_since, date};

/// An absolute deadline expressed against a particular clock.
///
/// Keeping the clock alongside the deadline (rather than pre-flattening to a
/// relative duration) lets [`Deadline::sleep`] re-evaluate against the live
/// clock, so a `CLOCK_REALTIME` deadline still fires at the right wall-clock
/// instant even if the clock is stepped (e.g. by `clock_settime`) while a
/// wait is in progress.
///
/// Two deadlines compare equal only when they name the same clock *and* the
/// same instant; a monotonic and a realtime deadline are never equal, even if
/// their raw durations match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Deadline {
    /// Absolute instant on the monotonic clock (`CLOCK_MONOTONIC`).
    Monotonic(Duration),
    /// Absolute instant on the realtime clock (`CLOCK_REALTIME`).
    Realtime(Duration),
}

impl Deadline {
    /// The current reading of the clock this deadline is measured against.
    fn clock_now(self) -> Duration {
        match self {
            Deadline::Monotonic(_) => uptime(),
            Deadline::Realtime(_) => date(),
        }
    }

    /// The absolute deadline value.
    fn target(self) -> Duration {
        match self {
            Deadline::Monotonic(d) | Deadline::Realtime(d) => d,
        }
    }

    /// Sleeps until this deadline.
    ///
    /// Returns immediately if the deadline has already passed.
    ///
    /// The monotonic clock advances uniformly, so a single relative sleep is
    /// exact. The realtime clock can be stepped by `clock_settime`, so a
    /// realtime wait races the timer against a clock-was-set notification: on
    /// either it re-evaluates the deadline against the live clock and re-arms
    /// if the target has not yet been reached. This retargets an in-progress
    /// wait in both directions across a step.
    pub async fn sleep(self) {
        let target = self.target();

        loop {
            // For realtime deadlines the clock-set generation must be sampled
            // *before* the clock is read. If it were sampled afterwards, a
            // step landing between the two reads would be invisible: the
            // remaining time would be computed from the stale reading while
            // the notification would only fire on the *next* step, so a
            // forward step could make the wait overshoot the deadline.
            let generation = match self {
                Deadline::Realtime(_) => Some(clock_set_generation()),
                Deadline::Monotonic(_) => None,
            };

            let now = self.clock_now();
            if now >= target {
                return;
            }
            let remaining = target - now;

            // The monotonic clock never steps, so one relative sleep is exact.
            let Some(generation) = generation else {
                sleep(remaining).await;
                return;
            };

            // A realtime step (in either direction) wakes the notifier; loop
            // to re-evaluate against the new wall time. The timer is listed
            // first, so it wins when both are ready.
            let mut timer = core::pin::pin!(sleep(remaining).fuse());
            let mut was_set = core::pin::pin!(clock_was_set_since(generation).fuse());
            futures::select_biased! {
                _ = timer => {}
                _ = was_set => {}
            }
        }
    }
}

pub enum ClockId {
Realtime = 0,
Monotonic = 1,
Expand Down
54 changes: 53 additions & 1 deletion src/clock/realtime.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use crate::{
drivers::timer::{Instant, now, uptime},
sync::SpinLock,
sync::{OnceLock, SpinLock},
};
use core::future::poll_fn;
use core::task::Poll;
use core::time::Duration;
use libkernel::sync::waker_set::WakerSet;

// Return a duration from the epoch.
pub fn date() -> Duration {
Expand All @@ -23,11 +26,60 @@ pub fn set_date(duration: Duration) {
let mut epoch_info = EPOCH_DURATION.lock_save_irq();
*epoch_info = Some((duration, now));
}

// The realtime clock was stepped; wake anyone sleeping against an absolute
// realtime deadline so they can re-evaluate (and re-arm) against the new
// wall time.
let mut waiters = clock_set_waiters().lock_save_irq();
*CLOCK_SET_GEN.lock_save_irq() += 1;
waiters.wake_all();
}

// Represents a known duration since the epoch at the associated instant.
static EPOCH_DURATION: SpinLock<Option<(Duration, Instant)>> = SpinLock::new(None);

/// Tasks waiting to be notified when the realtime clock is stepped.
///
/// Held in a `OnceLock` and constructed on first access through
/// [`clock_set_waiters`].
static CLOCK_SET_WAITERS: OnceLock<SpinLock<WakerSet>> = OnceLock::new();

/// Returns the global clock-step waiter set, creating an empty one on first
/// use.
fn clock_set_waiters() -> &'static SpinLock<WakerSet> {
    CLOCK_SET_WAITERS.get_or_init(|| SpinLock::new(WakerSet::new()))
}

/// Bumped on every realtime clock step, so a waiter can detect a step that
/// happened between checking the clock and parking (closing the lost-wakeup
/// race).
///
/// `set_date` increments this while holding the clock-step waiter lock, and
/// `clock_was_set_since` reads it under that same lock.
static CLOCK_SET_GEN: SpinLock<u64> = SpinLock::new(0);

/// Returns a snapshot of the clock-set generation counter.
///
/// Take the snapshot *before* reading the clock, then hand it to
/// [`clock_was_set_since`] to wait for the next step.
pub fn clock_set_generation() -> u64 {
    let current = CLOCK_SET_GEN.lock_save_irq();
    *current
}

/// Resolves once the realtime clock is stepped after `generation` was sampled.
/// If a step already happened since `generation`, returns immediately.
///
/// `generation` is expected to come from [`clock_set_generation`].
pub async fn clock_was_set_since(generation: u64) {
    // Remembers whether this future has already put its waker into the set,
    // so later polls do not register a second time.
    let mut registered = false;

    poll_fn(|cx| {
        // Take the waiter lock before looking at the generation. `set_date`
        // bumps the generation and wakes the set while holding this same
        // lock, so a step cannot land between the check below and the
        // registration that follows it.
        let mut waiters = clock_set_waiters().lock_save_irq();

        // Any change in the counter means at least one step has happened.
        if *CLOCK_SET_GEN.lock_save_irq() != generation {
            return Poll::Ready(());
        }

        // NOTE(review): only the waker from the first pending poll is stored;
        // this assumes the task's waker does not change between polls — TODO
        // confirm. Nothing visible here removes the registration if the
        // future is dropped before a step (e.g. when a racing timer wins), so
        // the entry presumably stays in the set until the next step — verify
        // against `WakerSet`.
        if !registered {
            waiters.register(cx.waker());
            registered = true;
        }

        Poll::Pending
    })
    .await;
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading
Loading