huggingface · KirtiRamchandani · Jun 14, 2026
diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
@@ -1847,6 +1847,7 @@ def save_to_disk(
             # if we have only a few large samples, we should only create as many shards as samples
             num_shards = min(len(self.data), num_shards)
 
+        dataset_path = str(dataset_path)
         fs: fsspec.AbstractFileSystem
         fs, _ = url_to_fs(dataset_path, **(storage_options or {}))
 
@@ -2018,6 +2019,7 @@ def load_from_disk(
         >>> ds = load_from_disk("path/to/dataset/directory")
         ```
         """
+        dataset_path = str(dataset_path)
         fs: fsspec.AbstractFileSystem
         fs, dataset_path = url_to_fs(dataset_path, **(storage_options or {}))
 

diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py
@@ -329,6 +329,13 @@ def test_dummy_dataset_serialize(self, in_memory):
                 self.assertEqual(dset[0]["filename"], "my_name-train_0")
                 self.assertEqual(dset["filename"][0], "my_name-train_0")
 
+            with self._create_dummy_dataset(in_memory, tmp_dir).select(range(10)) as dset:
+                dataset_path = Path(tmp_dir) / "my_dataset_pathlib"
+                dset.save_to_disk(dataset_path)
+
+            with Dataset.load_from_disk(dataset_path) as dset:
+                self.assertEqual(len(dset), 10)
+
             with self._create_dummy_dataset(in_memory, tmp_dir).select(
                 range(10), indices_cache_file_name=os.path.join(tmp_dir, "ind.arrow")
             ) as dset: