Skip to content

Commit 45a23c7

Browse files
committed
added powersort in sorts/power_sort.py
1 parent a71618f commit 45a23c7

2 files changed

Lines changed: 354 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,4 @@ venv.bak/
108108
.try
109109
.vscode/
110110
.vs/
111+
.cursor/

sorts/power_sort.py

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
"""
2+
PowerSort - An adaptive merge sort algorithm.
3+
4+
PowerSort is an adaptive, stable sorting algorithm that efficiently handles
5+
partially ordered data by optimally merging existing runs (consecutive sequences
6+
of sorted elements) in the input. It was developed by J. Ian Munro and Sebastian
7+
Wild, and its merge policy has been used by CPython's built-in list sort since version 3.11.
8+
9+
The algorithm works by:
10+
1. Detecting naturally occurring runs (ascending or descending sequences)
11+
2. Using a power-based merge strategy to determine optimal merge order
12+
3. Maintaining a stack of runs and merging based on calculated node powers
13+
14+
Time Complexity: O(n log n) worst case, O(n) for nearly sorted data
15+
Space Complexity: O(n) for merge buffer
16+
17+
For doctests run:
18+
python -m doctest -v power_sort.py
19+
20+
For manual testing run:
21+
python power_sort.py
22+
"""
23+
24+
from __future__ import annotations
25+
26+
from typing import Any, Callable
27+
28+
29+
def _find_run(
30+
arr: list, start: int, end: int, key: Callable[[Any], Any] | None = None
31+
) -> int:
32+
"""
33+
Detect a run (ascending or descending sequence) starting at 'start'.
34+
35+
If the run is descending, reverse it in-place to make it ascending.
36+
Returns the end index (exclusive) of the detected run.
37+
38+
Args:
39+
arr: The list to search in
40+
start: Starting index of the run
41+
end: End index (exclusive) of the search range
42+
key: Optional key function for comparisons
43+
44+
Returns:
45+
End index (exclusive) of the detected run
46+
47+
>>> arr = [3, 2, 1, 4, 5, 6]
48+
>>> _find_run(arr, 0, 6)
49+
3
50+
>>> arr
51+
[1, 2, 3, 4, 5, 6]
52+
>>> arr = [1, 2, 3, 2, 1]
53+
>>> _find_run(arr, 0, 5)
54+
3
55+
>>> arr
56+
[1, 2, 3, 2, 1]
57+
"""
58+
if start >= end - 1:
59+
return start + 1
60+
61+
key_func = key if key else lambda x: x
62+
run_end = start + 1
63+
64+
# Check if run is ascending or descending
65+
if key_func(arr[run_end]) < key_func(arr[start]):
66+
# Descending run
67+
while run_end < end and key_func(arr[run_end]) < key_func(arr[run_end - 1]):
68+
run_end += 1
69+
# Reverse the descending run to make it ascending
70+
arr[start:run_end] = reversed(arr[start:run_end])
71+
else:
72+
# Ascending run
73+
while run_end < end and key_func(arr[run_end]) >= key_func(arr[run_end - 1]):
74+
run_end += 1
75+
76+
return run_end
77+
78+
79+
def _node_power(n: int, b1: int, n1: int, b2: int, n2: int) -> int:
80+
"""
81+
Calculate the node power for two adjacent runs.
82+
83+
This determines the merge priority in the stack. The power is the smallest
84+
integer p such that floor(a * 2^p) != floor(b * 2^p), where:
85+
- a = (b1 + n1/2) / n
86+
- b = (b2 + n2/2) / n
87+
88+
Args:
89+
n: Total length of the array
90+
b1: Start index of first run
91+
n1: Length of first run
92+
b2: Start index of second run
93+
n2: Length of second run
94+
95+
Returns:
96+
The calculated node power
97+
98+
>>> _node_power(100, 0, 25, 25, 25)
99+
2
100+
>>> _node_power(100, 0, 50, 50, 50)
101+
1
102+
"""
103+
# Calculate midpoints: a = (b1 + n1/2) / n, b = (b2 + n2/2) / n
104+
# To avoid floating point, we work with a = (2*b1 + n1) / (2*n) and b = (2*b2 + n2) / (2*n)
105+
# We want smallest p where floor(a * 2^p) != floor(b * 2^p)
106+
# This is floor((2*b1 + n1) * 2^p / (2*n)) != floor((2*b2 + n2) * 2^p / (2*n))
107+
108+
a = 2 * b1 + n1
109+
b = 2 * b2 + n2
110+
two_n = 2 * n
111+
112+
# Find smallest power p where floor(a * 2^p / two_n) != floor(b * 2^p / two_n)
113+
power = 0
114+
while (a * (1 << power)) // two_n == (b * (1 << power)) // two_n:
115+
power += 1
116+
117+
return power
118+
119+
120+
def _merge(
121+
arr: list,
122+
start1: int,
123+
end1: int,
124+
end2: int,
125+
key: Callable[[Any], Any] | None = None,
126+
) -> None:
127+
"""
128+
Merge two adjacent sorted runs in-place using auxiliary space.
129+
130+
Merges arr[start1:end1] with arr[end1:end2].
131+
132+
Args:
133+
arr: The list containing the runs
134+
start1: Start index of first run
135+
end1: End index of first run (start of second run)
136+
end2: End index of second run
137+
key: Optional key function for comparisons
138+
139+
>>> arr = [1, 3, 5, 2, 4, 6]
140+
>>> _merge(arr, 0, 3, 6)
141+
>>> arr
142+
[1, 2, 3, 4, 5, 6]
143+
>>> arr = [5, 6, 7, 1, 2, 3]
144+
>>> _merge(arr, 0, 3, 6)
145+
>>> arr
146+
[1, 2, 3, 5, 6, 7]
147+
"""
148+
key_func = key if key else lambda x: x
149+
150+
# Copy the runs to temporary storage
151+
left = arr[start1:end1]
152+
right = arr[end1:end2]
153+
154+
i = j = 0
155+
k = start1
156+
157+
# Merge the two runs
158+
while i < len(left) and j < len(right):
159+
if key_func(left[i]) <= key_func(right[j]):
160+
arr[k] = left[i]
161+
i += 1
162+
else:
163+
arr[k] = right[j]
164+
j += 1
165+
k += 1
166+
167+
# Copy remaining elements
168+
while i < len(left):
169+
arr[k] = left[i]
170+
i += 1
171+
k += 1
172+
173+
while j < len(right):
174+
arr[k] = right[j]
175+
j += 1
176+
k += 1
177+
178+
179+
def power_sort(
    collection: list,
    *,
    key: Callable[[Any], Any] | None = None,
    reverse: bool = False,
) -> list:
    """
    Sort a collection using the PowerSort algorithm.

    PowerSort is an adaptive, stable merge sort: it detects the runs that
    already exist in the data and merges them in the order dictated by the
    "power" of the boundary between neighbouring runs.

    The input is never modified; a new list is always returned, including
    for empty and single-element inputs.

    Args:
        collection: An ordered collection of mutually comparable items.
        key: Optional function extracting a comparison key from each
            element. It is called on every comparison (keys are not
            cached), so it should be cheap and free of side effects.
        reverse: If True, sort in descending order. As with the built-in
            ``sorted``, elements that compare equal keep their original
            relative order.

    Returns:
        A new list with the elements of ``collection`` in sorted order.

    Examples:
    >>> power_sort([0, 5, 3, 2, 2])
    [0, 2, 2, 3, 5]
    >>> power_sort([])
    []
    >>> power_sort([1])
    [1]
    >>> power_sort([-2, -5, -45])
    [-45, -5, -2]
    >>> power_sort([1, 2, 3, 4, 5])
    [1, 2, 3, 4, 5]
    >>> power_sort([5, 4, 3, 2, 1])
    [1, 2, 3, 4, 5]
    >>> power_sort([3, 1, 4, 1, 5, 9, 2, 6, 5])
    [1, 1, 2, 3, 4, 5, 5, 6, 9]
    >>> power_sort(['banana', 'apple', 'cherry'])
    ['apple', 'banana', 'cherry']
    >>> power_sort([3.14, 2.71, 1.41, 1.73])
    [1.41, 1.73, 2.71, 3.14]
    >>> power_sort([5, 2, 8, 1, 9], reverse=True)
    [9, 8, 5, 2, 1]
    >>> power_sort([1, 2, 3], key=lambda x: x, reverse=True)
    [3, 2, 1]
    >>> power_sort(['apple', 'pie', 'a', 'longer'], key=len)
    ['a', 'pie', 'apple', 'longer']
    >>> power_sort(['apple', 'pie', 'a', 'longer'], key=len, reverse=True)
    ['longer', 'apple', 'pie', 'a']
    >>> power_sort([(1, 'b'), (2, 'a'), (1, 'a')], key=lambda x: x[0])
    [(1, 'b'), (1, 'a'), (2, 'a')]
    >>> power_sort([(1, 'b'), (2, 'a'), (1, 'a')], key=lambda x: x[0], reverse=True)
    [(2, 'a'), (1, 'b'), (1, 'a')]
    >>> power_sort([1, 2, 3, 2, 1, 2, 3, 4])
    [1, 1, 2, 2, 2, 3, 3, 4]
    >>> power_sort(list(range(100))) == list(range(100))
    True
    >>> power_sort(list(reversed(range(50)))) == list(range(50))
    True
    """
    # Always work on (and return) a private copy so the caller's data is
    # never mutated and the return type does not depend on the input length.
    arr = list(collection)
    n = len(arr)
    if n <= 1:
        return arr

    # Descending order without negating keys: reverse, sort ascending with
    # the stable algorithm, reverse again. This works for any comparable
    # type and keeps equal elements in their original relative order.
    if reverse:
        arr.reverse()

    # Each stack entry is (run_start, power), where `power` belongs to the
    # boundary between that run and the run to its right. A stacked run ends
    # where the next entry (or the current run) starts, so lengths need not
    # be stored. Powers on the stack are strictly increasing bottom to top.
    pending: list[tuple[int, int]] = []

    run_start = 0
    run_end = _find_run(arr, run_start, n, key)

    while run_end < n:
        next_end = _find_run(arr, run_end, n, key)
        power = _node_power(
            n, run_start, run_end - run_start, run_end, next_end - run_end
        )

        # Boundaries deeper in the merge tree (higher power) than the new
        # boundary must be resolved first: fold those stacked runs into the
        # current run before the current run is pushed.
        while pending and pending[-1][1] > power:
            left_start, _ = pending.pop()
            _merge(arr, left_start, run_start, run_end, key)
            run_start = left_start

        pending.append((run_start, power))
        run_start, run_end = run_end, next_end

    # Input exhausted: collapse whatever is left, top of stack first.
    while pending:
        left_start, _ = pending.pop()
        _merge(arr, left_start, run_start, run_end, key)
        run_start = left_start

    if reverse:
        arr.reverse()

    return arr
325+
326+
327+
if __name__ == "__main__":
    import doctest

    # Run the embedded examples first, then offer a small interactive demo.
    doctest.testmod()

    print("\nPowerSort Interactive Testing")
    print("=" * 40)

    try:
        user_input = input("Enter numbers separated by a comma:\n").strip()
        # A blank line means "sort the empty list"; otherwise every
        # comma-separated token must parse as an integer.
        unsorted = (
            [int(token.strip()) for token in user_input.split(",")]
            if user_input
            else []
        )

        print(f"\nOriginal: {unsorted}")
        sorted_list = power_sort(unsorted)
        print(f"Sorted: {sorted_list}")

        # Show the descending order as well.
        sorted_reverse = power_sort(unsorted, reverse=True)
        print(f"Reverse: {sorted_reverse}")

    except ValueError:
        print("Invalid input. Please enter valid integers separated by commas.")
    except KeyboardInterrupt:
        print("\n\nGoodbye!")

0 commit comments

Comments
 (0)