added powersort in sorts/power_sort.py

a3ro-dev · a3ro-dev · commit 45a23c7b3ba2 · 2025-10-05T15:36:21.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -108,3 +108,4 @@ venv.bak/
 .try
 .vscode/
 .vs/
+.cursor/
diff --git a/sorts/power_sort.py b/sorts/power_sort.py
@@ -0,0 +1,353 @@
+"""
+PowerSort - An adaptive merge sort algorithm.
+
+PowerSort is an adaptive, stable sorting algorithm that efficiently handles
+partially ordered data by optimally merging existing runs (consecutive sequences
+of sorted elements) in the input. It was developed by J. Ian Munro and Sebastian
+Wild; CPython's built-in list sort has used its merge policy since version 3.11.
+
+The algorithm works by:
+1. Detecting naturally occurring runs (ascending or descending sequences)
+2. Using a power-based merge strategy to determine optimal merge order
+3. Maintaining a stack of runs and merging based on calculated node powers
+
+Time Complexity: O(n log n) worst case, O(n) for nearly sorted data
+Space Complexity: O(n) for merge buffer
+
+For doctests run:
+python -m doctest -v power_sort.py
+
+For manual testing run:
+python power_sort.py
+"""
+
+from __future__ import annotations
+
+from typing import Any, Callable
+
+
+def _find_run(
+    arr: list, start: int, end: int, key: Callable[[Any], Any] | None = None
+) -> int:
+    """
+    Locate the maximal run that begins at index ``start``.
+
+    A run is either non-decreasing or strictly decreasing.  A strictly
+    decreasing run is reversed in place so ``arr[start:result]`` ends up
+    ascending; strictness keeps equal elements in their original order.
+
+    Args:
+        arr: List that is scanned (and possibly partially reversed)
+        start: Index of the first element of the run
+        end: Exclusive upper bound of the region to scan
+        key: Optional function mapping an element to its comparison key
+
+    Returns:
+        Exclusive end index of the run; ``start + 1`` when fewer than two
+        elements are available
+
+    >>> arr = [3, 2, 1, 4, 5, 6]
+    >>> _find_run(arr, 0, 6)
+    3
+    >>> arr
+    [1, 2, 3, 4, 5, 6]
+    >>> arr = [1, 2, 3, 2, 1]
+    >>> _find_run(arr, 0, 5)
+    3
+    >>> arr
+    [1, 2, 3, 2, 1]
+    """
+    if end - start <= 1:
+        return start + 1
+
+    extract = key if key else (lambda item: item)
+    stop = start + 1
+
+    if not (extract(arr[stop]) < extract(arr[start])):
+        # Non-decreasing run: extend while each element is >= its predecessor.
+        while stop < end and extract(arr[stop]) >= extract(arr[stop - 1]):
+            stop += 1
+        return stop
+
+    # Strictly decreasing run: extend it, then flip it into ascending order.
+    while stop < end and extract(arr[stop]) < extract(arr[stop - 1]):
+        stop += 1
+    arr[start:stop] = arr[start:stop][::-1]
+    return stop
+
+
+def _node_power(n: int, b1: int, n1: int, b2: int, n2: int) -> int:
+    """
+    Compute the power of the boundary between two adjacent runs.
+
+    With the run midpoints normalised to the array length,
+    - a = (b1 + n1/2) / n
+    - b = (b2 + n2/2) / n
+    the power is the smallest integer p >= 0 for which
+    floor(a * 2^p) != floor(b * 2^p), i.e. the position of the first binary
+    digit after the point in which a and b differ.
+
+    Args:
+        n: Total length of the array
+        b1: Start index of the first run
+        n1: Length of the first run
+        b2: Start index of the second run
+        n2: Length of the second run
+
+    Returns:
+        The power of the boundary between the two runs
+
+    >>> _node_power(100, 0, 25, 25, 25)
+    2
+    >>> _node_power(100, 0, 50, 50, 50)
+    1
+    """
+    # Scale both midpoints by 2 so everything stays in integers:
+    #   a = (2*b1 + n1) / (2*n)   and   b = (2*b2 + n2) / (2*n)
+    doubled_n = n * 2
+    left_mid = b1 * 2 + n1
+    right_mid = b2 * 2 + n2
+
+    # ``scale`` is always 2**exponent; keep doubling it until the two
+    # scaled midpoints land in different integer cells.
+    scale, exponent = 1, 0
+    while left_mid * scale // doubled_n == right_mid * scale // doubled_n:
+        scale *= 2
+        exponent += 1
+    return exponent
+
+
+def _merge(
+    arr: list,
+    start1: int,
+    end1: int,
+    end2: int,
+    key: Callable[[Any], Any] | None = None,
+) -> None:
+    """
+    Stably merge two adjacent sorted runs of ``arr``.
+
+    The runs are ``arr[start1:end1]`` and ``arr[end1:end2]``.  Both are
+    copied to temporary lists and the merged sequence is written back to
+    ``arr[start1:end2]``.  On equal keys the element from the first run
+    is placed first, which preserves the input order of equal elements.
+
+    Args:
+        arr: List holding both runs; modified in place
+        start1: Index where the first run begins
+        end1: Exclusive end of the first run and start of the second
+        end2: Exclusive end of the second run
+        key: Optional function mapping an element to its comparison key
+
+    Returns:
+        None; ``arr`` is modified in place
+
+    >>> arr = [1, 3, 5, 2, 4, 6]
+    >>> _merge(arr, 0, 3, 6)
+    >>> arr
+    [1, 2, 3, 4, 5, 6]
+    >>> arr = [5, 6, 7, 1, 2, 3]
+    >>> _merge(arr, 0, 3, 6)
+    >>> arr
+    [1, 2, 3, 5, 6, 7]
+    """
+    extract = key if key else (lambda item: item)
+
+    # Snapshot both runs; ``arr`` is overwritten while merging.
+    head = arr[start1:end1]
+    tail = arr[end1:end2]
+    head_len, tail_len = len(head), len(tail)
+    h = t = 0
+    dest = start1
+    while h < head_len and t < tail_len:
+        # ``<=`` lets the first run win ties, keeping the merge stable.
+        if extract(head[h]) <= extract(tail[t]):
+            arr[dest] = head[h]
+            h += 1
+        else:
+            arr[dest] = tail[t]
+            t += 1
+        dest += 1
+
+    # At most one run still has elements; copy them over unchanged.
+    for leftover in (head[h:], tail[t:]):
+        for item in leftover:
+            arr[dest] = item
+            dest += 1
+
+
+def power_sort(
+    collection: list,
+    *,
+    key: Callable[[Any], Any] | None = None,
+    reverse: bool = False,
+) -> list:
+    """
+    Return a new list with the items of ``collection`` sorted by PowerSort.
+
+    PowerSort is a stable, adaptive merge sort.  The input is scanned from
+    left to right for runs that are already ordered; a stack of pending
+    runs is kept, and the "power" of the boundary between two neighbouring
+    runs decides when pending runs are merged.
+
+    Args:
+        collection: Items to sort; the argument itself is left unmodified
+        key: Optional function that extracts a comparison key from an item
+        reverse: If True, sort in descending order.  Items that compare
+            equal keep their original relative order, as with ``sorted``
+
+    Returns:
+        A new list containing the items in sorted order
+
+    Time Complexity: O(n log n) worst case, O(n) for already sorted data
+    Space Complexity: O(n)
+
+    Examples:
+    >>> power_sort([0, 5, 3, 2, 2])
+    [0, 2, 2, 3, 5]
+    >>> power_sort([])
+    []
+    >>> power_sort([1])
+    [1]
+    >>> power_sort([-2, -5, -45])
+    [-45, -5, -2]
+    >>> power_sort([1, 2, 3, 4, 5])
+    [1, 2, 3, 4, 5]
+    >>> power_sort([5, 4, 3, 2, 1])
+    [1, 2, 3, 4, 5]
+    >>> power_sort([3, 1, 4, 1, 5, 9, 2, 6, 5])
+    [1, 1, 2, 3, 4, 5, 5, 6, 9]
+    >>> power_sort(['banana', 'apple', 'cherry'])
+    ['apple', 'banana', 'cherry']
+    >>> power_sort([3.14, 2.71, 1.41, 1.73])
+    [1.41, 1.73, 2.71, 3.14]
+    >>> power_sort([5, 2, 8, 1, 9], reverse=True)
+    [9, 8, 5, 2, 1]
+    >>> power_sort(['apple', 'pie', 'a', 'longer'], key=len)
+    ['a', 'pie', 'apple', 'longer']
+    >>> power_sort([(1, 'b'), (2, 'a'), (1, 'a')], key=lambda x: x[0])
+    [(1, 'b'), (1, 'a'), (2, 'a')]
+    >>> power_sort([1, 2, 3, 2, 1, 2, 3, 4])
+    [1, 1, 2, 2, 2, 3, 3, 4]
+    >>> power_sort(list(range(100))) == list(range(100))
+    True
+    >>> power_sort(list(reversed(range(50)))) == list(range(50))
+    True
+    >>> power_sort([1, 3, 2], key=lambda x: x, reverse=True)
+    [3, 2, 1]
+    >>> power_sort(['pear', 'fig', 'apple'], reverse=True)
+    ['pear', 'fig', 'apple']
+    >>> pairs = [(1, 'b'), (2, 'a'), (1, 'a')]
+    >>> power_sort(pairs, key=lambda x: x[0], reverse=True)
+    [(2, 'a'), (1, 'b'), (1, 'a')]
+    >>> pairs
+    [(1, 'b'), (2, 'a'), (1, 'a')]
+    """
+    # Always work on (and return) a copy, whatever the input length.
+    arr = list(collection)
+    n = len(arr)
+    if n <= 1:
+        return arr
+
+    # Descending order is produced as reverse -> stable ascending sort ->
+    # reverse.  The two reversals cancel out for equal items, so they keep
+    # their original relative order, and no key ever has to be negated
+    # (which would only work for plain numbers).
+    if reverse:
+        arr.reverse()
+
+    # Pending runs as (start, length, power).  ``power`` belongs to the
+    # boundary between that run and its right-hand neighbour.  The runs on
+    # the stack are contiguous and the top one ends where the current run
+    # starts.
+    stack: list[tuple[int, int, int]] = []
+
+    run_start = 0
+    run_end = _find_run(arr, run_start, n, key)
+    while run_end < n:
+        next_end = _find_run(arr, run_end, n, key)
+        power = _node_power(
+            n, run_start, run_end - run_start, run_end, next_end - run_end
+        )
+
+        # A boundary with a higher power has to be merged before one with
+        # a lower power, so fold pending runs into the current run while
+        # their boundary outranks the one to the next run.
+        while stack and stack[-1][2] > power:
+            prev_start, prev_length, _ = stack.pop()
+            _merge(arr, prev_start, prev_start + prev_length, run_end, key)
+            run_start = prev_start
+
+        stack.append((run_start, run_end - run_start, power))
+        run_start, run_end = run_end, next_end
+
+    # The current run now reaches the end of the list; merge every pending
+    # run into it, from the top of the stack downwards.
+    while stack:
+        prev_start, prev_length, _ = stack.pop()
+        _merge(arr, prev_start, prev_start + prev_length, run_end, key)
+
+    if reverse:
+        arr.reverse()
+    return arr
+
+
+if __name__ == "__main__":
+    import doctest
+
+    # Check the docstring examples before starting the interactive demo.
+    doctest.testmod()
+
+    print("\nPowerSort Interactive Testing")
+    print("=" * 40)
+
+    try:
+        user_input = input("Enter numbers separated by a comma:\n").strip()
+        # A blank line stands for the empty list; any other input must be
+        # a comma-separated list of integers.
+        fields = user_input.split(",") if user_input != "" else []
+        unsorted = [int(item.strip()) for item in fields]
+
+        print(f"\nOriginal: {unsorted}")
+        sorted_list = power_sort(unsorted)
+        print(f"Sorted:   {sorted_list}")
+
+        # Same input again, this time in descending order.
+        sorted_reverse = power_sort(unsorted, reverse=True)
+        print(f"Reverse:  {sorted_reverse}")
+    except ValueError:
+        print("Invalid input. Please enter valid integers separated by commas.")
+    except KeyboardInterrupt:
+        print("\n\nGoodbye!")