-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalgorithm_optimisation.py
More file actions
73 lines (63 loc) · 2.26 KB
/
algorithm_optimisation.py
File metadata and controls
73 lines (63 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import timeit
import cProfile
import random
from io import StringIO
from typing import Callable, Dict
# Import the complement map and the optimised reverse-complement function from reverse_complement_generation.py
from reverse_complement_generation import complement_map, generate_reverse_complement
# Shared lookup data for the helper and comparison functions in this module.
nucleotides = list("ATGC")
# Upper-cased copy (keys and values) of the imported complement map.
complement_map_upper = {
    base.upper(): partner.upper() for base, partner in complement_map.items()
}
# Character translation table built from the upper-cased map via str.maketrans.
translation_table = str.maketrans(complement_map_upper)
def generate_random_dna_sequence(length: int) -> str:
    """
    Build a random DNA string containing ``length`` bases.

    Each base is drawn with replacement from the module-level
    ``nucleotides`` list using ``random.choices``.
    """
    drawn_bases = random.choices(nucleotides, k=length)
    return "".join(drawn_bases)
#comparison approaches
def RC_version1_suboptimal(sequence: str) -> str:
    """
    Method 1 - reverse complement built with an explicit Python loop.

    Kept as the slow baseline for comparison: every base is looked up and
    appended one at a time, so the per-item interpreter overhead is high.
    Bases missing from ``complement_map_upper`` are emitted as "N".
    """
    upper_sequence = sequence.upper()
    complemented = []
    for nucleotide in upper_sequence:
        partner = complement_map_upper.get(nucleotide, "N")
        complemented.append(partner)
    forward_complement = "".join(complemented)
    # Reversing the joined complement yields the reverse complement.
    return forward_complement[::-1]
def RC_version2_list_join(sequence: str) -> str:
    """
    Method 2 - reverse complement via a list comprehension and ``str.join``.

    The input is upper-cased, each base is mapped through
    ``complement_map_upper`` (unknown bases become "N"), and the joined
    result is reversed with a slice.
    """
    upper_sequence = sequence.upper()
    lookup = complement_map_upper.get
    complemented = [lookup(nucleotide, "N") for nucleotide in upper_sequence]
    forward_complement = "".join(complemented)
    return forward_complement[::-1]
#Benchmarking functions
def benchmark_functions(func: Callable, sequence_length: int, num_runs: int = 100) -> Dict[str, object]:
    """
    Benchmark ``func`` on a single random DNA sequence using ``timeit``.

    Args:
        func: Callable that accepts one DNA string argument.
        sequence_length: Length of the random sequence passed to ``func``.
        num_runs: Number of timed calls; must be at least 1.

    Returns:
        A dict with three keys:
        "function_name" - ``func.__name__`` (or ``repr(func)`` when the
        callable has no ``__name__``),
        "total_time" - seconds taken by all ``num_runs`` calls together,
        "average_time" - microseconds per call (total / num_runs * 1e6).

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    # Reject bad run counts up front: 0 would otherwise end in a
    # ZeroDivisionError and negative counts in a meaningless average.
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    # Resolve the label before timing so a callable without __name__
    # (e.g. functools.partial) cannot fail after the work is done.
    function_name = getattr(func, "__name__", repr(func))

    # The sequence is generated once, outside the timed callable, so only
    # the calls to func are measured.
    dna_sequence = generate_random_dna_sequence(sequence_length)
    execution_time = timeit.timeit(
        lambda: func(dna_sequence),
        number=num_runs,
    )
    return {
        "function_name": function_name,
        "total_time": execution_time,
        "average_time": execution_time / num_runs * 1e6,
    }
#Profiling function
def profile_functions(func: Callable, sequence_length: int) -> None:
    """
    Profile one call of ``func`` with cProfile and print the report.

    Args:
        func: Callable that accepts one DNA string argument.
        sequence_length: Length of the random sequence passed to ``func``.

    The report is sorted by "tottime", collected in a StringIO buffer and
    then printed. Any exception raised by ``func`` propagates to the caller
    after the profiler has been disabled.
    """
    # Local import: pstats is not among this module's top-level imports.
    import pstats

    dna_sequence = generate_random_dna_sequence(sequence_length)
    function_name = getattr(func, "__name__", repr(func))
    print(f"\n\ncProfile Report for {function_name} Length: {sequence_length}")

    pr = cProfile.Profile()
    pr.enable()
    try:
        func(dna_sequence)
    finally:
        # Always stop profiling, even when func raises.
        pr.disable()

    # Send the report into the buffer; previously the buffer was never
    # handed to the stats printer, so it stayed empty.
    s = StringIO()
    stats = pstats.Stats(pr, stream=s)
    # strip_dirs() keeps the same layout Profile.print_stats produces.
    stats.strip_dirs().sort_stats("tottime").print_stats()
    print(s.getvalue())