From 41ddd7ba21ac607817c2e0b15c7dca0a4120d374 Mon Sep 17 00:00:00 2001
From: jayman1466 <jayman1466@gmail.com>
Date: Fri, 6 Oct 2023 16:41:25 -0700
Subject: [PATCH 1/3] Create cloning_strategy.py

---
 cloning_strategy.py | 133 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 cloning_strategy.py

diff --git a/cloning_strategy.py b/cloning_strategy.py
new file mode 100644
index 0000000..9fc1156
--- /dev/null
+++ b/cloning_strategy.py
@@ -0,0 +1,133 @@
+#this script takes an input sequence and outputs a sequence ready for restriction cloning
+#input - gene sequence, restriction sites to avoid, flanking restriction sites, maximum size of DNA fragment to synthesize
+#output - language on what was done, final gene sequence, gene fragments to order
+
+import pandas as pd
+
+#this function cleans up ambiguous characters in DNA
+def dna_clean(sequence = ""):
+    
+    #make all uppercase
+    sequence = sequence.upper()
+    
+    #convert RNA to DNA
+    sequence = sequence.replace("U","T")
+    
+    return sequence
+
+
+#this function returns the reverse complement of a DNA sequence
+def reverse_complement(dna):
+    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
+    return ''.join([complement[base] for base in reversed(dna)])
+
+#this outputs a random codon for the given amino acid, for the purpose of removing restriction sites
+def codonchoice(aminoacid):
+    aminoacid = aminoacid.upper()
+    codon_dict = {'A':["GCA","GCC","GCG","GCT"],
+    'C':["TGC","TGT"],
+    'D':["GAC","GAT"],
+    'E':["GAA","GAG"],
+    'F':["TTC","TTT"],
+    'G':["GGA","GGC","GGG","GGT"],
+    'H':["CAC","CAT"],
+    'I':["ATA","ATC","ATT"],
+    'K':["AAA","AAG"],
+    'L':["CTA","CTC","CTG","CTT","TTA","TTG"],
+    'M':["ATG"],
+    'N':["AAC","AAT"],
+    'P':["CCA","CCC","CCG","CCT"],
+    'Q':["CAA","CAG"],
+    'R':["AGA","AGG","CGA","CGC","CGG","CGT"],
+    'S':["AGC","AGT","TCA","TCC","TCG","TCT"],
+    'T':["ACA","ACC","ACG","ACT"],
+    'V':["GTA","GTC","GTG","GTT"],
+    'W':["TGG"],
+    'Y':["TAC","TAT"]}
+    
+    return random.choice(codon_dict[aminoacid])
+
+
+#this function removes restriction sites (input is the names of the restriction enzymes) from the gene sequence by replacing codons with randomly chosen synonymous codons
+def remove_restriction(restriction_sites = [], sequence = "", ):
+    
+    #default restriction enzymes to remove - these include common golden gate enzymes
+    default_restriction = ['BSAI', 'BBSI', 'BSMBI']
+    
+    #convert restriction sites to uppercase
+    restriction_sites = [x.upper() for x in restriction_sites]
+    
+    all_restriction_sites = default_restriction + restriction_sites
+    
+    #file of restriction enzymes
+    restriction_csv = 'restriction_enzymes.csv'
+    restriction_df = pd.read_csv(restriction_csv,sep=',')
+    
+    new_nucleotide_sequence = sequence.upper()
+    
+    #convert restriction enzyme names to sequences
+    restriction_sites_seqs = []
+    for restriction_site in all_restrition_sites:
+        site_row = restriction_df.loc[restriction_df['Name'] == restriction_site]
+        if len(site_row.shape[0]) > 0:
+            this_seq = site_row.iloc[0]['Sequence_cleaned']
+            restriction_sites_seqs.append(this_seq)
+            restriction_sites.seqs.append(reverse_complement(this_seq))
+    
+    
+    #iterate through the restriction site sequences and remove them
+    for restriction_site in restriction_sites_seqs:
+        restriction_site = restriction_site.upper()
+        
+        while True:
+            #find all coordinates of restriction site
+            coordinates = [m.start() for m in re.finditer(restriction_site, new_nucleotide_sequence)]
+            for coordinate in coordinates:
+
+                #find the index of the start of the codon
+                remainder = coordinate % 3
+                codon_start = coordinate - remainder
+
+                while True:      
+                    #determine the codon
+                    codon = new_nucleotide_sequence[codon_start:codon_start + 3]
+
+                    #translate the codon
+                    amino_acid = str(Seq(codon).translate())
+
+                    #if it's a Met (M), Trp (W) or stop codon there is no alternative codon to pick, so move on to the next codon instead
+                    if amino_acid == 'M' or amino_acid == '*' or amino_acid =='W':
+                        skip = random.choice([3])
+                        codon_start = codon_start + skip                        
+                    else:
+                        break
+                        
+                #come up with a new codon that differs from the current one
+                while True:
+                    new_codon = codonchoice(amino_acid)
+                    if new_codon != codon:
+                        break
+
+                #adjust the sequence 
+                new_nucleotide_sequence = new_nucleotide_sequence[0:codon_start] + new_codon + new_nucleotide_sequence[codon_start+3:]
+
+            #check if all sites are successfully removed
+            coordinates = [m.start() for m in re.finditer(restriction_site, new_nucleotide_sequence)]
+            if len(coordinates) == 0:
+                break
+            else:
+                break
+      
+    return new_nucleotide_sequence  
+
+
+#this function actually does the full task
+def do_the_thing(sequence = '', restriction_sites = [], cloning_sites = [], synthesis_size = 500):
+    sequence = dna_clean(sequence)
+    new_sequence = remove_restriction(restriction_sites[],sequence)
+    
+    #add the new sequence to the State
+    
+    return "The following restriction sites were removed:" + restriction_sites
+    
+    
\ No newline at end of file

From 75ff71e3c03eba14a30e9fa63d7b953fb1c33dca Mon Sep 17 00:00:00 2001
From: jayman1466 <jayman1466@gmail.com>
Date: Fri, 6 Oct 2023 16:47:43 -0700
Subject: [PATCH 2/3] Update cloning_strategy.py

---
 cloning_strategy.py | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/cloning_strategy.py b/cloning_strategy.py
index 9fc1156..2b675b1 100644
--- a/cloning_strategy.py
+++ b/cloning_strategy.py
@@ -3,6 +3,7 @@
 #output - language on what was done, final gene sequence, gene fragments to order
 
 import pandas as pd
+from Bio.SeqUtils import MeltingTemp as mt
 
 #this function cleans up ambiguous characters in DNA
 def dna_clean(sequence = ""):
@@ -120,12 +121,52 @@ def remove_restriction(restriction_sites = [], sequence = "", ):
       
     return new_nucleotide_sequence  
 
+#this function breaks the insert into chunks for DNA synthesis
+def eblockerizer(sequence, max_fragment_length = 500, min_fragment_length = 300, min_gibson_overlap = 25, min_overlap_Tm = 55):
+    
+    full_sequence = sequence.upper()
+    
+    current_part = 0
+    sequences = []
+    remaining_sequence = full_sequence
+    
+    while True:
+        current_part = current_part + 1
+        
+        #is the remaining sequence already <= max_fragment_length?
+        if len(remaining_sequence) <= max_fragment_length:
+            sequences.append(["{}_{}".format(seq_name,current_part),remaining_sequence])
+            break
+        else:
+            #can the rest be split into two equal pieces?
+            if len(remaining_sequence) <= 2*max_fragment_length-70:
+                target_length = len(remaining_sequence)//2
+            else:
+                target_length = max_fragment_length
+            
+            #check if Tm of overlap is greater than min_overlap_Tm (default 55C) and extend overlap if not
+            i=0
+            while True:
+                this_overlap = remaining_sequence[target_length-min_gibson_overlap-i:target_length]
+                this_overlap_Tm = mt.Tm_NN(Seq(this_overlap))
+                
+                if this_overlap_Tm > min_overlap_Tm:
+                    #output this fragment and update the remaining sequence
+                    sequences.append(["{}_{}".format(seq_name,current_part),remaining_sequence[0:target_length]])
+                    remaining_sequence = remaining_sequence[target_length-min_gibson_overlap-i:]
+                    break
+                else:
+                    #if needed, extend the overlap by one base
+                    i = i+1
+    return sequences
+
 
 #this function actually does the full task
-def do_the_thing(sequence = '', restriction_sites = [], cloning_sites = [], synthesis_size = 500):
+def do_the_thing(sequence = '', restriction_sites = [], cloning_sites = [], max_fragment_length = 500):
     sequence = dna_clean(sequence)
-    new_sequence = remove_restriction(restriction_sites[],sequence)
+    new_sequence = remove_restriction(restriction_sites = restriction_sites[],sequence = sequence)
     
+    fragments = eblockerizer(sequence = new_sequence)
     #add the new sequence to the State
     
     return "The following restriction sites were removed:" + restriction_sites

From 4d4434e1e44eb22580be7fe074e5f997237305fa Mon Sep 17 00:00:00 2001
From: jayman1466 <jayman1466@gmail.com>
Date: Fri, 6 Oct 2023 16:49:00 -0700
Subject: [PATCH 3/3] moved to tools

---
 .DS_Store                                        | Bin 0 -> 6148 bytes
 sanclone/.DS_Store                               | Bin 0 -> 6148 bytes
 .../tools/cloning_strategy.py                    |   0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 sanclone/.DS_Store
 rename cloning_strategy.py => sanclone/tools/cloning_strategy.py (100%)

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..a1d1ff4e449e1c3ca49d877116808fd10e9cd644
GIT binary patch
literal 6148
zcmeHKISv9b477n_B^pY~e1RWC2wuPkIJ*^FAo{I%7f)k+6rhC;8Z?%iNgPk2OtD^z
zh%T?kg~&`q8n~g{Y#5sDn-6T15e33=Mk}41<#fCp+CldFfN>kS$PT>2Gk@Q8t!>h%
z02QDDRDcRlfv+l%<+Yi9^;8~41*pJ3C}7`*0ynIQeV~6jFn9|9oFMFmx%U#lVgX=H
z>;n;jX;6Vd)od{|=!lohtBHMJ&_%QP(7ai*Ls7pS=NC^Gt$`e=02O#uU>wVt)&Dj8
zL;wFu;))7Tfxl8fC#%hBi6>=k9X!rzZGms$mUD-jVeS+RUXFoYj<K+EJolu?D>lb|
WP3!}mj=0l-{24G^XjI_W3OoS%XBChD

literal 0
HcmV?d00001

diff --git a/sanclone/.DS_Store b/sanclone/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..a0b52f9a81ff9e2f1dd12fa214a3606e2890b59e
GIT binary patch
literal 6148
zcmeH~F%H5o3`K1y5)um|V-^m;4I)%dzy%O=tkj{f=jePpURsz@g}y82$4&z68|qp_
z^!(~hB6|_p!cFC9!^{-%MxHX@>SgT4M}HrN-qNy)y%xan8`JezrjP&$kN^pg00}IJ
zfERJp|J{P_iMK)mB(Mwu_I^lk(;Ql=`l|zjw*b%vX*bNhmw+ZqKyzrRiVRGnd(b$l
z-iBD-k3&=J%b}%eJuVu}hsHmvbulol$3+tYm}VCaBtQZ)0_z&@t^Qx&-}?XA5r!l{
z0)Iw8oA%nC@la7)KOWEWx5(POz@fe!!{j3XftBI|+zso=CeR#Osv-m9hk#??KmtD{
F@CNjO6RH3J

literal 0
HcmV?d00001

diff --git a/cloning_strategy.py b/sanclone/tools/cloning_strategy.py
similarity index 100%
rename from cloning_strategy.py
rename to sanclone/tools/cloning_strategy.py