# This script takes an input sequence and outputs a sequence ready for restriction cloning.
# input - gene sequence, restriction sites to avoid, flanking restriction sites, maximum size of DNA fragment to synthesize
# output - language on what was done, final gene sequence, gene fragments to order
#
# Provenance: this is cloning_strategy.py as produced by patches 1/3 and 2/3 of the
# series; patch 3/3 moves the file to sanclone/tools/cloning_strategy.py.

import random
import re
import warnings

import pandas as pd

try:
    from Bio.SeqUtils import MeltingTemp as mt
except ImportError:  # only the default Tm calculation in eblockerizer needs Biopython
    mt = None


# CSV file of restriction enzymes; read with the columns 'Name' and 'Sequence_cleaned'.
RESTRICTION_CSV = 'restriction_enzymes.csv'

# Restriction enzymes that are always removed - these include common golden gate enzymes.
DEFAULT_RESTRICTION = ('BSAI', 'BBSI', 'BSMBI')

# Codons available for each amino acid (stop codons are deliberately absent,
# so stop codons are never rewritten).
CODON_TABLE = {
    'A': ["GCA", "GCC", "GCG", "GCT"],
    'C': ["TGC", "TGT"],
    'D': ["GAC", "GAT"],
    'E': ["GAA", "GAG"],
    'F': ["TTC", "TTT"],
    'G': ["GGA", "GGC", "GGG", "GGT"],
    'H': ["CAC", "CAT"],
    'I': ["ATA", "ATC", "ATT"],
    'K': ["AAA", "AAG"],
    'L': ["CTA", "CTC", "CTG", "CTT", "TTA", "TTG"],
    'M': ["ATG"],
    'N': ["AAC", "AAT"],
    'P': ["CCA", "CCC", "CCG", "CCT"],
    'Q': ["CAA", "CAG"],
    'R': ["AGA", "AGG", "CGA", "CGC", "CGG", "CGT"],
    'S': ["AGC", "AGT", "TCA", "TCC", "TCG", "TCT"],
    'T': ["ACA", "ACC", "ACG", "ACT"],
    'V': ["GTA", "GTC", "GTG", "GTT"],
    'W': ["TGG"],
    'Y': ["TAC", "TAT"],
}

# Reverse lookup codon -> amino acid, derived from CODON_TABLE.
_CODON_TO_AA = {
    codon: amino_acid
    for amino_acid, codons in CODON_TABLE.items()
    for codon in codons
}

# Complement of every IUPAC nucleotide code.
_COMPLEMENT = {
    'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
    'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M', 'M': 'K',
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D', 'N': 'N',
}

# Regular-expression fragment matching each IUPAC nucleotide code.
_IUPAC_REGEX = {
    'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
    'R': '[AG]', 'Y': '[CT]', 'S': '[CG]', 'W': '[AT]', 'K': '[GT]', 'M': '[AC]',
    'B': '[CGT]', 'D': '[AGT]', 'H': '[ACT]', 'V': '[ACG]', 'N': '[ACGT]',
}

# A remainder of at most 2 * max_fragment_length - _HALF_SPLIT_MARGIN is cut in
# half instead of cutting a full-length fragment off the front.
_HALF_SPLIT_MARGIN = 70


def dna_clean(sequence=""):
    """Return *sequence* as upper-case DNA.

    Whitespace is removed, letters are upper-cased and RNA ``U`` is
    converted to ``T``.  No other characters are altered.
    """
    # drop whitespace (spaces, newlines from pasted sequences) and make all uppercase
    sequence = "".join(sequence.split()).upper()

    # convert RNA to DNA
    return sequence.replace("U", "T")


def reverse_complement(dna):
    """Return the reverse complement of a DNA sequence.

    Input is case-insensitive; the result is upper-case.  IUPAC ambiguity
    codes are complemented too (e.g. ``R`` <-> ``Y``, ``N`` -> ``N``).

    Raises:
        KeyError: if *dna* contains a character that is not an IUPAC
            nucleotide code.
    """
    return ''.join(_COMPLEMENT[base] for base in reversed(dna.upper()))


def codonchoice(aminoacid):
    """Return a random codon for a one-letter amino acid (case-insensitive).

    Used for removing restriction sites by synonymous substitution.

    Raises:
        KeyError: if *aminoacid* is not a key of ``CODON_TABLE``.
    """
    return random.choice(CODON_TABLE[aminoacid.upper()])


def _site_regex(site):
    """Translate a recognition sequence (IUPAC codes allowed) into a regex."""
    return ''.join(_IUPAC_REGEX.get(base, re.escape(base)) for base in site.upper())


def _lookup_site_sequences(enzyme_names, restriction_csv):
    """Return recognition sequences (forward and reverse complement) for the named enzymes.

    Names are matched case-insensitively against the 'Name' column; names that
    are missing from the table produce a warning and are skipped.
    """
    restriction_df = pd.read_csv(restriction_csv, sep=',')
    table_names = restriction_df['Name'].astype(str).str.upper()

    site_seqs = []
    for enzyme_name in enzyme_names:
        site_rows = restriction_df.loc[table_names == enzyme_name.upper()]
        if site_rows.shape[0] == 0 or pd.isna(site_rows.iloc[0]['Sequence_cleaned']):
            warnings.warn(
                "restriction enzyme {!r} not found in the enzyme table; skipped".format(enzyme_name)
            )
            continue
        this_seq = str(site_rows.iloc[0]['Sequence_cleaned']).upper()
        site_seqs.append(this_seq)
        site_seqs.append(reverse_complement(this_seq))

    # palindromic sites equal their reverse complement - keep each sequence once
    return list(dict.fromkeys(site_seqs))


def _site_hits(sequence, compiled_sites):
    """Return sorted ``(start, length)`` pairs for every site occurrence, overlaps included."""
    return sorted(
        (match.start(), length)
        for pattern, length in compiled_sites
        for match in pattern.finditer(sequence)
    )


def _break_site(sequence, start, length, compiled_sites, current_hits):
    """Destroy the site at *start* with one synonymous codon change.

    Every codon overlapping the site is tried; a change is accepted only when it
    lowers the total number of forbidden-site occurrences, so a substitution that
    misses the site or creates a new one elsewhere is rejected.  Returns the new
    sequence, or ``None`` when no synonymous change works.
    """
    first_codon_start = start - start % 3
    for codon_start in range(first_codon_start, start + length, 3):
        codon = sequence[codon_start:codon_start + 3]
        amino_acid = _CODON_TO_AA.get(codon)
        if amino_acid is None:
            # stop codon, incomplete trailing codon or ambiguous bases: leave untouched
            continue

        # M (ATG) and W (TGG) have no alternative codon, so this list is empty for them
        alternatives = [c for c in CODON_TABLE[amino_acid] if c != codon]
        for new_codon in random.sample(alternatives, len(alternatives)):
            candidate = sequence[:codon_start] + new_codon + sequence[codon_start + 3:]
            if len(_site_hits(candidate, compiled_sites)) < current_hits:
                return candidate
    return None


def remove_restriction(restriction_sites=None, sequence="", restriction_csv=RESTRICTION_CSV):
    """Remove restriction sites from a coding sequence by synonymous codon changes.

    Codon boundaries are counted from index 0, i.e. *sequence* is treated as an
    in-frame coding sequence.  The enzymes in ``DEFAULT_RESTRICTION`` are always
    removed in addition to *restriction_sites*; both strands are handled by also
    removing the reverse complement of each recognition sequence.

    Args:
        restriction_sites: names of restriction enzymes to remove
            (case-insensitive).
        sequence: the nucleotide sequence; it is upper-cased before processing.
        restriction_csv: path or file-like object of the enzyme table, read with
            ``pandas.read_csv``; needs the columns 'Name' and 'Sequence_cleaned'.

    Returns:
        The upper-cased sequence, same length, free of the requested sites.

    Raises:
        ValueError: if a site cannot be removed without changing the encoded
            amino acids.
    """
    requested = [x.upper() for x in (restriction_sites or [])]
    all_restriction_sites = list(DEFAULT_RESTRICTION) + requested

    # convert restriction enzyme names to sequences
    site_seqs = _lookup_site_sequences(all_restriction_sites, restriction_csv)

    # a lookahead makes finditer report overlapping occurrences as well
    compiled_sites = [
        (re.compile("(?=" + _site_regex(site) + ")"), len(site)) for site in site_seqs
    ]

    new_nucleotide_sequence = sequence.upper()

    # Every accepted change lowers the number of hits, so this loop terminates.
    hits = _site_hits(new_nucleotide_sequence, compiled_sites)
    while hits:
        start, length = hits[0]
        replacement = _break_site(new_nucleotide_sequence, start, length, compiled_sites, len(hits))
        if replacement is None:
            raise ValueError(
                "could not remove the restriction site at position {} "
                "without changing the protein sequence".format(start)
            )
        new_nucleotide_sequence = replacement
        hits = _site_hits(new_nucleotide_sequence, compiled_sites)

    return new_nucleotide_sequence


def _default_tm(overlap):
    """Melting temperature of *overlap* using Biopython's nearest-neighbour model."""
    if mt is None:
        raise ImportError("Biopython (Bio.SeqUtils.MeltingTemp) is required to compute overlap Tm")
    return mt.Tm_NN(overlap)


def eblockerizer(sequence, max_fragment_length=500, min_fragment_length=300,
                 min_gibson_overlap=25, min_overlap_Tm=55,
                 seq_name="fragment", tm_func=None):
    """Break the insert into overlapping chunks for DNA synthesis.

    Fragments are cut from the front of the sequence.  Consecutive fragments
    share an overlap of at least *min_gibson_overlap* bases, which is extended
    one base at a time towards the 5' end until its melting temperature exceeds
    *min_overlap_Tm*.

    Args:
        sequence: the sequence to split; it is upper-cased.
        max_fragment_length: maximum length of a fragment.
        min_fragment_length: currently unused; kept for interface compatibility.
        min_gibson_overlap: minimum overlap between consecutive fragments.
        min_overlap_Tm: melting temperature the overlap has to exceed.
        seq_name: prefix of the fragment names (``<seq_name>_<part number>``).
        tm_func: callable mapping an overlap sequence to its melting
            temperature; defaults to Biopython's ``MeltingTemp.Tm_NN``.

    Returns:
        A list of ``[name, fragment_sequence]`` pairs in 5' to 3' order.

    Raises:
        ValueError: if no overlap shorter than the fragment reaches
            *min_overlap_Tm*.
        ImportError: if the default Tm calculation is needed and Biopython is
            not installed.
    """
    if tm_func is None:
        tm_func = _default_tm

    remaining_sequence = sequence.upper()
    sequences = []
    current_part = 0

    while True:
        current_part += 1

        # is sequence already <= max_fragment_length?
        if len(remaining_sequence) <= max_fragment_length:
            sequences.append(["{}_{}".format(seq_name, current_part), remaining_sequence])
            break

        # can the rest be split into two equal pieces?
        if len(remaining_sequence) <= 2 * max_fragment_length - _HALF_SPLIT_MARGIN:
            target_length = len(remaining_sequence) // 2
        else:
            target_length = max_fragment_length

        # check if Tm of overlap is high enough and extend the overlap if not
        overlap_start = target_length - min_gibson_overlap
        while True:
            if overlap_start <= 0:
                # the overlap would swallow the whole fragment: the remaining
                # sequence could never shrink and the loop would not terminate
                raise ValueError(
                    "no overlap shorter than the fragment reaches a Tm above {}".format(min_overlap_Tm)
                )
            if tm_func(remaining_sequence[overlap_start:target_length]) > min_overlap_Tm:
                break
            # if needed extend the overlap
            overlap_start -= 1

        # output this fragment and update the remaining sequence
        sequences.append(["{}_{}".format(seq_name, current_part), remaining_sequence[:target_length]])
        remaining_sequence = remaining_sequence[overlap_start:]

    return sequences


def do_the_thing(sequence='', restriction_sites=None, cloning_sites=None, max_fragment_length=500):
    """Run the full task: clean the sequence, remove restriction sites, split for synthesis.

    Args:
        sequence: the gene sequence (DNA or RNA letters).
        restriction_sites: names of restriction enzymes to remove in addition to
            ``DEFAULT_RESTRICTION``.
        cloning_sites: currently unused.
        max_fragment_length: maximum size of a DNA fragment to synthesize.

    Returns:
        A sentence listing the restriction enzymes that were targeted for removal.
    """
    restriction_sites = list(restriction_sites or [])

    sequence = dna_clean(sequence)
    new_sequence = remove_restriction(restriction_sites=restriction_sites, sequence=sequence)
    fragments = eblockerizer(sequence=new_sequence, max_fragment_length=max_fragment_length)

    # TODO: add the new sequence (and the fragments) to the State

    targeted = dict.fromkeys(list(DEFAULT_RESTRICTION) + [x.upper() for x in restriction_sites])
    return "The following restriction sites were removed: " + ", ".join(targeted)