"""Amino-acid lookup tables.

Defines, at module level:

* ``amino_acids``   -- the 20 one-letter residue codes.
* ``longer_names``  -- three-letter name -> one-letter code.
* ``short_to_long`` -- one-letter code -> three-letter name.
* ``HP``, ``GES``, ``KD`` -- per-residue numeric scales (called "HP scales"
  in the script section below; presumably hydrophobicity scales).
* ``aa_charge``     -- per-residue charge (+1.0, -1.0 or 0.0).

``HP``, ``GES`` and ``aa_charge`` also carry an ``'X'`` entry; ``KD`` does not.

Running the file as a script prints the correlation coefficient between each
pair of scales (needs scipy).
"""
import string  # not used in this file; kept because importers may rely on it

# One-letter codes, alphabetical.
amino_acids = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
               'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

#aana = amino_acids + ['a','c','g','t']

# Three-letter residue name -> one-letter code.
longer_names = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
                'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G',
                'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
                'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
                'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'}

# Inverse of longer_names: one-letter code -> three-letter residue name.
short_to_long = {one: three for three, one in longer_names.items()}


# NOTE(review): values look like the Eisenberg consensus scale -- confirm
# the source before citing it.
HP = {'I': 0.73, 'F': 0.61, 'V': 0.54, 'L': 0.53, 'W': 0.37,
      'M': 0.26, 'A': 0.25, 'G': 0.16, 'C': 0.04, 'Y': 0.02,
      'P': -0.07, 'T': -0.18, 'S': -0.26, 'H': -0.40, 'E': -0.62,
      'N': -0.64, 'Q': -0.69, 'D': -0.72, 'K': -1.10, 'R': -1.76}

# 'X' gets the mean over the residues defined above.  The right-hand side is
# evaluated before 'X' is inserted, so len(HP) is still the residue count.
HP['X'] = sum(HP.values()) / float(len(HP))

# NOTE(review): presumably the Goldman-Engelman-Steitz scale -- confirm.
# Its sign convention is opposite to HP/KD (F is most negative, R most
# positive).
GES = {'F': -3.7, 'M': -3.4, 'I': -3.1, 'L': -2.8, 'V': -2.6,
       'C': -2.0, 'W': -1.9, 'A': -1.6, 'T': -1.2, 'G': -1.0,
       'S': -0.6, 'P': 0.2, 'Y': 0.7, 'H': 3.0, 'Q': 4.1,
       'N': 4.8, 'E': 8.2, 'K': 8.8, 'D': 9.2, 'R': 12.3}

# Same treatment as HP: 'X' is the mean of the defined residues.
GES['X'] = sum(GES.values()) / float(len(GES))

## KD values (Kyte-Doolittle) taken from http://web.expasy.org/protscale/pscale/Hphob.Doolittle.html

KD = {'A': 1.8, 'C': 2.5, 'E': -3.5, 'D': -3.5, 'G': -0.4, 'F': 2.8,
      'I': 4.5, 'H': -3.2, 'K': -3.9, 'M': 1.9, 'L': 3.8, 'N': -3.5,
      'Q': -3.5, 'P': -1.6, 'S': -0.8, 'R': -4.5, 'T': -0.7, 'W': -0.9,
      'V': 4.2, 'Y': -1.3}
# Sanity check on the hand-typed table.
# NOTE(review): unlike HP and GES, KD has no 'X' entry, so KD['X'] raises
# KeyError -- confirm whether that is intended.
assert len(KD) == 20

# Per-residue charge: neutral by default, K/R positive, D/E negative,
# and the 'X' placeholder neutral.
aa_charge = dict.fromkeys(amino_acids, 0.0)
aa_charge.update({'K': 1.0, 'R': 1.0, 'D': -1.0, 'E': -1.0, 'X': 0.0})


if __name__ == '__main__':

    ## covariation between different HP scales
    from scipy import stats

    # All three series are sign-flipped, as in the original script; flipping
    # both inputs of a regression leaves the correlation coefficient unchanged.
    ges = [-1 * GES[x] for x in amino_acids]
    kd = [-1 * KD[x] for x in amino_acids]
    hp = [-1 * HP[x] for x in amino_acids]

    # linregress yields (slope, intercept, rvalue, pvalue, stderr); only the
    # correlation coefficient is reported.
    _, _, r_ges_kd, _, _ = stats.linregress(ges, kd)
    _, _, r_ges_hp, _, _ = stats.linregress(ges, hp)
    _, _, r_kd_hp, _, _ = stats.linregress(kd, hp)

    # Single-argument print(...) produces the same text under Python 2 and 3.
    print('r_ges_kd: %s' % (r_ges_kd,))
    print('r_ges_hp: %s' % (r_ges_hp,))
    print('r_kd_hp: %s' % (r_kd_hp,))