Logo
import numpy as np
import pandas as pd
# Example motif matrix: one row per motif position, one column per nucleotide.
# Every row already sums to 1, so the entries are relative base frequencies.
pfm = pd.DataFrame(
    [
        [0.0, 0.8, 0.2, 0.0],
        [0.0, 0.2, 0.8, 0.0],
        [0.0, 0.8, 0.2, 0.0],
        [0.3, 0.3, 0.4, 0.0],
        [0.2, 0.4, 0.3, 0.1],
        [0.0, 0.2, 0.8, 0.0],
        [0.0, 0.8, 0.2, 0.0],
        [0.0, 0.2, 0.8, 0.0],
    ],
    columns=list('ACGT'),
)
# Bare expression so the notebook displays the table.
pfm
| | A | C | G | T |
---|---|---|---|---|
0 | 0.0 | 0.8 | 0.2 | 0.0 |
1 | 0.0 | 0.2 | 0.8 | 0.0 |
2 | 0.0 | 0.8 | 0.2 | 0.0 |
3 | 0.3 | 0.3 | 0.4 | 0.0 |
4 | 0.2 | 0.4 | 0.3 | 0.1 |
5 | 0.0 | 0.2 | 0.8 | 0.0 |
6 | 0.0 | 0.8 | 0.2 | 0.0 |
7 | 0.0 | 0.2 | 0.8 | 0.0 |
import seqlogo
def logo(a):
    """Render a sequence logo for a nucleotide matrix and print its consensus.

    Parameters
    ----------
    a : pandas.DataFrame
        One row per motif position; must contain the columns 'A', 'C', 'G'
        and 'T'. The function works on a copy, so the caller's frame is
        left untouched.

    Returns
    -------
    The value returned by ``seqlogo.seqlogo`` for the normalised matrix
    (requested as a large PNG scaled by information content).
    """
    freqs = a.copy()
    row_totals = freqs.sum(axis=1)
    # Divide each base column by the per-row total.
    for base in 'ACGT':
        freqs[base] = freqs[base] / row_totals

    # Convert to a weight matrix and back to a probability matrix before
    # handing the result to the plotting call.
    weights = seqlogo.Pwm(seqlogo.pfm2pwm(freqs.values))
    probabilities = seqlogo.Ppm(seqlogo.pwm2ppm(weights).values)

    rendered = seqlogo.seqlogo(
        probabilities, ic_scale=True, format='png', size='large'
    )
    print(seqlogo.CompletePm(ppm=probabilities).consensus)
    return rendered
# Render the logo for the whole matrix; logo() also prints the consensus string.
logo(pfm)
CGCGCGCG
# Render the logo for only the first four rows (positions 0-3) of the matrix.
logo(pfm[:4])
CGCG