Python 中对比两条序列:若字符相同,则将二维矩阵中相应位置赋值为 1

 

001、

>>> import openpyxl                                  ## 导入包
>>> import numpy as np
>>> import pandas as pd
>>> s1=list('ATGATAGCAGTGAAATGGG')                   ## 将序列储存为列表
>>> s1
['A', 'T', 'G', 'A', 'T', 'A', 'G', 'C', 'A', 'G', 'T', 'G', 'A', 'A', 'A', 'T', 'G', 'G', 'G']
>>> s2=list('GATAGCAGTGAAACGGGCA')
>>> s2
['G', 'A', 'T', 'A', 'G', 'C', 'A', 'G', 'T', 'G', 'A', 'A', 'A', 'C', 'G', 'G', 'G', 'C', 'A']
>>> len(s1); len(s2)
19
19
>>> compare = [[0]*19 for i in range(19)]                      ## 生成二维数组
>>> compare
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
>>> for i in range(19):
...     for j in range(19):
...             if s1[i] == s2[j]:
...                     compare[i][j] = 1
...
>>> compare
[[0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0], [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0]]
>>> a = np.array(compare)                                    ##将数组转换为矩阵形式
>>> a
array([[0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0]])
>>> data_df = pd.DataFrame(a)                                        ## 转换为数据框
>>> data_df
    0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  18
0    0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
1    0   0   1   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
2    1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
3    0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
4    0   0   1   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
5    0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
6    1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
7    0   0   0   0   0   1   0   0   0   0   0   0   0   1   0   0   0   1   0
8    0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
9    1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
10   0   0   1   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
11   1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
12   0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
13   0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
14   0   1   0   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   1
15   0   0   1   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
16   1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
17   1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
18   1   0   0   0   1   0   0   1   0   1   0   0   0   0   1   1   1   0   0
>>> data_df.columns = s2                                                ## 重命名行名和列名
>>> data_df.index = s1
>>> data_df
   G  A  T  A  G  C  A  G  T  G  A  A  A  C  G  G  G  C  A
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
T  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
T  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
C  0  0  0  0  0  1  0  0  0  0  0  0  0  1  0  0  0  1  0
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
T  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
A  0  1  0  1  0  0  1  0  0  0  1  1  1  0  0  0  0  0  1
T  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0
G  1  0  0  0  1  0  0  1  0  1  0  0  0  0  1  1  1  0  0

>>> data_df.to_csv("xxx.csv")

 

(base) root@PC1:/home/test2# cat xxx.csv
,G,A,T,A,G,C,A,G,T,G,A,A,A,C,G,G,G,C,A
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
T,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
T,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
C,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
T,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
A,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1
T,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0
G,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,1,0,0

 

 

 

参考:https://blog.csdn.net/m0_57099761/article/details/123464340

 

posted @ 2022-08-13 16:36  小鲨鱼2018  阅读(91)  评论(0)  编辑  收藏  举报