BIOM Table code examples (biom-format Python API and command line)

import numpy

from biom.table import Table

============================================================================================================

# Count matrix: 10 observations (rows) x 4 samples (columns), filled row by
# row with the integers 0..39.
data = numpy.arange(10 * 4).reshape(10, 4)

# Identifiers: samples are S0..S3, observations are O0..O9.
sample_ids = ['S{}'.format(col) for col in range(4)]
observ_ids = ['O{}'.format(row) for row in range(10)]

# One metadata dict per sample, in the same order as sample_ids.
sample_metadata = [{'environment': env} for env in 'ABAB']

# One metadata dict per observation, in the same order as observ_ids.
# Each dict gets its own freshly built taxonomy list.
observ_metadata = [
    {'taxonomy': ['Bacteria', phylum]}
    for phylum in (
        'Firmicutes', 'Firmicutes',
        'Proteobacteria', 'Proteobacteria', 'Proteobacteria',
        'Bacteroidetes', 'Bacteroidetes',
        'Firmicutes', 'Firmicutes', 'Firmicutes',
    )
]

# Construct the table. The positional order is (data, observation IDs,
# sample IDs); the metadata lists are supplied at construction time.
table = Table(
    data,
    observ_ids,
    sample_ids,
    observation_metadata=observ_metadata,
    sample_metadata=sample_metadata,
    table_id='myTestTable',
)

# add_metadata() ADDS metadata to an already constructed table; it is not a
# replacement for passing the metadata to the constructor as done above.
# table.add_metadata(sample_metadata, axis='sample')

# Print the table's short summary. A bare `table` expression is only echoed
# in an interactive session, so its repr is printed explicitly here.
print(repr(table))

# Print the table itself (its str() form).
print(table)

# Print column names (sample IDs). 'sample' is the default axis of ids(),
# so both calls print the same IDs.
print(table.ids())
print(table.ids(axis='sample'))

# Print row names (observation IDs).
print(table.ids(axis='observation'))

# Print the number of non-zero entries. It is 39 here: the matrix holds the
# 40 values 0..39 and only the single 0 is not counted.
print(table.nnz)

============================================================================================================

# Small 2 x 2 example: observations O1/O2 (rows) by samples S1/S2 (columns).
data = numpy.asarray([
    [2, 0],
    [6, 1],
])
table = Table(data, observation_ids=['O1', 'O2'], sample_ids=['S1', 'S2'])

# Normalize per sample (column); 'sample' is norm()'s default axis.
new_table = table.norm(axis='sample', inplace=False)

# Normalize per observation (row).
new_table = table.norm(axis='observation', inplace=False)

# With inplace=False, norm() returns a new table and `table` stays unchanged;
# with inplace=True, `table` itself would be modified. Note that an assignment
# such as `table1 = table` does not copy anything: both names refer to the
# same object, so an in-place change made through one name is visible through
# the other as well.

============================================================================================================

# Filter samples with a function.
#
# `normed` is the per-sample normalized copy of `table`. NOTE: `table` must
# carry sample metadata containing an 'environment' key (as a table built
# with sample_metadata does), because the filter function indexes into it.
normed = table.norm(axis='sample', inplace=False)


def filter_f(values, id_, md):
    """Return True for samples whose 'environment' metadata equals 'A'.

    values -- the data vector of the sample (unused here)
    id_    -- the sample ID (unused here)
    md     -- the sample's metadata mapping
    """
    return md['environment'] == 'A'


# inplace=False leaves `normed` untouched and returns the filtered table.
env_a = normed.filter(filter_f, axis='sample', inplace=False)

============================================================================================================

# Split the table along the sample axis, grouping samples by the value of
# their 'environment' metadata (the table must carry that metadata).
def part_f(id_, md):
    """Return the partition key of a sample: its 'environment' metadata."""
    return md['environment']


env_tables = table.partition(part_f, axis='sample')

# For every partition, print its key and the per-sample sums of its table.
for env_name, sub_table in env_tables:
    print(env_name, sub_table.sum('sample'))

============================================================================================================

# add-metadata

============================================================================================================

# Export a BIOM table to tab-separated text, writing the "taxonomy"
# observation metadata into a column named "ConsensusLineage".
biom convert -i table.biom -o table.from_biom_w_consensuslineage.txt --to-tsv --header-key taxonomy --output-metadata-id "ConsensusLineage"

# Workflow for editing a table by hand:
#
# 1. Convert the .biom file to .txt (TSV). --header-key names the
#    observation metadata field to include in the text output.
biom convert -i otu_table.biom -o otu_table.txt --to-tsv --header-key taxonomy

# 2. Edit the text file in a spreadsheet program such as Excel.

# 3. Convert the edited text file back to BIOM (HDF5 format), processing the
#    taxonomy column as observation metadata.
biom convert -i otu_table.txt -o new_otu_table.biom --to-hdf5 --table-type="OTU table" --process-obs-metadata taxonomy

============================================================================================================

# Write a summary of INPUT.biom to OUTPUT.txt.
# NOTE(review): --qualitative presumably reports the number of observations
# per sample instead of total counts; confirm with
# `biom summarize-table --help`.
biom summarize-table -i INPUT.biom --qualitative -o OUTPUT.txt

posted @ 2017-02-10 16:21  陆离可  阅读(305)  评论(0编辑  收藏  举报