Source code for cyclum.hdfrw

"""Read write HDF."""
import h5py
import pandas
import numpy
from typing import Type, Union, List


def hdf2mat(filepath: str, dtype: Type = float) -> pandas.DataFrame:
    """Read hdf generated by hdfrw.R mat2hdf function to a data frame.

    Note that due to how python and R handle data differently, the stored
    ``colnames`` are used for the index and the stored ``rownames`` are used
    for the columns, i.e. the matrix is tacitly transposed.

    Both name datasets are optional; when one is absent the corresponding
    axis keeps the default labels assigned by pandas.

    :param filepath: path of hdf file
    :param dtype: type of data; default is float
    :return: a pandas data frame
    """

    def _as_text(label) -> str:
        # Labels are expected as bytes, but may come back as str depending on
        # the h5py version and on how the strings were stored; calling
        # .decode() on a str would raise AttributeError.
        return label.decode() if isinstance(label, bytes) else str(label)

    with h5py.File(filepath, 'r') as f:
        # [:] materialises the dataset as an in-memory array before the file closes.
        df = pandas.DataFrame(f['matrix'][:], dtype=dtype, copy=False)
        if 'colnames' in f:
            df.index = [_as_text(x) for x in f['colnames'][:]]
        if 'rownames' in f:
            df.columns = [_as_text(x) for x in f['rownames'][:]]
    return df
def mat2hdf(data: Union[pandas.DataFrame, numpy.ndarray, List[str]],
            filepath: str) -> None:
    """Write data to an hdf file which can be read by hdfrw.R hdf2mat function.

    What gets written depends on the type of ``data``:

    * ``pandas.DataFrame``: values go to ``matrix``, the index to ``colnames``
      and the columns to ``rownames`` (labels are ASCII-encoded).
    * ``numpy.ndarray``: written as-is to ``matrix``.
    * ``list`` of ``str``: ASCII-encoded and written to ``matrix``.

    Everything to be stored is prepared *before* the file is opened, so an
    unsupported type or a label that cannot be encoded never truncates an
    existing file or leaves a partially written one behind.

    :param data: data frame, numpy array or list of strings to be written
    :param filepath: path of hdf file to be written
    :return: None
    :raises TypeError: if ``data`` is none of the supported types
    """
    # Insertion order is preserved, so datasets are written in the order
    # matrix, colnames, rownames.
    datasets = {}
    if isinstance(data, pandas.DataFrame):
        datasets['matrix'] = data.values
        datasets['colnames'] = [x.encode('ASCII') for x in data.index.tolist()]
        datasets['rownames'] = [x.encode('ASCII') for x in data.columns.tolist()]
    elif isinstance(data, numpy.ndarray):
        datasets['matrix'] = data
    elif isinstance(data, list):
        datasets['matrix'] = [x.encode('ASCII') for x in data]
    else:
        raise TypeError(
            "only pandas.DataFrame, numpy.ndarray and list of str are supported.")

    # Mode 'w' creates the file, truncating it if it already exists -- hence
    # the validation above happens first.
    with h5py.File(filepath, 'w') as f:
        for name, payload in datasets.items():
            f[name] = payload