- Store large amounts of data, e.g. tick data
- Retrieve subsets of data into memory
- Programming language independent
- Minimal setup requirements, single client
- High performance
21 May 2016
# Open test.h5 and take a handle on "group1".
f <- h5file("test.h5")
g1 <- f["group1"]

# Store two 3x3 matrices in the group and tag the group with an attribute.
g1["mat"] <- matrix(1:9, nrow = 3)
g1["mat2"] <- matrix(11:19, nrow = 3)
h5attr(g1, "attr1") <- "This is Group 1"

# A dataset can also be addressed by its full path from the file handle.
f["group2/mat3"] <- matrix(21:29, nrow = 3)

# Read the first column of every dataset listed in the file.
first_column <- function(path) f[path][, 1]
sapply(list.datasets(f), first_column)
##      /group1/mat /group1/mat2 /group2/mat3
## [1,]           1           11           21
## [2,]           2           12           22
## [3,]           3           13           23

# Release the file handle.
h5close(f)
Python:
# Write a frame of random values, indexed by calendar date, to an HDF5 file
# so that it can be read back from another language.
from pandas import date_range, DataFrame
from numpy import random

# Daily dates over the range, taken via the index's ``.date`` accessor.
t = date_range('2010-01-01', '2016-01-01', freq='D').date
# One row of three standard-normal draws per date.
randmat = random.standard_normal((len(t), 3))
df = DataFrame(randmat, index=t)
# Pass the HDF5 key by keyword: recent pandas releases deprecate positional
# arguments after the path, while ``key=`` works on old and new versions.
df.to_hdf('ex-pandas.h5', key='testset')
R:
# Open the file written by pandas in read-only mode.
f <- h5file("ex-pandas.h5", "r")

# Convert the first three stored index values to Dates.
# NOTE(review): the origin and the "- 1" suggest the index is stored as day
# ordinals where day 1 is 0001-01-01 -- confirm against the pandas writer.
head_rows <- 1:3
dates <- as.Date(f["testset/axis1"][head_rows], origin = "0001-01-01") - 1

# Pair the matching rows of the value block with those dates as a zoo series.
head_values <- f["testset/block0_values"][head_rows, ]
zoo(head_values, order.by = dates)