Reading an HDF file with C, FORTRAN, Python, IDL, MATLAB and R

From Wicare

Jump to: navigation, search


ICARE HDF reader

ICARE has developed a package of libraries written in Fortran77, Fortran90, C, Python, IDL, MATLAB and R.

They mainly contain functions for :

  • obtaining informations about the structure of an HDF file
  • extracting SDS data
  • reading SDS and file attributes
  • calibrating data


Sources can be downloaded here :



Packages can be browsed here :




See the the README files of the desired language for a detailed description of the way to use the library Examples

In all languages directories, some real examples of usage of the library are available in the ./example directory. This is the place to begin for a quick start




# -*- coding: utf-8 -*-
from pyhdf import SD
# HDF file and SDS names
FILE_NAME = "MYD04_L2.A2013060.1300.051.2013062021359.hdf"
SDS_NAME  = "Optical_Depth_Land_And_Ocean"
# open the hdf file
# select and read the sds data
sds =
data = sds.get()
# get dataset dimensions
nrows, ncols = data.shape  # data.shape: (3712, 3712) in the SEVIRI AER-OC example, 203
print data.shape # data.shape: (3712, 3712) in the SEVIRI AER-OC example, (203, 135)
i=200 # row index
j=125 # col index
print data[200,125]
# Terminate access to the data set
# Terminate access to the SD interface and close the file


#include "mfhdf.h"
const char file_name[100]="MYD04_L2.A2013060.1300.051.2013062021359.hdf";
char sds_name[100]="Optical_Depth_Land_And_Ocean";
main( ) {
 /************************* Variable declaration **************************/
  int32 sd_id, sds_id, sds_index, rank, data_type, n_attrs;
  int32 dim_sizes[2];
  int32 start[2];
  int32 stride[2];
  int32 edges[2];
  int32 nrows, ncols;
  void *data;
  int i, j, k;
  /********************* End of variable declaration ***********************/
  /* Open the file */
  sd_id = SDstart (file_name, DFACC_READ);
  /* Get the index of the given data set SDS_NAME */
  sds_index = SDnametoindex(sd_id, sds_name);
  sds_id = SDselect (sd_id, sds_index);
  /* Get the name, rank, dimension sizes, data type and number of attributes for a data set  */
  SDgetinfo(sds_id, sds_name, &rank, dim_sizes, &data_type, &n_attrs);
  nrows = dim_sizes[0];
  ncols = dim_sizes[1];
  start[0] = 0;  /* index of first row to read */
  start[1] = 0;  /* index of first column to read */
  edges[0] = dim_sizes[0];    /* the number of rows to read */
  edges[1] = dim_sizes[1];    /* the number of cols to read */
  stride[0] = 1;
  stride[1] = 1;
  data = (void *)malloc(nrows*ncols*DFKNTsize(data_type));
  /* Read entire data into data array  
  The array stride (i.e. step) specifies the reading pattern along each dimension. For example, if one of the elements of the array stride is 1, then every element along the corresponding dimension of the data set will be read. If one of the elements of the array stride is 2, then every other element along the corresponding dimension of the data set will be read, and so on. Specifying stride value of NULL in the C interface or setting all values of the array stride to 1 in either interface (C or FORTRAN) specifies the contiguous reading of data. 
  SDreaddata (sds_id, start, stride, edges, (VOIDP)data);
  i=200;   // row index
  j=125;   // col index
  k = i*ncols + j;
  printf ("%d\n ", ((int16*) data)[k]);
  /* Terminate access to the data set. */
  SDendaccess (sds_id);
  /* Terminate access to the SD interface and close the file. */
  SDend (sd_id);


      program read_data
      implicit none
C Parameter declaration
      integer DFACC_READ, DFNT_INT32
      parameter(DFACC_READ = 1, DFNT_INT32 = 24)
      integer MAXDIM
C Function declaration
      integer sfstart, sfn2index, sfselect, sfginfo, sfrdata
      integer sfendacc, sfend
C ************************** Variable declaration
C **************************
      character*100 sds_name
      character*100 file_name
      integer sd_id, sds_id, sds_index, status
      integer rank, data_type, n_attrs
      integer dim_sizes(32), start(32), edges(32), stride(32)
      integer*2 data(MAXDIM)
      integer nrows, ncols, i, j, k
C ************************** End of variable declaration
C **************************
C Open the file
      sd_id = sfstart(file_name, DFACC_READ)
C Get the index of the given data set SDS_NAME
      sds_index = sfn2index(sd_id, sds_name)
      sds_id = sfselect(sd_id, sds_index)
C Get the name, rank, dimension sizes, data type and number of 
C attributes for a data set
      print*, sds_id
      status = sfginfo(sds_id,sds_name,rank,dim_sizes,data_type,n_attrs)
      nrows = dim_sizes(2)
      ncols = dim_sizes(1)
C Define the location, pattern, and size of the data set
      start(1) = 0  ! index of first row to read 
      start(2) = 0  ! index of first column to read
      edges(1) = dim_sizes(1)    ! the number of cols to read
      edges(2) = dim_sizes(2)    ! the number of rows to read
      stride(1) = 1 ! to read entire data
      stride(2) = 1
C Read entire data into data array. The array stride (i.e. step) specifies 
C the reading pattern along each dimension. 
C The sfrdata routine reads numeric scientific data and sfrcdata reads 
C character scientific data
      status = sfrdata(sds_id, start, stride, edges, data)
      i=201  ! row index
      j=126  ! col index
      k = (i-1)*ncols + j
      print*, data(k)
C Terminate access to the data set
      status = sfendacc(sds_id)
C Terminate access to the SD interface and close the file
      status = sfend(sd_id)


PRO read_hdf
FILE_NAME = "MYD04_L2.A2013060.1300.051.2013062021359.hdf"   ; nrows=203, ncols=135
SDS_NAME = "Optical_Depth_Land_And_Ocean"
; Open the file
sd_id = HDF_SD_START(FILE_NAME, /read)
; Find the index of the sds to read using its name
sds_index = HDF_SD_NAMETOINDEX(sd_id, SDS_NAME)
; Select it
sds_id = HDF_SD_SELECT(sd_id, sds_index)
; Get data set information including dimension information
HDF_SD_GetInfo, sds_id, name = SDS_NAME, natts = num_attributes, ndim = num_dims, dims = dim_sizes
nrows = dim_sizes[1]
ncols = dim_sizes[0]
; Define subset to read. start is [0,0].
start = INTARR(2) ; the start position of the data to be read
start[0] = 0
start[1] = 0
edges = INTARR(2) ; the number of elements to read in each dimension
edges[0] = dim_sizes[0]
edges[1] = dim_sizes[1]
; Read the data : you can notice that here, it is not needed to allocate the data array yourself
HDF_SD_GETDATA, sds_id, data, start = start, count = edges
i=200 ; row index
j=125 ; col index
PRINT, FORMAT = '(I," ",$)', data[j,i]    ; 65
; end access to SDS
; close the hdf file
HDF_SD_END, sd_id


file_name = 'MYD04_L2.A2013060.1300.051.2013062021359.hdf';
sds_name = 'Optical_Depth_Land_And_Ocean' ;
% Open a buffer for hdf file
sd_id = hdfsd( 'start', file_name, 'rdonly') 
% Get the number of datasets and global attributes
[num_datasets, num_global_attr, status] = hdfsd('fileinfo',sd_id) 
% Get the sds identifier from the dataset named sds_name
sds_index  = hdfsd('nametoindex', sd_id, sds_name) 
sds_id = hdfsd('select', sd_id, sds_index)
% Get the name, number of dimensions, size of each dimension, data type and number of 
%attributes of a dataset
[sds_name, sds_num_dim, sds_dim, sds_data_type, sds_num_attr] = hdfsd('getinfo', sds_id) 
% Read the dataset identified by sds_id
sds_start_vector = zeros(1,  sds_num_dim) %the position to begin reading
sds_stride = [] %the interval between value to read
sds_end_vector = sds_dim %the end of dimension to read
[sds_data, status] = hdfsd('readdata', sds_id, sds_start, sds_stride,  sds_edges) ;
% Read the dataset named sds_name
data = hdfread (file_name, sds_name) ;
i = 200 ; %row index
j = 125 ; %col index
disp([data(i, j)]) ;
% Or, 
disp([sds_data(j, i)]) ;
% Close access to the dataset
status = hdfsd('endaccess',sds_id);
% Close access to the hdf file
status = hdfsd('end',sd_id);



program sds_info
use hdf5
implicit none
! Variables declaration
  CHARACTER*100 :: file_name
  CHARACTER*100 :: sds_name
  CHARACTER*100 :: gr_name
  CHARACTER*100 :: attr_name
  INTEGER(HID_T):: file_id, gr_id, dset_id, attr_id
  INTEGER :: status, error, storage, nlinks,max_corder, attr_num
  REAL, DIMENSION(10,10) :: dset_data, data_out
  INTEGER, DIMENSION(1:10)   ::    buf
  INTEGER(HSIZE_T), DIMENSION(2)::  data_dims
! Variables initalization
  file_name = "example.h5"
  sds_name = "/g1/g1.1/dset1.1.1"
  gr_name = "g3"
  attr_name = "attr1"
! Initialize the interface
  call h5open_f(status)
! Open an hdf5 file
  call h5fopen_f (file_name, H5F_ACC_RDWR_F, file_id, status)
! Get the number of global attributes
  call h5aget_num_attrs_f(file_id, attr_num, error)
  print *, "attr_num ",attr_num
! Open a group
  call h5gopen_f(file_id, gr_name, gr_id, status )
! Get information about a group
! **storage: type of storage for links in the group: Compact storage, Indexed storage or Symbol tables
! **nlinks : number of links in the group
! **max_corder : current maximum creation order value for group
  call h5gget_info_f(gr_id, storage, nlinks,max_corder, error)
  print*,"storage, nlinks,max_corder", storage, nlinks,max_corder
! Open a dataset
  call h5dopen_f(file_id, sds_name, dset_id, error)
! Get the number of attributes
  call h5aget_num_attrs_f(dset_id, attr_num, error)
  print *, "attr_num ",attr_num
! Read the dataset
  call h5dread_f(dset_id, H5T_NATIVE_REAL, data_out, data_dims, error)
  print *, "data_out ",data_out(2,2)
! Open an attribute
  call h5aopen_f(file_id, attr_name, attr_id, error)
! Read an attribute
  call h5aread_f(attr_id, H5T_NATIVE_INTEGER, buf, dims, error)
  print *, "buf ", buf
! Terminate access to the group
  call h5gclose_f(gr_id, error)
! Terminate access to the dataset
  call h5dclose_f(dset_id, error)
! Terminate access to the file
  call h5fclose_f(file_id, error)
! Close FORTRAN interface.
  call h5close_f(status)
end program sds_info


file_name = '/matlab/R2012a/toolbox/matlab/demos/example.h5';
sds_name = '/g4/lon' ;
% Open  hdf file in read-only mode
sd_id =;
% Get file info by file id
file_info = H5F.get_info(sd_id) 
% Get global info of the file
info = h5info(file_name)
% Display the structure of the file
% Open  a group
g_id =,'/g2');
% Get group info
group_info = H5F.get_info(g_id) 
% Open sds by name 
sds_id =,sds_name);
% Get global info of a sds
sds_info = h5info(file_name,'/g4/time')
% Display the structure of a sds
h5disp(file_name, sds_name);
% Get attributes info of a sds
attval = h5readatt(file_name, '/g4/lon','units')
% Read all data of a sds
 data =
% Read the first 5-by-3 subset
sds_data= h5read(file_name, '/g4/world', [1 1], [5 3]) ;
%Close sds access
%Close group access
% Close access to the hdf file


#To install the package rhdf5 , you need a current version (>2.15.0) of R
#After installing R you can run the following commands from the R command shell to install the bioconductor package rhdf5.
filename = "/home/Projets/R/example.h5"
# Read data of all datasets of a group
# Read a dataset
# Display the contents of main groups
h5ls(filename, FALSE)
# Open a hdf5 file
fid <-H5Fopen(filename)
# Get the number of global attributes
# Open the object "g2"
gid <- H5Oopen(fid, "g2")
# Get group info
H5Gget_info(gid) #or H5Gget_info_by_name(fid, "g2")
# Get the number of group attributes
H5Oget_num_attrs(gid) # or H5Oget_num_attrs("g2")
# Open a dataset
did <- H5Dopen(fid, "g2/dset2.2")
# Get dataspace
dataspace <- H5Dget_space(did)
# Close dataspace access
#Close dataset access
# Close group access
# Close hdf5 file access

Personal tools
June 2017