# Copyright (C) 2016 The HDF Group
# All rights reserved
#
# This example code illustrates how to access HDF5 data with Apache SparkR.
#
# If you have any questions, suggestions, or comments on this example, please
# use the HDF-EOS Forum (http://hdfeos.org/forums). If you would like to see an
# example of any other NASA HDF/HDF-EOS data product, feel free to contact us
# at eoshelp@hdfgroup.org or post it at the HDF-EOS Forum
# (http://hdfeos.org/forums).
#
# Usage: spark-2.0.0-bin-hadoop2.7/bin/spark-submit h5spark.R
#
# Workflow: start a SparkR session, read one dataset from an HDF-EOS5 file
# with rhdf5, turn it into a Spark DataFrame, and filter one column with
# Spark SQL.

library("SparkR")
sparkR.session()

# Install rhdf5 only when it is missing, so that routine runs need no network
# access and do not reinstall the package every time. The installer script is
# fetched over HTTPS because it is executed as code.
# NOTE(review): newer Bioconductor releases install packages through
# BiocManager::install() instead of biocLite(); on such setups install rhdf5
# beforehand and this branch is skipped.
if (!requireNamespace("rhdf5", quietly = TRUE)) {
  source("https://bioconductor.org/biocLite.R")
  biocLite("rhdf5")
}
library("rhdf5")

# Change file name.
fname <- "/scr/GSSTF_NCEP.3/1987/GSSTF_NCEP.3.1987.07.01.he5"
if (!file.exists(fname)) {
  stop("HDF5 input file not found: ", fname, call. = FALSE)
}

# List the groups and datasets contained in the file.
print(h5ls(fname))

# Change dataset name.
sst <- h5read(fname, "/HDFEOS/GRIDS/NCEP/Data Fields/SST")

# data.frame() names the columns X1, X2, ... when given an unnamed matrix;
# the SQL query below relies on that naming (column X440).
# NOTE(review): assumes the dataset is read as a 2-D array - confirm.
df <- as.DataFrame(data.frame(sst))
print(head(df))

# Register the Spark DataFrame as a temporary view so it can be queried by SQL.
createOrReplaceTempView(df, "view")
query <- sql("SELECT X440 FROM view WHERE X440 > 26.41")
print(head(query))

# Release the Spark session started above.
sparkR.session.stop()