# Copyright (C) 2016 The HDF Group
# All rights reserved
#
# This example code illustrates how to access HDF4 data with Apache SparkR.
#
# If you have any questions, suggestions, or comments on this example, please
# use the HDF-EOS Forum (http://hdfeos.org/forums). If you would like to see an
# example of any other NASA HDF/HDF-EOS data product, feel free to contact us
# at eoshelp@hdfgroup.org or post it at the HDF-EOS Forum
# (http://hdfeos.org/forums).
#
# Usage: spark-2.0.0-bin-hadoop2.7/bin/spark-submit h4spark.R
#
# Last updated: 2016-09-14

library("SparkR")
sparkR.session()

# Setup ----

# CRAN mirror used for any package that still has to be installed.
# NOTE(review): this is a plain-http mirror; consider an https mirror if the
# R build in use supports it.
cran_repo <- "http://cran.us.r-project.org"

# Install `pkg` from `cran_repo` only when it cannot be loaded already, so
# repeated spark-submit runs do not reinstall (and rebuild) every package.
# Extra arguments in `...` are forwarded to install.packages().
# Returns `pkg` invisibly.
install_if_missing <- function(pkg, ...) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = cran_repo, ...)
  }
  invisible(pkg)
}

# This assumes that HDF4-driver-enabled GDAL is installed under /usr/local/
# as well as PROJ.4 library/include header.
# rgdal is built from source so that it links against that local GDAL.
# NOTE(review): rgdal and gdalUtils may no longer be available from CRAN --
# confirm before relying on the automatic install.
install_if_missing("rgdal", type = "source")
install_if_missing("gdalUtils")
install_if_missing("raster")

library("rgdal")
library("gdalUtils")
library("raster")

# Make sure that GDAL version matches your GDAL under /usr/local/.
getGDALVersionInfo()

# Make sure that HDF gdal driver appears.
gdalDrivers()

# Read the HDF4 file ----

# Change HDF file name.
hdf_file <- file.path(
  "/scr/data/NASA/HDF4/HDF-EOS2/AQUA/AIRS/Grid",
  "AIRS.2002.08.24.L3.RetStd_H008.v4.0.21.0.G06104133343.hdf"
)
if (!file.exists(hdf_file)) {
  stop("HDF file not found: ", hdf_file, call. = FALSE)
}

sds <- get_subdatasets(hdf_file)
if (length(sds) == 0) {
  stop("No subdatasets found in: ", hdf_file, call. = FALSE)
}

# Change dataset by replacing 1 with other number.
r <- raster(sds[1])
m <- as.matrix(r)

# Query the data with Spark SQL ----

# Subset: keep only the top-left 4 x 4 corner of the grid.
if (nrow(m) < 4 || ncol(m) < 4) {
  stop("Dataset is smaller than the 4 x 4 subset used here.", call. = FALSE)
}
tmp <- data.frame(m[seq_len(4L), seq_len(4L)])

# `tmp` is already a local data.frame, so it is handed to Spark directly.
spark_df <- as.DataFrame(tmp)
head(spark_df)

# Register the Spark DataFrame as a temporary view so it can be queried in SQL.
# The query below selects column X1 of `tmp`.
createOrReplaceTempView(spark_df, "view")
query <- sql("SELECT X1 FROM view WHERE X1 > 1")
head(query)

# Release the Spark session now that the example is finished.
sparkR.session.stop()