Spring support for WebHDFS (tagged: hadoop)

Spring support for WebHDFS


Yes, Spring Data supports this through Spring for Apache Hadoop (SHDP). According to the SHDP reference documentation, it's possible to configure any supported Hadoop file system:

http://docs.spring.io/spring-hadoop/docs/current/reference/html/fs.html

SHDP does not enforce any specific protocol to be used - in fact, as described in this section any FileSystem implementation can be used, allowing even other implementations than HDFS to be used.

See below for a code sample that demonstrates auto-wiring a WebHDFS FileSystem instance into a command-line application. To run it, pass one or more file paths as command-line arguments; it will print the path of every entry found at those paths by calling FileSystem.listStatus.

The code sample is configured to connect to an unsecured WebHDFS instance with "simple" authentication. To connect to a WebHDFS instance secured with Kerberos, you would set the relevant configuration properties in the <hdp:configuration id="hadoopConfiguration" /> bean. Hadoop security configuration is a very large topic, so rather than repeat the information here, I'll just point to the Apache Hadoop documentation:

http://hadoop.apache.org/docs/r2.4.1/hadoop-project-dist/hadoop-common/SecureMode.html

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the WebHDFS sample: a Spring Boot application that depends on
  spring-data-hadoop together with the hadoop-common and hadoop-hdfs artifacts.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>test-spring-hadoop</groupId>
    <artifactId>test-webhdfs</artifactId>
    <packaging>jar</packaging>
    <version>0.0.1-SNAPSHOT</version>
    <name>Test Spring Hadoop with WebHDFS</name>
    <description>Test Spring Hadoop with WebHDFS</description>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.1.0.RELEASE</version>
    </parent>

    <repositories>
        <repository>
            <!-- The id is kept unchanged even though the URL points at the release repository. -->
            <id>spring-milestones</id>
            <!-- HTTPS is required: Maven 3.8.1 and later block plain-http external repositories by default. -->
            <url>https://repo.spring.io/libs-release</url>
        </repository>
    </repositories>

    <properties>
        <start-class>testwebhdfs.Main</start-class>
        <java.version>1.6</java.version>
        <!-- Shared by the hadoop-common and hadoop-hdfs dependencies below so they stay in step. -->
        <hadoop.version>2.4.1</hadoop.version>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.data</groupId>
            <artifactId>spring-data-hadoop</artifactId>
            <version>2.0.2.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>
</project>

src/main/resources/hadoop-context.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring bean definitions for the WebHDFS sample, using the Spring Hadoop ("hdp") namespace.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:hdp="http://www.springframework.org/schema/hadoop"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                   http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">

    <!-- Hadoop configuration bean; no properties are set here. -->
    <hdp:configuration id="hadoopConfiguration" />

    <!-- File system addressed through the webhdfs:// scheme on localhost, port 50070. -->
    <hdp:file-system uri="webhdfs://localhost:50070" />

</beans>

src/main/java/testwebhdfs/Main.java

package testwebhdfs;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.boot.CommandLineRunner;import org.springframework.boot.SpringApplication;import org.springframework.context.annotation.Configuration;import org.springframework.context.annotation.ImportResource;@Configuration@ImportResource("hadoop-context.xml")public class Main implements CommandLineRunner {    @Autowired    private FileSystem fs;    @Override    public void run(String... strings) throws Exception {        Path[] paths = new Path[strings.length];        for (int i = 0; i < strings.length; ++i) {            paths[i] = new Path(strings[i]);        }        for (FileStatus stat: fs.listStatus(paths)) {            System.out.println(stat.getPath());        }    }    public static void main(String[] args) {        SpringApplication.run(Main.class, args);    }}