Java - Using MultipleInputs in MapReduce programs gives an error


I am using MultipleInputs in a MapReduce program, running it locally in IntelliJ, and I am getting this error:

exception in thread "main" java.lang.noclassdeffounderror: org/apache/hadoop/mapreduce/lib/input/multipleinputs     @ plink.plinkdriver.run(plinkdriver.java:49)     @ org.apache.hadoop.util.toolrunner.run(toolrunner.java:65)     @ org.apache.hadoop.util.toolrunner.run(toolrunner.java:79)     @ plink.plinkdriver.main(plinkdriver.java:71)     @ sun.reflect.nativemethodaccessorimpl.invoke0(native method)     @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:57)     @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:43)     @ java.lang.reflect.method.invoke(method.java:606)     @ com.intellij.rt.execution.application.appmain.main(appmain.java:140) caused by: java.lang.classnotfoundexception: org.apache.hadoop.mapreduce.lib.input.multipleinputs     @ java.net.urlclassloader$1.run(urlclassloader.java:366)     @ java.net.urlclassloader$1.run(urlclassloader.java:355)     @ java.security.accesscontroller.doprivileged(native method)     @ java.net.urlclassloader.findclass(urlclassloader.java:354)     @ java.lang.classloader.loadclass(classloader.java:425)     @ sun.misc.launcher$appclassloader.loadclass(launcher.java:308)     @ java.lang.classloader.loadclass(classloader.java:358)     ... 9 more  process finished exit code 1 

The code for the driver class is below:

import org.apache.hadoop.conf.configured; import org.apache.hadoop.fs.path; import org.apache.hadoop.io.text; import org.apache.hadoop.mapreduce.job; import org.apache.hadoop.mapreduce.lib.input.multipleinputs; import org.apache.hadoop.mapreduce.lib.input.textinputformat; import org.apache.hadoop.mapreduce.lib.output.fileoutputformat; import org.apache.hadoop.mapreduce.lib.output.textoutputformat; import org.apache.hadoop.util.tool; import org.apache.hadoop.util.toolrunner; /**  * created sai bharath on 7/15/2015.  */ public class plinkdriver extends configured implements tool {      @override     public int run(string[] args) throws exception {           if (args.length < 3) {             system.err.printf("usage: %s [generic options] <input> <output>\n",                     getclass().getsimplename());             toolrunner.printgenericcommandusage(system.err);             return -1;         }          job job = new job();         job.setjarbyclass(plinkdriver.class);         multipleinputs.addinputpath(job, new path(args[0]),textinputformat.class,plinkmapper.class);         multipleinputs.addinputpath(job, new path(args[1]),textinputformat.class,plinkmapper2.class);         fileoutputformat.setoutputpath(job, new path(args[2]));         job.setoutputformatclass(textoutputformat.class);         job.setmapoutputkeyclass(text.class);         job.setmapoutputvalueclass(text.class);         job.setreducerclass(plinkreducer.class);         job.setoutputkeyclass(text.class);         job.setoutputvalueclass(text.class);        return job.waitforcompletion(true) ? 0 : 1;     }      public static void main(string[] args) throws exception {         int exitcode = toolrunner.run(new plinkdriver(), args);         system.exit(exitcode);     } } 

The pom.xml I am using:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the MapReduce project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>mapreduce</groupId>
    <artifactId>mapreduce</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <jdklevel>1.7</jdklevel>
        <requiredmavenversion>[3.3,)</requiredmavenversion>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.build.outputEncoding>UTF-8</project.build.outputEncoding>
    </properties>

    <distributionManagement>
        <repository>
            <id>code-artifacts</id>
            <url>
                http://code/artifacts/content/repositories/releases
            </url>
        </repository>
        <snapshotRepository>
            <id>code-artifacts</id>
            <url>
                http://code/artifacts/content/repositories/snapshots
            </url>
        </snapshotRepository>
    </distributionManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.18.1</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>${jdklevel}</source>
                    <target>${jdklevel}</target>
                    <showDeprecation>true</showDeprecation>
                    <showWarnings>true</showWarnings>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.codehaus.groovy</groupId>
                        <artifactId>groovy-eclipse-compiler</artifactId>
                        <version>2.9.2-01</version>
                    </dependency>

                    <dependency>
                        <groupId>org.codehaus.groovy</groupId>
                        <artifactId>groovy-eclipse-batch</artifactId>
                        <version>2.4.3-01</version>
                    </dependency>
                </dependencies>
            </plugin>
            <!-- Copies dependencies of the selected scope into target/lib during the package phase. -->
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>

                            <includeScope>provided</includeScope>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>

    <repositories>
        <repository>
            <releases>
                <enabled>true</enabled>
                <updatePolicy>always</updatePolicy>
                <checksumPolicy>warn</checksumPolicy>
            </releases>
            <snapshots>
                <enabled>false</enabled>
                <updatePolicy>never</updatePolicy>
                <checksumPolicy>fail</checksumPolicy>
            </snapshots>
            <id>hdpreleases</id>
            <name>hdp releases</name>
            <url>http://repo.hortonworks.com/content/repositories/releases/</url>
            <layout>default</layout>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- NOTE(review): hadoop-core 0.20.2 is declared alongside hadoop-client 2.6.0;
             both presumably provide org.apache.hadoop.* classes at different versions.
             Confirm whether this dependency is still needed. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>0.20.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.8.7</version>
        </dependency>
        <dependency>
            <groupId>org.apache.mrunit</groupId>
            <artifactId>mrunit</artifactId>
            <version>1.0.0</version>
            <classifier>hadoop2</classifier>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>1.9.5</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.2</version>
        </dependency>
    </dependencies>

</project>

Can anyone help me out with this issue?

Thanks. Bharath

The MultipleInputs class is contained in the hadoop-mapreduce-client-core artifact. Your project has a provided dependency on hadoop-client. The hadoop-client artifact is a thin wrapper that brings in multiple other Hadoop artifacts via transitive dependencies.

https://github.com/apache/hadoop/blob/release-2.6.0/hadoop-client/pom.xml

Based on this, I recommend making the following changes:

  1. Remove <excludeTransitive>true</excludeTransitive>. With the hadoop-client dependency alone in pom.xml, transitive dependency resolution is required to reach hadoop-mapreduce-client-core and the other Hadoop artifacts.
  2. Change <includeScope>runtime</includeScope> to <includeScope>provided</includeScope> to match the dependency scope used in the <dependency> section.

The maven-dependency-plugin configuration is copying dependencies to the target/lib directory. I assume your IntelliJ project is set up to add the jars in this directory to the classpath.


Comments