# Hadoop HelloWorld on Mac OS
## install maven
* brew install maven
## create a java project
* mvn archetype:generate
>     Choose a number or apply filter (format: [groupId:]artifactId, case sensitive contains): 436:
>     Choose org.apache.maven.archetypes:maven-archetype-quickstart version:
>     1: 1.0-alpha-1
>     2: 1.0-alpha-2
>     3: 1.0-alpha-3
>     4: 1.0-alpha-4
>     5: 1.0
>     6: 1.1
>     Choose a number: 6:
>     Define value for property 'groupId': : helloworld
>     Define value for property 'artifactId': : hadoop
>     Define value for property 'version':  1.0-SNAPSHOT: :
>     Define value for property 'package':  helloworld: :
>     Confirm properties configuration:
>     groupId: helloworld
>     artifactId: hadoop
>     version: 1.0-SNAPSHOT
>     package: helloworld
>      Y: :
>      [INFO] ----------------------------------------------------------------------------
>     [INFO] Using following parameters for creating project from Old (1.x) Archetype: maven-archetype-quickstart:1.1
>     [INFO] ----------------------------------------------------------------------------
>     [INFO] Parameter: groupId, Value: helloworld
>     [INFO] Parameter: packageName, Value: helloworld
>     [INFO] Parameter: package, Value: helloworld
>     [INFO] Parameter: artifactId, Value: hadoop
>     [INFO] Parameter: basedir, Value: /Users/username/Documents/src
>     [INFO] Parameter: version, Value: 1.0-SNAPSHOT
>     [INFO] project created from Old (1.x) Archetype in dir: /Users/username/Documents/src/hadoop
>     [INFO] ------------------------------------------------------------------------
>     [INFO] BUILD SUCCESS
>     [INFO] ------------------------------------------------------------------------
>     [INFO] Total time: 01:26 min
>     [INFO] Finished at: 2014-07-01T19:55:19+09:00
>     [INFO] Final Memory: 10M/81M
>     [INFO] ------------------------------------------------------------------------
## write map reduce source
* src/main/java/helloworld/Map.java (Mapper Implementation)
>     package helloworld;
>     import java.io.IOException;
>     import java.util.StringTokenizer;
>     import org.apache.hadoop.io.IntWritable;
>     import org.apache.hadoop.io.LongWritable;
>     import org.apache.hadoop.io.Text;
>     import org.apache.hadoop.mapred.MapReduceBase;
>     import org.apache.hadoop.mapred.Mapper;
>     import org.apache.hadoop.mapred.OutputCollector;
>     import org.apache.hadoop.mapred.Reporter;
>     public class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable>
>     {
>      private final static IntWritable one = new IntWritable(1);
>     private Text word = new Text();
>     public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
>      throws IOException
>      {
>      String line = value.toString();
>      StringTokenizer tokenizer = new StringTokenizer(line);
>      while (tokenizer.hasMoreTokens())
>      {
>      word.set(tokenizer.nextToken());
>      output.collect(word, one);
>      }
>      }
>     }
* src/main/java/helloworld/Reduce.java (Reducer Implementation)
>     package helloworld;
>     import java.io.IOException;
>     import java.util.Iterator;
>     import org.apache.hadoop.io.IntWritable;
>     import org.apache.hadoop.io.Text;
>     import org.apache.hadoop.mapred.MapReduceBase;
>     import org.apache.hadoop.mapred.OutputCollector;
>     import org.apache.hadoop.mapred.Reducer;
>     import org.apache.hadoop.mapred.Reporter;
>     public class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable>
>     {
>      public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
>      Reporter reporter) throws IOException
>      {
>      int sum = 0;
>      while (values.hasNext())
>      {
>      sum += values.next().get();
>      }
>      output.collect(key, new IntWritable(sum));
>      }
>     }
* src/main/java/helloworld/App.java (Job)
>     package helloworld;
>     import org.apache.hadoop.fs.Path;
>     import org.apache.hadoop.io.IntWritable;
>     import org.apache.hadoop.io.Text;
>     import org.apache.hadoop.mapred.FileInputFormat;
>     import org.apache.hadoop.mapred.FileOutputFormat;
>     import org.apache.hadoop.mapred.JobClient;
>     import org.apache.hadoop.mapred.JobConf;
>     import org.apache.hadoop.mapred.TextInputFormat;
>     import org.apache.hadoop.mapred.TextOutputFormat;
>     /**
>      * Hello world!
>      *
>      */
>     public class App
>     {
>         public static void main( String[] args ) throws Exception
>         {
>             System.out.println( "Hello World!" );
>             JobConf conf = new JobConf(App.class);
>             conf.setJobName("wordcount");
>             conf.setOutputKeyClass(Text.class);
>             conf.setOutputValueClass(IntWritable.class);
>             conf.setMapperClass(Map.class);
>             conf.setCombinerClass(Reduce.class);
>             conf.setReducerClass(Reduce.class);
>             conf.setInputFormat(TextInputFormat.class);
>             conf.setOutputFormat(TextOutputFormat.class);
>             FileInputFormat.setInputPaths(conf, new Path(args[0]));
>             FileOutputFormat.setOutputPath(conf, new Path(args[1]));
>             JobClient.runJob(conf);
>         }
>     }
## compile and create Jar file
* mvn clean install
## run MapReduce job
* /usr/local/Cellar/hadoop/2.4.0/bin/hadoop jar target/hadoop-1.0-SNAPSHOT.jar helloworld.App input/*.xml output3
* hdfs dfs -ls output3 (`hadoop dfs` is deprecated in Hadoop 2.x)
>     Found 2 items
>     -rw-r--r--   1 username supergroup          0 2014-07-01 22:23 output3/_SUCCESS
>     -rw-r--r--   1 username supergroup       5938 2014-07-01 22:23 output3/part-00000
[first hadoop](http://xamry.wordpress.com/2012/09/11/your-first-hadoop-map-reduce-job/)  
[maven create project](http://ameblo.jp/azuki-milk-lush/entry-10719771370.html)
Comments