hadoop helloworld on Mac OS
install maven
- brew install maven
create a java project
- mvn archetype:generate
```
Choose a number or apply filter (format: [groupId:]artifactId, case sensitive contains): 436:
Choose org.apache.maven.archetypes:maven-archetype-quickstart version:
1: 1.0-alpha-1
2: 1.0-alpha-2
3: 1.0-alpha-3
4: 1.0-alpha-4
5: 1.0
6: 1.1
Choose a number: 6:
Define value for property 'groupId': : helloworld
Define value for property 'artifactId': : hadoop
Define value for property 'version':  1.0-SNAPSHOT: :
Define value for property 'package':  helloworld: :
Confirm properties configuration:
groupId: helloworld
artifactId: hadoop
version: 1.0-SNAPSHOT
package: helloworld
Y: :
[INFO] ----------------------------------------------------------------------------
[INFO] Using following parameters for creating project from Old (1.x) Archetype: maven-archetype-quickstart:1.1
[INFO] ----------------------------------------------------------------------------
[INFO] Parameter: groupId, Value: helloworld
[INFO] Parameter: packageName, Value: helloworld
[INFO] Parameter: package, Value: helloworld
[INFO] Parameter: artifactId, Value: hadoop
[INFO] Parameter: basedir, Value: /Users/username/Documents/src
[INFO] Parameter: version, Value: 1.0-SNAPSHOT
[INFO] project created from Old (1.x) Archetype in dir: /Users/username/Documents/src/hadoop
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 01:26 min
[INFO] Finished at: 2014-07-01T19:55:19+09:00
[INFO] Final Memory: 10M/81M
[INFO] ------------------------------------------------------------------------
```
write map reduce source
src/main/java/helloworld/Map.java (Mapper Implementation)
```java
package helloworld;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            output.collect(word, one);
        }
    }
}
```
src/main/java/helloworld/Reduce.java (Reducer Implementation)
```java
package helloworld;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
```
src/main/java/helloworld/App.java (Job)
```java
package helloworld;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Hello world!
 *
 */
public class App {
    public static void main(String[] args) throws Exception {
        System.out.println("Hello World!");

        JobConf conf = new JobConf(App.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
```
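The map and reduce logic above can be checked without a Hadoop cluster: tokenize a line the same way `Map.map()` does, then sum the per-word counts the way `Reduce.reduce()` does. The sketch below is illustrative only (the class name `WordCountSketch` is not part of the project); a local `HashMap` stands in for the shuffle phase.

```java
import java.util.HashMap;
import java.util.StringTokenizer;

// Illustrative sketch (not part of the original project): the same
// tokenize-then-sum word count as Map/Reduce above, run locally.
public class WordCountSketch {
    public static java.util.Map<String, Integer> count(String text) {
        java.util.Map<String, Integer> counts = new HashMap<>();
        // Same tokenization as Map.map(): whitespace-delimited words
        StringTokenizer tokenizer = new StringTokenizer(text);
        while (tokenizer.hasMoreTokens()) {
            // merge() sums the per-word ones, like Reduce.reduce()
            counts.merge(tokenizer.nextToken(), 1, Integer::sum);
        }
        return counts;
    }

    public static void main(String[] args) {
        System.out.println(count("hello world hello hadoop"));
    }
}
```

Running it prints each distinct word with its count, which is exactly what `part-00000` will contain after the real job runs.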
compile and create Jar file
- mvn clean install
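For `mvn clean install` to compile the classes above, the generated pom.xml also needs a Hadoop dependency. A sketch, assuming the Hadoop 2.4.0 install used in the run step (artifact and version should be checked against your environment):

```xml
<!-- assumed dependency; adjust the version to match your Hadoop install -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>2.4.0</version>
</dependency>
```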
run MapReduce job
- /usr/local/Cellar/hadoop/2.4.0/bin/hadoop jar target/hadoop-1.0-SNAPSHOT.jar helloworld.App input/*.xml output3
- hadoop dfs -ls output3
```
Found 2 items
-rw-r--r--   1 username supergroup          0 2014-07-01 22:23 output3/_SUCCESS
-rw-r--r--   1 username supergroup       5938 2014-07-01 22:23 output3/part-00000
```