Commit c63cd1ff authored by Dr. Daniel Diaz Sánchez's avatar Dr. Daniel Diaz Sánchez
Browse files

Upload New File

parent 26930664
Pipeline #49 canceled with stages
package cdist;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;
/* This example has been taken from O'Reilly examples */
public class JavaWordCount {
public static void main(String[] args) throws Exception {
// Set the input and output default files
// it can be a file from hdfs -> hdfs://server:port/path
// or a local file file:///path
// or a relative local file "fileName" under the working directory (ie. out)
String inputFile = "file:///var/home/lab/asig/labgcd/workspace-cdist-spark-and-streaming/spark-aptel/in.txt";
String outputFile = "out";
// let the user add params optionally to define input and output file
if(args.length > 2)
inputFile = args[0];
outputFile = args[1];
// Create a Java Spark Context, for the application with name "wordCount", use a local cluster
// (for using a existing cluster just substitute "local" with the name of the machine
JavaSparkContext sc = new JavaSparkContext(
"local", "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
// Load our input data.
// will create an inmutable (RDD) set of strings (one per line)
JavaRDD<String> input = sc.textFile(inputFile);
// Split up into words.
// make a map (line -> words in that line) and make it flat (so a sequence of words irrespectively of their line)
JavaRDD<String> words = input.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
// Transform into word and count.
// associate 1 per word
// and then reduce by adding all the numbers per word (key)
JavaPairRDD<String, Integer> counts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1))
.reduceByKey((x,y) -> x+y);
// Save the word count back out to a text file, causing evaluation.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment