Bulk Data Operations Demo

# HG changeset patch # User anazarov # Date 1391166085 -3600 # Node ID f72a8df6a2ed5fe076dfbb519545d81484ccf661 # Parent 9f098aed44c06ce09f8c7ab8a0f9f54c91ccba8b 8031650: Update bulk operation demo Reviewed-by: psandoz, mduigou diff -r 9f098aed44c0 -r f72a8df6a2ed src/share/sample/lambda/BulkDataOperations/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/sample/lambda/BulkDataOperations/index.html Fri Jan 31 12:01:25 2014 +0100 @@ -0,0 +1,49 @@ + + + + Bulk Data Operations Demo + + +

Bulk Data Operations Demo

+ +

+ This demo shows how to use bulk data operations with the new JDK8 + Collections API. + The demo also demonstrates new features of JDK8 such as lambda expressions + and method/constructor references. +

+ +

CSV Processor
+ +
+ Analyzes a CSV file, finds and collects useful information, computes + different statistics. For more information, see the source file. +
+ Source: src/CSVProcessor.java +
Grep
+ +
+ Behaves like the standard Linux tool Grep. For more information, see + the source file. +
+ Source: src/Grep.java +
PasswordGenerator
+ +
+ Produces a password of desired length. For more information see + source file. +
+ Source: src/PasswordGenerator.java +
WC
+ +
+ Counts newlines, words, characters, and the maximum line length of a + text file. For more information, see the source + file. +
+ Source: src/WC.java +

+ + \ No newline at end of file diff -r 9f098aed44c0 -r f72a8df6a2ed src/share/sample/lambda/BulkDataOperations/src/CSVProcessor.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/sample/lambda/BulkDataOperations/src/CSVProcessor.java Fri Jan 31 12:01:25 2014 +0100 @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Oracle nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This source code is provided to illustrate the usage of a given feature + * or technique and has been deliberately simplified. Additional steps + * required for a production-quality application, such as security checks, + * input validation, and proper error handling, might not be present in + * this sample code. + */ + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.function.*; +import java.util.regex.Pattern; +import java.util.stream.Collector; +import java.util.stream.Collectors; + +import static java.lang.Double.parseDouble; +import static java.util.stream.Collectors.*; + +/** + * CSVProcessor is a tool for processing CSV files. There are several + * command-line options. Consult the {@link #printUsageAndExit} method for + * instructions and command line parameters. This sample shows examples of the + * following features: + *

Lambda and bulk operations. Working with streams: map(...), filter(...), + * sorted(...) methods. The collect(...) method with different collectors: + * Collectors.maxBy(...), Collectors.minBy(...), Collectors.toList(), + * Collectors.toCollection(...), Collectors.groupingBy(...), + * Collectors.toDoubleSummaryStatistics(...), and a custom Collector.
Static method reference for printing values.
Try-with-resources feature for closing files.
Switch by String feature.
Other new APIs: Pattern.asPredicate(), BinaryOperator + * BufferedReader.lines(), Collection.forEach(...), Comparator.comparing(...), + * Comparator.reversed(), Arrays.stream(...).

+ * + */ +public class CSVProcessor { + + //Number of characters that may be read + private static final int READ_AHEAD_LIMIT = 100_000_000; + + /** + * The main method for the CSVProcessor program. Run the program with an + * empty argument list to see possible arguments. + * + * @param args the argument list for CSVProcessor. + */ + public static void main(String[] args) { + if (args.length < 2) { + printUsageAndExit(); + } + try (BufferedReader br = new BufferedReader( + Files.newBufferedReader(Paths.get(args[args.length - 1])))) { + //Assume that the first line contains column names. + List header = Arrays.stream(br.readLine().split(",")) + .map(String::trim).collect(toList()); + //Calculate an index of the column in question. + int column = getColumnNumber(header, args[1]); + switch (args[0]) { + case "sort": + verifyArgumentNumber(args, 4); + //Define the sort order. + boolean isAsc; + switch (args[2].toUpperCase()) { + case "ASC": + isAsc = true; + break; + case "DESC": + isAsc = false; + break; + default: + printUsageAndExit("Illegal argument" + args[2]); + return;//Should not be reached. + } + /* + * Create a comparator that compares lines by comparing + * values in the specified column. + */ + Comparator cmp + = Comparator.comparing(str -> getCell(str, column), + String.CASE_INSENSITIVE_ORDER); + /* + * sorted(...) is used to sort records. + * forEach(...) is used to output sorted records. + */ + br.lines().sorted(isAsc ? cmp : cmp.reversed()) + .forEach(System.out::println); + break; + case "search": + verifyArgumentNumber(args, 4); + /* + * Records are filtered by a regex. + * forEach(...) is used to output filtered records. + */ + Predicate pattern + = Pattern.compile(args[2]).asPredicate(); + br.lines().filter(str -> pattern.test(getCell(str, column))) + .forEach(System.out::println); + break; + case "groupby": + verifyArgumentNumber(args, 3); + /* + * Group lines by values in the column with collect(...), and + * print with forEach(...) for every distinct value within + * the column. + */ + br.lines().collect( + Collectors.groupingBy(str -> getCell(str, column), + toCollection(TreeSet::new))) + .forEach((str, set) -> { + System.out.println(str + ":"); + set.forEach(System.out::println); + }); + break; + case "stat": + verifyArgumentNumber(args, 3); + + /* + * BufferedReader will be read several times. + * Mark this point to return here after each pass. + * BufferedReader will be read right after the headers line + * because it is already read. + */ + br.mark(READ_AHEAD_LIMIT); + + /* + * Statistics can be collected by a custom collector in one + * pass. One pass is preferable. + */ + System.out.println( + br.lines().collect(new Statistics(column))); + + /* + * Alternatively, statistics can be collected + * by a built-in API in several passes. + * This method demonstrates how separate operations can be + * implemented using a built-in API. + */ + br.reset(); + statInSeveralPasses(br, column); + break; + default: + printUsageAndExit("Illegal argument" + args[0]); + } + } catch (IOException e) { + printUsageAndExit(e.toString()); + } + } + + private static void statInSeveralPasses(BufferedReader br, int column) + throws IOException { + System.out.println("#-----Statistics in several passes-------#"); + //Create a comparator to compare records by the column. + Comparator comparator + = Comparator.comparing( + (String str) -> parseDouble(getCell(str, column))); + //Find max record by using Collectors.maxBy(...) + System.out.println( + "Max: " + br.lines().collect(maxBy(comparator)).get()); + br.reset(); + //Find min record by using Collectors.minBy(...) + System.out.println( + "Min: " + br.lines().collect(minBy(comparator)).get()); + br.reset(); + //Compute the average value and sum with + //Collectors.toDoubleSummaryStatistics(...) + DoubleSummaryStatistics doubleSummaryStatistics + = br.lines().collect(summarizingDouble( + str -> parseDouble(getCell(str, column)))); + System.out.println("Average: " + doubleSummaryStatistics.getAverage()); + System.out.println("Sum: " + doubleSummaryStatistics.getSum()); + } + + private static void verifyArgumentNumber(String[] args, int n) { + if (args.length != n) { + printUsageAndExit("Expected " + n + " arguments but was " + + args.length); + } + } + + private static int getColumnNumber(List header, String name) { + int column = header.indexOf(name); + if (column == -1) { + printUsageAndExit("There is no column with name " + name); + } + return column; + } + + private static String getCell(String record, int column) { + return record.split(",")[column].trim(); + } + + private static void printUsageAndExit(String... str) { + System.out.println("Usages:"); + + System.out.println("CSVProcessor sort COLUMN_NAME ASC|DESC FILE"); + System.out.println("Sort lines by column COLUMN_NAME in CSV FILE\n"); + + System.out.println("CSVProcessor search COLUMN_NAME REGEX FILE"); + System.out.println("Search for REGEX in column COLUMN_NAME in CSV FILE\n"); + + System.out.println("CSVProcessor groupby COLUMN_NAME FILE"); + System.out.println("Split lines into different groups according to column " + + "COLUMN_NAME value\n"); + + System.out.println("CSVProcessor stat COLUMN_NAME FILE"); + System.out.println("Compute max/min/average/sum statistics by column " + + "COLUMN_NAME\n"); + + Arrays.asList(str).forEach(System.err::println); + System.exit(1); + } + + /* + * This is a custom implementation of the Collector interface. + * Statistics are objects gather max,min,sum,average statistics. + */ + private static class Statistics + implements Collector { + + + /* + * This implementation does not need to be thread safe because + * the parallel implementation of + * {@link java.util.stream.Stream#collect Stream.collect()} + * provides the necessary partitioning and isolation for safe parallel + * execution. + */ + private String maxRecord; + private String minRecord; + + private double sum; + private int lineCount; + private final BinaryOperator maxOperator; + private final BinaryOperator minOperator; + private final int column; + + public Statistics(int column) { + this.column = column; + Comparator cmp = Comparator.comparing( + (String str) -> parseDouble(getCell(str, column))); + maxOperator = BinaryOperator.maxBy(cmp); + minOperator = BinaryOperator.minBy(cmp); + } + + /* + * Process line. + */ + public Statistics accept(String line) { + maxRecord = maxRecord == null + ? line : maxOperator.apply(maxRecord, line); + minRecord = minRecord == null + ? line : minOperator.apply(minRecord, line); + + sum += parseDouble(getCell(line, column)); + lineCount++; + return this; + } + + + /* + * Merge two Statistics. + */ + public Statistics combine(Statistics stat) { + maxRecord = maxOperator.apply(maxRecord, stat.getMaxRecord()); + minRecord = minOperator.apply(minRecord, stat.getMinRecord()); + sum += stat.getSum(); + lineCount += stat.getLineCount(); + return this; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("#------Statistics------#\n"); + sb.append("Max: ").append(getMaxRecord()).append("\n"); + sb.append("Min: ").append(getMinRecord()).append("\n"); + sb.append("Sum = ").append(getSum()).append("\n"); + sb.append("Average = ").append(average()).append("\n"); + sb.append("#------Statistics------#\n"); + return sb.toString(); + } + + @Override + public Supplier supplier() { + return () -> new Statistics(column); + } + + @Override + public BiConsumer accumulator() { + return Statistics::accept; + } + + @Override + public BinaryOperator combiner() { + return Statistics::combine; + + } + + @Override + public Function finisher() { + return stat -> stat; + } + + @Override + public Set characteristics() { + return EnumSet.of(Characteristics.IDENTITY_FINISH); + } + + private String getMaxRecord() { + return maxRecord; + } + + private String getMinRecord() { + return minRecord; + } + + private double getSum() { + return sum; + } + + private double average() { + return sum / lineCount; + } + + private int getLineCount() { + return lineCount; + } + + } + +} diff -r 9f098aed44c0 -r f72a8df6a2ed src/share/sample/lambda/BulkDataOperations/src/Grep.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/sample/lambda/BulkDataOperations/src/Grep.java Fri Jan 31 12:01:25 2014 +0100 @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Oracle nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This source code is provided to illustrate the usage of a given feature + * or technique and has been deliberately simplified. Additional steps + * required for a production-quality application, such as security checks, + * input validation, and proper error handling, might not be present in + * this sample code. + */ + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.toList; + +/** + * Grep prints lines matching a regex. See {@link #printUsageAndExit(String...)} + * method for instructions and command line parameters. This sample shows + * examples of using next features: + *

Lambda and bulk operations. Working with streams: + * map(...),filter(...),flatMap(...),limit(...) methods.
Static method reference for printing values.
New Collections API forEach(...) method.
Try-with-resources feature.
new Files.walk(...), Files.lines(...) API.
Streams that need to be closed.

+ * + */ +public class Grep { + + private static void printUsageAndExit(String... str) { + System.out.println("Usage: " + Grep.class.getSimpleName() + + " [OPTION]... PATTERN FILE..."); + System.out.println("Search for PATTERN in each FILE. " + + "If FILE is a directory then whole file tree of the directory" + + " will be processed."); + System.out.println("Example: grep -m 100 'hello world' menu.h main.c"); + System.out.println("Options:"); + System.out.println(" -m NUM: stop analysis after NUM matches"); + Arrays.asList(str).forEach(System.err::println); + System.exit(1); + } + + /** + * The main method for the Grep program. Run program with empty argument + * list to see possible arguments. + * + * @param args the argument list for Grep. + * @throws java.io.IOException If an I/O error occurs. + */ + public static void main(String[] args) throws IOException { + long maxCount = Long.MAX_VALUE; + if (args.length < 2) { + printUsageAndExit(); + } + int i = 0; + //parse OPTIONS + while (args[i].startsWith("-")) { + switch (args[i]) { + case "-m": + try { + maxCount = Long.parseLong(args[++i]); + } catch (NumberFormatException ex) { + printUsageAndExit(ex.toString()); + } + break; + default: + printUsageAndExit("Unexpected option " + args[i]); + } + i++; + } + //parse PATTERN + Pattern pattern = Pattern.compile(args[i++]); + if (i == args.length) { + printUsageAndExit("There are no files for input"); + } + + try { + /* + * First obtain the list of all paths. + * For a small number of arguments there is little to be gained + * by producing this list in parallel. For one argument + * there will be no parallelism. + * + * File names are converted to paths. If a path is a directory then + * Stream is populated with whole file tree of the directory by + * flatMap() method. Files are filtered from directories. + */ + List files = Arrays.stream(args, i, args.length) + .map(Paths::get) + // flatMap will ensure each I/O-based stream will be closed + .flatMap(Grep::getPathStream) + .filter(Files::isRegularFile) + .collect(toList()); + /* + * Then operate on that list in parallel. + * This is likely to give a more even distribution of work for + * parallel execution. + * + * Lines are extracted from files. Lines are filtered by pattern. + * Stream is limited by number of matches. Each remaining string is + * displayed in std output by method reference System.out::println. + */ + files.parallelStream() + // flatMap will ensure each I/O-based stream will be closed + .flatMap(Grep::path2Lines) + .filter(pattern.asPredicate()) + .limit(maxCount) + .forEachOrdered(System.out::println); + } catch (UncheckedIOException ioe) { + printUsageAndExit(ioe.toString()); + } + } + + /** + * Flattens file system hierarchy into a stream. This code is not inlined + * for the reason of Files.walk() throwing a checked IOException that must + * be caught. + * + * @param path - the file or directory + * @return Whole file tree starting from path, a stream with one element - + * the path itself - if it is a file. + */ + private static Stream getPathStream(Path path) { + try { + return Files.walk(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Produces a stream of lines from a file. The result is a stream in order + * to close it later. This code is not inlined for the reason of + * Files.lines() throwing a checked IOException that must be caught. + * + * @param path - the file to read + * @return stream of lines from the file + */ + private static Stream path2Lines(Path path) { + try { + return Files.lines(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } +} diff -r 9f098aed44c0 -r f72a8df6a2ed src/share/sample/lambda/BulkDataOperations/src/PasswordGenerator.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/sample/lambda/BulkDataOperations/src/PasswordGenerator.java Fri Jan 31 12:01:25 2014 +0100 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Oracle nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This source code is provided to illustrate the usage of a given feature + * or technique and has been deliberately simplified. Additional steps + * required for a production-quality application, such as security checks, + * input validation, and proper error handling, might not be present in + * this sample code. + */ + +import java.security.SecureRandom; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +/** + * Generates password of desired length. See {@link #usage} method + * for instructions and command line parameters. This sample shows usages of: + *

Method references.
Lambda and bulk operations. A stream of random integers is mapped to + * chars, limited by desired length and printed in standard output as password + * string.

+ * + */ +public class PasswordGenerator { + + private static void usage() { + System.out.println("Usage: PasswordGenerator LENGTH"); + System.out.println( + "Password Generator produces password of desired LENGTH."); + } + + private static final List PASSWORD_CHARS = new ArrayList<>(); + + //Valid symbols. + static { + IntStream.rangeClosed('0', '9').forEach(PASSWORD_CHARS::add); // 0-9 + IntStream.rangeClosed('A', 'Z').forEach(PASSWORD_CHARS::add); // A-Z + IntStream.rangeClosed('a', 'z').forEach(PASSWORD_CHARS::add); // a-z + } + + /** + * The main method for the PasswordGenerator program. Run program with empty + * argument list to see possible arguments. + * + * @param args the argument list for PasswordGenerator. + */ + public static void main(String[] args) { + + if (args.length != 1) { + usage(); + return; + } + + long passwordLength; + try { + passwordLength = Long.parseLong(args[0]); + if (passwordLength < 1) { + printMessageAndUsage("Length has to be positive"); + return; + } + } catch (NumberFormatException ex) { + printMessageAndUsage("Unexpected number format" + args[0]); + return; + } + /* + * Stream of random integers is created containing Integer values + * in range from 0 to PASSWORD_CHARS.size(). + * The stream is limited by passwordLength. + * Valid chars are selected by generated index. + */ + new SecureRandom().ints(passwordLength, 0, PASSWORD_CHARS.size()) + .map(PASSWORD_CHARS::get) + .forEach(i -> System.out.print((char) i)); + } + + private static void printMessageAndUsage(String message) { + System.err.println(message); + usage(); + } + +} diff -r 9f098aed44c0 -r f72a8df6a2ed src/share/sample/lambda/BulkDataOperations/src/WC.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/sample/lambda/BulkDataOperations/src/WC.java Fri Jan 31 12:01:25 2014 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Oracle nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This source code is provided to illustrate the usage of a given feature + * or technique and has been deliberately simplified. Additional steps + * required for a production-quality application, such as security checks, + * input validation, and proper error handling, might not be present in + * this sample code. + */ + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.function.Consumer; +import java.util.regex.Pattern; + +/** + * WC - Prints newline, word, and character counts for each file. See + * the {@link #usage} method for instructions and command line parameters. This + * sample shows usages of: + *

Lambda and bulk operations. Shows how to create a custom collector to + * gather custom statistics. Implements the collection of statistics using a + * built-in API.
Constructor reference.
Try-with-resources feature.

+ * + */ +public class WC { + + //The number of characters that may be read. + private static final int READ_AHEAD_LIMIT = 100_000_000; + + //The pattern for splitting strings by non word characters to get words. + private static final Pattern nonWordPattern = Pattern.compile("\\W"); + + /** + * The main method for the WC program. Run the program with an empty + * argument list to see possible arguments. + * + * @param args the argument list for WC + * @throws java.io.IOException If an input exception occurred. + */ + public static void main(String[] args) throws IOException { + + if (args.length != 1) { + usage(); + return; + } + + try (BufferedReader reader = new BufferedReader( + new FileReader(args[0]))) { + reader.mark(READ_AHEAD_LIMIT); + /* + * Statistics can be gathered in four passes using a built-in API. + * The method demonstrates how separate operations can be + * implemented using a built-in API. + */ + collectInFourPasses(reader); + /* + * Usage of several passes to collect data is not the best way. + * Statistics can be gathered by a custom collector in one pass. + */ + reader.reset(); + collectInOnePass(reader); + } catch (FileNotFoundException e) { + usage(); + System.err.println(e); + } + } + + private static void collectInFourPasses(BufferedReader reader) + throws IOException { + /* + * Input is read as a stream of lines by lines(). + * Every line is turned into a stream of chars by the flatMapToInt(...) + * method. + * Length of the stream is counted by count(). + */ + System.out.println("Character count = " + + reader.lines().flatMapToInt(String::chars).count()); + /* + * Input is read as a stream of lines by lines(). + * Every line is split by nonWordPattern into words by flatMap(...) + * method. + * Empty lines are removed by the filter(...) method. + * Length of the stream is counted by count(). + */ + reader.reset(); + System.out.println("Word count = " + + reader.lines() + .flatMap(nonWordPattern::splitAsStream) + .filter(str -> !str.isEmpty()).count()); + + reader.reset(); + System.out.println("Newline count = " + reader.lines().count()); + /* + * Input is read as a stream of lines by lines(). + * Every line is mapped to its length. + * Maximum of the lengths is calculated. + */ + reader.reset(); + System.out.println("Max line length = " + + reader.lines().mapToInt(String::length).max().getAsInt()); + } + + private static void collectInOnePass(BufferedReader reader) { + /* + * The collect() method has three parameters: + * The first parameter is the {@code WCStatistic} constructor reference. + * collect() will create {@code WCStatistics} instances, where + * statistics will be aggregated. + * The second parameter shows how {@code WCStatistics} will process + * String. + * The third parameter shows how to merge two {@code WCStatistic} + * instances. + * + * Also {@code Collector} can be used, which would be more reusable + * solution. See {@code CSVProcessor} example for how {@code Collector} + * can be implemented. + * + * Note that the any performance increase when going parallel will + * depend on the size of the input (lines) and the cost per-element. + */ + WCStatistics wc = reader.lines().parallel() + .collect(WCStatistics::new, + WCStatistics::accept, + WCStatistics::combine); + System.out.println(wc); + } + + private static void usage() { + System.out.println("Usage: " + WC.class.getSimpleName() + " FILE"); + System.out.println("Print newline, word," + + " character counts and max line length for FILE."); + } + + private static class WCStatistics implements Consumer { + /* + * @implNote This implementation does not need to be thread safe because + * the parallel implementation of + * {@link java.util.stream.Stream#collect Stream.collect()} + * provides the necessary partitioning and isolation for safe parallel + * execution. + */ + + private long characterCount; + private long lineCount; + private long wordCount; + private long maxLineLength; + + + /* + * Processes line. + */ + @Override + public void accept(String line) { + characterCount += line.length(); + lineCount++; + wordCount += nonWordPattern.splitAsStream(line) + .filter(str -> !str.isEmpty()).count(); + maxLineLength = Math.max(maxLineLength, line.length()); + } + + /* + * Merges two WCStatistics. + */ + public void combine(WCStatistics stat) { + wordCount += stat.wordCount; + lineCount += stat.lineCount; + characterCount += stat.characterCount; + maxLineLength = Math.max(maxLineLength, stat.maxLineLength); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("#------WCStatistic------#\n"); + sb.append("Character count = ").append(characterCount).append('\n'); + sb.append("Word count = ").append(wordCount).append('\n'); + sb.append("Newline count = ").append(lineCount).append('\n'); + sb.append("Max line length = ").append(maxLineLength).append('\n'); + return sb.toString(); + } + } +}

Bulk Data Operations Demo

CSV Processor

Grep

PasswordGenerator

WC